Commit c9b4e3c5 authored by Andrey Filippov's avatar Andrey Filippov

minor clean up, option to use larger output buffer

parent b70d24e3
......@@ -48,6 +48,7 @@ module dtt_iv_8x8#(
parameter DSP_A_WIDTH = 25,
parameter DSP_P_WIDTH = 48,
parameter COSINE_SHIFT= 17,
parameter ODEPTH = 5, // output buffer depth (bits). Here 5, put can use more if used as a full block buffer
parameter COS_01_32 = 130441, // int(round((1<<17) * cos( 1*pi/32)))
parameter COS_03_32 = 125428, // int(round((1<<17) * cos( 3*pi/32)))
parameter COS_04_32 = 121095, // int(round((1<<17) * cos( 4*pi/32)))
......@@ -97,8 +98,6 @@ module dtt_iv_8x8#(
wire signed [TRANSPOSE_WIDTH-1:0] dcth_dout0;
wire signed [TRANSPOSE_WIDTH-1:0] dcth_dout1;
// wire dcth_pre2_start_out0;
// wire dcth_pre2_start_out1;
wire dcth_en_out0;
wire dcth_en_out1;
......@@ -111,9 +110,7 @@ module dtt_iv_8x8#(
reg [1:0] transpose_w_page;
reg [6:0] transpose_cntr; // transpose memory counter, [6] == 1 when the last page is being finished
reg transpose_in_run;
// wire transpose_start = dcth_phin_run && (dcth_phin [6:0] == 7'h10);
wire transpose_start = dcth_phin_run && (dcth_phin [6:0] == 7'h11);
// wire transpose_start = dcth_phin_run && (dcth_phin [6:0] == 7'h12);
reg [2:0] transpose_wa_low; // [2:0] transpose memory low address bits, [3] - other group (of 16)
reg [4:0] transpose_wa_high; // high bits of transpose memory write address
wire [7:0] transpose_wa = {transpose_wa_high,transpose_wa_low};
......@@ -175,63 +172,33 @@ module dtt_iv_8x8#(
reg [6:0] dctv_out_cntr; // count output data from second (vertical) pass (bit 6 - stopping)
reg dctv_out_run; //
// wire dctv_out_start = dctv_phin [6:0] == 'h10;
wire dctv_out_start = dctv_phin [6:0] == 'h11;
reg [4:0] dctv_out_wa_1;
reg [1:0] dctv_out_we_1;
reg [ODEPTH-1:0] dctv_out_wa;
reg [1:0] dctv_out_we;
reg dctv_out_sel; // select DCTv channel output;
reg signed [OUT_WIDTH-1:0] dctv_out_ram_1[0:31];
reg [2:0] dctv_out_debug_ram_1[0:31];
reg [6:0] dctv_out_ra_1;
// wire [3:0] dctv_out_ra_1_w = {dctv_out_ra_1[3:1], ~dctv_out_ra_1[0]};
/*
wire [3:0] dctv_out_ra_1_w = {dctv_out_ra_1[3],
dctv_out_ra_1[2] ? dctv_out_ra_1[1] : (~dctv_out_ra_1[1] ^ dctv_out_ra_1[0]),
~dctv_out_ra_1[2] ^ dctv_out_ra_1[0],
~dctv_out_ra_1[0]};
*/
// wire dctv_out_start_1 = dctv_out_cntr[6:0] == 'h0c; // 'h0b;
/// wire dctv_out_start_1 = dctv_out_cntr[6:0] == 'h0b; // 'h0b;
reg signed [OUT_WIDTH-1:0] dctv_out_ram[0: ((1<<ODEPTH)-1)]; // [0:31];
reg [2:0] dctv_out_debug_ram[0:((1<<ODEPTH)-1)]; // [0:31];
reg [6:0] dctv_out_ra;
wire dctv_out_start_1 = dctv_out_cntr[6:0] == 'h0e; // 'h0b;
reg dctv_out_run_1;
reg signed [OUT_WIDTH-1:0] dctv_out_reg_1;
reg [2:0] dctv_out_debug_reg_1; // SuppressThisWarning VEditor - simulation only
/*
reg signed [OUT_WIDTH-1:0] dctv_out_ram_2[0:7];
reg [2:0] dctv_out_debug_ram_2[0:7];
reg dctv_out_we_2;
reg [2:0] dctv_out_wa_2;
reg [6:0] dctv_out_ra_2;
// wire dctv_out_start_2 = dctv_out_ra_1[6:0] == 2;
wire dctv_out_start_2 = dctv_out_ra_1[6:0] == 8;
reg dctv_out_run_2;
reg signed [OUT_WIDTH-1:0] dctv_out_reg_2;
reg [2:0] dctv_out_debug_reg_2; // SuppressThisWarning VEditor - simulation only
*/
// reg [1:0] mode_in; //
// reg [1:0] mode_in; //
// reg modev_in; // delayed mode_in[0]
reg pre_dv;
reg signed [OUT_WIDTH-1:0] dctv_out_reg;
reg [2:0] dctv_out_debug_reg; // SuppressThisWarning VEditor - simulation only
reg [1:0] mode_h; // registered at start, [1] used for hor (first) pass
reg [1:0] mode_h_late; // mode_h registered @ pre_last_in
// reg [1:0] mode_hv; // passing vertical mode wit the same dealy as horizontal
reg [1:0] mode_v; // mode_h_late registered @ transpose_out_start ([0]used for vert pass)
// mode_out mode_v registered @ pre_first_out_w
// wire [1:0] pre2_start_outh; // 2 cycles before horizontal output data is valid
// wire [1:0] pre2_start_outv; // 2 cycles before vertical output data is valid
wire [1:0] pre2_dsth; // 2 cycles before horizontal output data is valid, 0 dct, 1 - dst
wire [1:0] pre2_dstv; // 2 cycles before vertical output data is valid, 0 dct, 1 - dst
reg pre_dsth; // 1 cycles before horizontal output data is valid, 0 dct, 1 - dst
reg pre_dstv; // 1 cycles before vertical output data is valid, 0 dct, 1 - dst
// wire pre_first_out_w = dctv_out_ra_1[6:0] == 1;
wire pre_first_out_w = dctv_out_start_1;
wire [OUT_WIDTH-1:0] debug_dctv_dout = dctv_out_sel? dctv_dout1: dctv_dout0; // SuppressThisWarning VEditor - simulation only
// assign d_out = dctv_out_reg_2;
assign d_out = dctv_out_reg_1;
assign d_out = dctv_out_reg;
assign pre_last_in = pre_last_in_r;
......@@ -384,70 +351,48 @@ module dtt_iv_8x8#(
if (!dctv_out_run || dctv_out_start) dctv_out_cntr <= 0;
else dctv_out_cntr <= dctv_out_cntr + 1;
dctv_out_we_1 <= {dctv_out_we_1[0], dctv_en_out0 | dctv_en_out1};
dctv_out_we <= {dctv_out_we[0], dctv_en_out0 | dctv_en_out1};
dctv_out_sel <= dctv_out_cntr[0];
case (dctv_out_cntr[3:0])
4'h0: dctv_out_wa_1[3:0] <= 0 ^ {3{pre_dstv}};
4'h1: dctv_out_wa_1[3:0] <= 9 ^ {3{pre_dstv}};
4'h2: dctv_out_wa_1[3:0] <= 7 ^ {3{pre_dstv}};
4'h3: dctv_out_wa_1[3:0] <= 14 ^ {3{pre_dstv}};
4'h4: dctv_out_wa_1[3:0] <= 4 ^ {3{pre_dstv}};
4'h5: dctv_out_wa_1[3:0] <= 10 ^ {3{pre_dstv}};
4'h6: dctv_out_wa_1[3:0] <= 3 ^ {3{pre_dstv}};
4'h7: dctv_out_wa_1[3:0] <= 13 ^ {3{pre_dstv}};
4'h8: dctv_out_wa_1[3:0] <= 1 ^ {3{pre_dstv}};
4'h9: dctv_out_wa_1[3:0] <= 8 ^ {3{pre_dstv}};
4'ha: dctv_out_wa_1[3:0] <= 6 ^ {3{pre_dstv}};
4'hb: dctv_out_wa_1[3:0] <= 15 ^ {3{pre_dstv}};
4'hc: dctv_out_wa_1[3:0] <= 2 ^ {3{pre_dstv}};
4'hd: dctv_out_wa_1[3:0] <= 12 ^ {3{pre_dstv}};
4'he: dctv_out_wa_1[3:0] <= 5 ^ {3{pre_dstv}};
4'hf: dctv_out_wa_1[3:0] <= 11 ^ {3{pre_dstv}};
4'h0: dctv_out_wa[3:0] <= 4'h0 ^ {1'b0,{3{pre_dstv}}};
4'h1: dctv_out_wa[3:0] <= 4'h9 ^ {1'b0,{3{pre_dstv}}};
4'h2: dctv_out_wa[3:0] <= 4'h7 ^ {1'b0,{3{pre_dstv}}};
4'h3: dctv_out_wa[3:0] <= 4'he ^ {1'b0,{3{pre_dstv}}};
4'h4: dctv_out_wa[3:0] <= 4'h4 ^ {1'b0,{3{pre_dstv}}};
4'h5: dctv_out_wa[3:0] <= 4'ha ^ {1'b0,{3{pre_dstv}}};
4'h6: dctv_out_wa[3:0] <= 4'h3 ^ {1'b0,{3{pre_dstv}}};
4'h7: dctv_out_wa[3:0] <= 4'hd ^ {1'b0,{3{pre_dstv}}};
4'h8: dctv_out_wa[3:0] <= 4'h1 ^ {1'b0,{3{pre_dstv}}};
4'h9: dctv_out_wa[3:0] <= 4'h8 ^ {1'b0,{3{pre_dstv}}};
4'ha: dctv_out_wa[3:0] <= 4'h6 ^ {1'b0,{3{pre_dstv}}};
4'hb: dctv_out_wa[3:0] <= 4'hf ^ {1'b0,{3{pre_dstv}}};
4'hc: dctv_out_wa[3:0] <= 4'h2 ^ {1'b0,{3{pre_dstv}}};
4'hd: dctv_out_wa[3:0] <= 4'hc ^ {1'b0,{3{pre_dstv}}};
4'he: dctv_out_wa[3:0] <= 4'h5 ^ {1'b0,{3{pre_dstv}}};
4'hf: dctv_out_wa[3:0] <= 4'hb ^ {1'b0,{3{pre_dstv}}};
endcase
dctv_out_wa_1[4] <= dctv_out_cntr[4] ^ (~dctv_out_cntr[3] & dctv_out_cntr[0]);
// It is possible to fill large output memory buffer, in that case
dctv_out_wa[ODEPTH-1:4] <= dctv_out_cntr[ODEPTH-1:4] - (~dctv_out_cntr[3] & dctv_out_cntr[0]);
// write first stage of output reordering
if (dctv_out_we_1[1]) dctv_out_ram_1[dctv_out_wa_1] <= dctv_out_sel? dctv_dout1: dctv_dout0;
if (dctv_out_we_1[1]) dctv_out_debug_ram_1[dctv_out_wa_1] <= dctv_out_sel? dctv_yindex1: dctv_yindex0;
if (dctv_out_we[1]) dctv_out_ram[dctv_out_wa] <= dctv_out_sel? dctv_dout1: dctv_dout0;
if (dctv_out_we[1]) dctv_out_debug_ram[dctv_out_wa] <= dctv_out_sel? dctv_yindex1: dctv_yindex0;
if (rst) dctv_out_run_1 <= 0;
else if (dctv_out_start_1) dctv_out_run_1 <= 1;
else if (&dctv_out_ra_1[5:0]) dctv_out_run_1 <= 0;
if (rst) pre_dv <= 0;
else if (dctv_out_start_1) pre_dv <= 1;
else if (&dctv_out_ra[5:0]) pre_dv <= 0;
if (!dctv_out_run_1 || dctv_out_start_1) dctv_out_ra_1 <= 0;
else dctv_out_ra_1 <= dctv_out_ra_1 + 1;
// reading first stage of output reorder RAM
// if (dctv_out_run_1) dctv_out_reg_1 <= dctv_out_ram_1[dctv_out_ra_1_w];
// if (dctv_out_run_1) dctv_out_debug_reg_1 <= dctv_out_debug_ram_1[dctv_out_ra_1_w];
if (dctv_out_run_1) dctv_out_reg_1 <= dctv_out_ram_1[dctv_out_ra_1[4:0]];
if (dctv_out_run_1) dctv_out_debug_reg_1 <= dctv_out_debug_ram_1[dctv_out_ra_1[4:0]];
// last stage of the output reordering - 8 register memory
/*
dctv_out_we_2 <= dctv_out_run_1;
dctv_out_wa_2 <= dctv_out_ra_1_w[2:0];
// write last stage of output reordering
if (dctv_out_we_2) dctv_out_ram_2[dctv_out_wa_2] <= dctv_out_reg_1;
if (dctv_out_we_2) dctv_out_debug_ram_2[dctv_out_wa_2] <= dctv_out_debug_reg_1;
if (rst) dctv_out_run_2 <= 0;
else if (dctv_out_start_2) dctv_out_run_2 <= 1;
else if (&dctv_out_ra_2[5:0]) dctv_out_run_2 <= 0;
if (!dctv_out_run_2 || dctv_out_start_2) dctv_out_ra_2 <= 0;
else dctv_out_ra_2 <= dctv_out_ra_2 + 1;
if (!pre_dv || dctv_out_start_1) dctv_out_ra <= 0;
else dctv_out_ra <= dctv_out_ra + 1;
// reading first stage of output reorder RAM
if (dctv_out_run_2) dctv_out_reg_2 <= dctv_out_ram_2[dctv_out_ra_2[2:0]];
if (dctv_out_run_2) dctv_out_debug_reg_2 <= dctv_out_debug_ram_2[dctv_out_ra_2[2:0]];
*/
if (pre_dv) dctv_out_reg <= dctv_out_ram[dctv_out_ra[4:0]];
if (pre_dv) dctv_out_debug_reg <= dctv_out_debug_ram[dctv_out_ra[4:0]];
pre_first_out <= pre_first_out_w;
// dv <= dctv_out_run_2;
dv <= dctv_out_run_1;
dv <= pre_dv;
end
always @ (posedge clk) begin
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment