Commit 530030f6 authored by Andrey Filippov's avatar Andrey Filippov

Switched to new implementation of 8x8 DCT, generated documentation

parent 0e866d77
...@@ -23,7 +23,7 @@ py393/dbg* ...@@ -23,7 +23,7 @@ py393/dbg*
debug/* debug/*
html/* html/*
man/* man/*
x393_docs/*
includes/x393_cur_params_sim.vh includes/x393_cur_params_sim.vh
includes/x393_cur_params_target_*.vh includes/x393_cur_params_target_*.vh
py393/exp_gpio.py py393/exp_gpio.py
......
...@@ -40,7 +40,14 @@ ...@@ -40,7 +40,14 @@
*/ */
`timescale 1ns/1ps `timescale 1ns/1ps
module cmprs_macroblock_buf_iface ( module cmprs_macroblock_buf_iface #(
`ifdef USE_OLD_DCT
parameter DCT_PIPELINE_PAUSE = 0 // No need to delay
`else
parameter DCT_PIPELINE_PAUSE = 48 // TODO: find really required value (minimal), adjust counter bits (now 6)
// 48 seems to be OK (may be less)
`endif
)(
// input rst, // input rst,
input xclk, // global clock input, compressor single clock rate input xclk, // global clock input, compressor single clock rate
...@@ -98,6 +105,7 @@ module cmprs_macroblock_buf_iface ( ...@@ -98,6 +105,7 @@ module cmprs_macroblock_buf_iface (
wire frame_pre_start_w; // start sequence for a new frame wire frame_pre_start_w; // start sequence for a new frame
reg frame_pre_start_r; reg frame_pre_start_r;
reg [ 8:0] mb_pre_start; // 1-hot macroblock pre start calcualtions - TODO: adjust width reg [ 8:0] mb_pre_start; // 1-hot macroblock pre start calcualtions - TODO: adjust width
reg mb_pre_start4_first; // first cycle after mb_pre_start[3]
wire [ 2:0] buf_diff; // difference between page needed and next valid - should be negative to have it ready wire [ 2:0] buf_diff; // difference between page needed and next valid - should be negative to have it ready
wire buf_ready_w; // External memory buffer has all the pages needed wire buf_ready_w; // External memory buffer has all the pages needed
...@@ -117,6 +125,8 @@ module cmprs_macroblock_buf_iface ( ...@@ -117,6 +125,8 @@ module cmprs_macroblock_buf_iface (
reg frame_pre_run; reg frame_pre_run;
reg [1:0] frame_may_start; reg [1:0] frame_may_start;
reg [5:0] dct_pipeline_delay_cntr;
`ifdef DEBUG_RING `ifdef DEBUG_RING
assign dbg_add_invalid = add_invalid; assign dbg_add_invalid = add_invalid;
assign dbg_mb_release_buf = mb_release_buf; assign dbg_mb_release_buf = mb_release_buf;
...@@ -180,9 +190,17 @@ module cmprs_macroblock_buf_iface ( ...@@ -180,9 +190,17 @@ module cmprs_macroblock_buf_iface (
// calculate before starting each macroblock (will wait if buffer is not ready) (TODO: align mb_pre_start[0] to mb_pre_end[2] - same) // calculate before starting each macroblock (will wait if buffer is not ready) (TODO: align mb_pre_start[0] to mb_pre_end[2] - same)
//mb_pre_start_w //mb_pre_start_w
// TODO: Here enforce minimal pause (if not zero for the DCT pipeline to recover
// will wait for buf_ready_w, but not less than DCT_PIPELINE_PAUSE (or no wait at all)
mb_pre_start4_first <=mb_pre_start[3];
if (xrst) dct_pipeline_delay_cntr <= 0;
else if (mb_pre_start4_first && !buf_ready_w) dct_pipeline_delay_cntr <= DCT_PIPELINE_PAUSE -1;
else if (|dct_pipeline_delay_cntr) dct_pipeline_delay_cntr <= dct_pipeline_delay_cntr -1;
if (!frame_en_r) mb_pre_start <= 0; if (!frame_en_r) mb_pre_start <= 0;
if (mb_pre_start_w) mb_pre_start <= 1; if (mb_pre_start_w) mb_pre_start <= 1;
else if (!mb_pre_start[4] || buf_ready_w) mb_pre_start <= mb_pre_start << 1; else if (!mb_pre_start[4] || (buf_ready_w && !(|dct_pipeline_delay_cntr))) mb_pre_start <= mb_pre_start << 1;
if (mb_pre_start[1]) mbl_x_r[6:3] <= mb_first_in_row? {2'b0,left_marg[4:3]} : mbl_x_next_r[6:3]; if (mb_pre_start[1]) mbl_x_r[6:3] <= mb_first_in_row? {2'b0,left_marg[4:3]} : mbl_x_next_r[6:3];
if (mb_pre_start[2]) mbl_x_last_r[7:3] <= {1'b0,mbl_x_r[6:3]} + {2'b0,mb_w_m1[5:3]}; if (mb_pre_start[2]) mbl_x_last_r[7:3] <= {1'b0,mbl_x_r[6:3]} + {2'b0,mb_w_m1[5:3]};
......
...@@ -965,39 +965,10 @@ module jp_channel#( ...@@ -965,39 +965,10 @@ module jp_channel#(
if (dct_last_in) first_block_dct <= first_block_color_after; if (dct_last_in) first_block_dct <= first_block_color_after;
end end
// 8x8 DCT implementing Chen algorithm and 2 passes
`ifdef USE_OLD_XDCT393 // Each pass (1d) uses 5 DSP48E1 modules (2 - multipliers and 3 SIMD (2x24) adder/subracters
// Needs a small (<48, but did not calculate yet) pause between block if they did not come
xdct393 xdct393_i ( // immediately after each other. This pause is needed to restart pipeline
.clk (xclk), // input
.en (frame_en), // input if zero will reset transpose memory page numbers
.start (dct_start), // input single-cycle start pulse that goes with the first pixel data. Other 63 should follow
.xin (yc_nodc), // input[9:0]
.last_in (dct_last_in), // output reg output high during input of the last of 64 pixels in a 8x8 block //
.pre_first_out (dct_pre_first_out), // outpu 1 cycle ahead of the first output in a 64 block
/// .dv (dct_dv), // output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.dv (), // not used: output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.d_out (dct_out) // output[12:0]
);
`else
xdct393r xdct393_i (
.clk (xclk), // input
.en (frame_en), // input if zero will reset transpose memory page numbers
.start (dct_start), // input single-cycle start pulse that goes with the first pixel data. Other 63 should follow
.xin (yc_nodc), // input[9:0]
.last_in (dct_last_in), // output reg output high during input of the last of 64 pixels in a 8x8 block //
.pre_first_out (dct_pre_first_out), // outpu 1 cycle ahead of the first output in a 64 block
/// .dv (dct_dv), // output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.dv (), // not used: output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.d_out (dct_out) // output[12:0]
);
/* New DCT, now in passive mode */
// TODO: enforce minimal pause (when not butted together)
wire dct_last_in_debug;
wire dct_pre_first_out_debug;
wire dct_dv_debug;
wire [12:0] dct_dout_debug;
dct2d8x8_chen #( dct2d8x8_chen #(
.INPUT_WIDTH (10), .INPUT_WIDTH (10),
...@@ -1005,27 +976,23 @@ module jp_channel#( ...@@ -1005,27 +976,23 @@ module jp_channel#(
.STAGE1_SAFE_BITS (3), .STAGE1_SAFE_BITS (3),
.STAGE2_SAFE_BITS (3), .STAGE2_SAFE_BITS (3),
.TRANSPOSE_WIDTH (16), .TRANSPOSE_WIDTH (16),
.TRIM_STAGE_1 (0), .TRIM_STAGE_1 (1),
.TRIM_STAGE_2 (2), .TRIM_STAGE_2 (0),
.DSP_WIDTH (24), .DSP_WIDTH (24),
.DSP_OUT_WIDTH (24),
.DSP_B_WIDTH (18), .DSP_B_WIDTH (18),
.DSP_A_WIDTH (25), .DSP_A_WIDTH (25),
.DSP_P_WIDTH (48), .DSP_P_WIDTH (48)
.DSP_M_WIDTH (43)
) dct2d8x8_chen_i ( ) dct2d8x8_chen_i (
.clk (xclk), // input .clk (xclk), // input
.rst (!frame_en), // input .rst (!frame_en), // input
.start (dct_start), // input .start (dct_start), // input
.xin (yc_nodc), // input[9:0] signed .xin (yc_nodc), // input[9:0] signed
.last_in (dct_last_in_debug), // output reg .last_in (dct_last_in), // output reg
.pre_first_out (dct_pre_first_out_debug), // output .pre_first_out (dct_pre_first_out), // output
.dv (dct_dv_debug), // output .dv (), // output
.d_out (dct_dout_debug) // output[12:0] signed .d_out (dct_out) // output[12:0] signed
); );
`endif
wire quant_start; wire quant_start;
dly_16 #(.WIDTH(1)) i_quant_start (.clk(xclk),.rst(1'b0), .dly(4'd0), .din(dct_pre_first_out), .dout(quant_start)); // dly=0+1 dly_16 #(.WIDTH(1)) i_quant_start (.clk(xclk),.rst(1'b0), .dly(4'd0), .din(dct_pre_first_out), .dout(quant_start)); // dly=0+1
......
/**********************************************************************
** -----------------------------------------------------------------------------**
** xdct393r.v
**
** 8x8 discrete Cosine Transform
** adding more registers to increase bandwidth
**
** Copyright (C) 2002-2015 Elphel, Inc
**
** -----------------------------------------------------------------------------**
** xdct393r is free software - hardware description language (HDL) code.
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
**
** Modified by Andrey Filippov - goal to make it work in start/stop mode, using
** "start" input (going together with the first data, no restriction on the gap between 64-pixel blocks (>=0)
** Removed "RST" input ("en" is only used to reset ping-pong transpose memory address)
** Split module in 2 stages
** Also saved some area - original design compiled by XST to 865 slices (XC2S300e), this one - 780!
**
** It is based on the original design (Xilinx app. note XAPP610) by:
** Author: Latha Pillai
** Senior Applications Engineer
**
** Video Applications
** Advanced Products Group
** Xilinx, Inc.
**
** Copyright (c) 2001 Xilinx, Inc.
** All rights reserved
**
** Date: Feb. 10, 2002
**
** RESTRICTED RIGHTS LEGEND
**
** This software has not been published by the author, and
** has been disclosed to others for the purpose of enhancing
** and promoting design productivity in Xilinx products.
**
** Therefore use, duplication or disclosure, now and in the
** future should give consideration to the productivity
** enhancements afforded the user of this code by the author's
** efforts. Thank you for using our products !
**
** Disclaimer: THESE DESIGNS ARE PROVIDED "AS IS" WITH NO WARRANTY
** WHATSOEVER AND XILINX SPECIFICALLY DISCLAIMS ANY
** IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR
** A PARTICULAR PURPOSE, OR AGAINST INFRINGEMENT.
***********************************************************************/
/*
After I added DC subtraction before DCT I got 9-bit (although not likely to go out of 8-bit range) signed data.
Also increased transpose memory to 9 bits (anyway it is 16-bit wide) - see if it will help to prevent saturation
without significant increase in gates.
Saturation is still visible on real pictures, but there was a bug - addsub<i>a_comp, addsub<i>b_comp were not using their
MSB. I added 1 more bit to add_sub<i>a and add_sub<i>b and fixed that bug. Only 2 more slices were used.
*/
`timescale 1ns/1ps
// For xdct353 - increasing data in 9 bits -> 10 bits, out 12 bits ->13 bits
// xdct393r: top-level wrapper of the two-pass 8x8 DCT (original note: increased latency by 3).
// Wires together:
//   * i_dct_stage1    - first 1-D pass, writes 16-bit results to the transpose memory
//   * i_transpose_mem - 16-bit wide memory with registered output, written by stage 1 and read by stage 2
//   * i_dct_stage2    - second 1-D pass, reads the transpose memory in transposed order
// The only logic implemented directly here is the last_in register.
module xdct393r (
    input             clk,           // system clock, posedge
    input             en,            // if zero, resets the transpose memory page numbers
    input             start,         // single-cycle pulse that goes with the first pixel of a block, other 63 should follow
    input       [9:0] xin,           // input data, 10 bits (sign-extended as two's complement inside stage 1)
    output reg        last_in,       // high during input of the last of 64 pixels in an 8x8 block
    output            pre_first_out, // 1 cycle ahead of the first output in a 64-sample block
    output            dv,            // data output valid (original note: goes high on the 94-th cycle after start;
                                     // NOTE(review): not re-verified after the latency increase)
    output     [12:0] d_out          // output data, 13 bits
);
    // Lower 6 bits of the transpose memory write address that are decoded to produce last_in
    localparam [5:0] LAST_IN_WR_ADDR = 6'h30;

    // stage 1 -> stage 2 hand-over
    wire        stage1_done;  // stage 1 finished writing a block
    wire        tm_page;      // transpose memory page just filled by stage 1

    // transpose memory, write side (driven by stage 1)
    wire        tm_we;
    wire  [6:0] tm_wa;
    wire [15:0] tm_di;

    // transpose memory, read side (driven by / feeding stage 2)
    wire        tm_re;        // read enable
    wire        tm_regen;     // output register enable
    wire  [6:0] tm_ra;
    wire [15:0] tm_out;

    // last_in is a registered decode of the stage 1 write counter
    wire        last_in_w = (tm_wa[5:0] == LAST_IN_WR_ADDR);

    always @ (posedge clk) last_in <= last_in_w;

    // First pass: input samples -> transpose memory
    dct393r_stage1 i_dct_stage1 (
        .clk     (clk),
        .en      (en),
        .start   (start),
        .xin     (xin),         // [9:0]
        .we      (tm_we),       // write to transpose memory
        .wr_cntr (tm_wa),       // [6:0] transpose memory write address
        .z_out   (tm_di),       // [15:0] data to transpose memory
        .page    (tm_page),
        .done    (stage1_done)
    );

    // Transpose memory: 7-bit addresses from both stages are zero-padded to 10 bits
    ram18_var_w_var_r #(
        .REGISTERS    (1),
        .LOG2WIDTH_WR (4),
        .LOG2WIDTH_RD (4),
        .DUMMY        (0)
    ) i_transpose_mem (
        .rclk     (clk),                // input
        .raddr    ({3'b0, tm_ra[6:0]}), // input[9:0]
        .ren      (tm_re),              // input
        .regen    (tm_regen),           // input
        .data_out (tm_out),             // output[15:0]
        .wclk     (clk),                // input
        .waddr    ({3'b0, tm_wa[6:0]}), // input[9:0]
        .we       (tm_we),              // input
        .web      (4'hf),               // input[3:0]
        .data_in  (tm_di)               // input[15:0]
    );

    // Second pass: transpose memory -> DCT output
    dct393r_stage2 i_dct_stage2 (
        .clk      (clk),
        .en       (en),
        .start    (stage1_done),   // stage 1 finished, data available in transpose memory (extra RAM latency)
        .page     (tm_page),       // transpose memory page finished, valid at start
        .rd_cntr  (tm_ra),         // [6:0] transpose memory read address
        .ren      (tm_re),         // output
        .regen    (tm_regen),      // output reg
        .tdin     (tm_out),        // [15:0] data from transpose memory
        .endv     (pre_first_out), // output
        .dv       (dv),            // data output valid
        .dct2_out (d_out)          // [12:0] output data
    );
endmodule
// 01/24/2004: Moved all clocks in stage 1 to "negedge" to reduce current pulses
// dct393r_stage1: first (1-D) pass of the 8x8 DCT.
// Data path, in order:
//   1. xin is shifted through an 8-deep register chain (xa0_in..xa7_in)
//   2. the 8 samples are latched into xa*_reg when sxregs is high
//   3. four mirrored pairs (7/0, 6/1, 5/2, 4/3) are added or subtracted, selected by toggleA
//   4. the magnitude of each result is multiplied by a coefficient magnitude selected by indexi,
//      the sign of the product is tracked separately and applied after the multiplication
//   5. the four signed products are summed, rounded and registered as z_out
// Almost all registers are clocked by nclk (= ~clk); z_out is registered on posedge clk and the
// control outputs (wr_cntr, done, we, page) pass through a dly_16 instance clocked by clk.
module dct393r_stage1 ( // increased latency by 1
input clk, // system clock, posedge
input en, // when low: clears the write counter and stops enable_toggle / we_prelatch from holding
input start, // single-cycle start pulse to replace RST
input [ 9:0] xin, // 10-bit input sample (sign-extended as two's complement in the adder/subtractor block)
output we, // write to transpose memory
output [ 6:0] wr_cntr, // [6:0] transpose memory write address
output reg [15:0] z_out, //data to transpose memory
output page, // transpose memory page just filled (valid @ done)
output done); // last cycle writing to transpose memory - may use after it (move it earlier?)
/* constants */
// cos/sin of k*pi/16 scaled by 2^16 and rounded, e.g. C4 = round(65536*cos(4*pi/16)) = 46341,
// S7 = round(65536*sin(7*pi/16)) = 64277
localparam C3= 16'd54491;
localparam S3= 16'd36410;
localparam C4= 16'd46341;
localparam C6= 16'd25080;
localparam S6= 16'd60547;
localparam C7= 16'd12785;
localparam S7= 16'd64277;
// currently selected coefficients: bit [16] is the sign, bits [15:0] the magnitude
reg [16:0] memory1a, memory2a, memory3a, memory4a;
/* 1D section */
/* The max value of a pixel after processing (to make their expected mean to zero)
is 127. If all the values in a row are 127, the max value of the product terms
would be (127*2)*(23170/256) and that of z_out_int would be (127*8)*23170/256.
This value divided by 2raised to 8 is equivalent to ignoring the 8 lsb bits of the value */
// NOTE(review): the paragraph above appears to predate the current 10-bit input and 16-bit
// coefficients - treat its numbers as historical
reg [ 9:0] xa0_in, xa1_in, xa2_in, xa3_in, xa4_in, xa5_in, xa6_in, xa7_in; // input shift register
reg [ 9:0] xa0_reg, xa1_reg, xa2_reg, xa3_reg, xa4_reg, xa5_reg, xa6_reg, xa7_reg; // samples latched on sxregs
reg [ 9:0] addsub1a_comp, addsub2a_comp, addsub3a_comp, addsub4a_comp; // magnitudes of add_sub<i>a (lower 10 bits)
// reg [10:0] addsub1a_comp, addsub2a_comp, addsub3a_comp, addsub4a_comp; // AF2015: increasing width - was limiting
reg [10:0] add_sub1a, add_sub2a, add_sub3a, add_sub4a; // signed sums/differences of sample pairs
reg save_sign1a, save_sign2a, save_sign3a, save_sign4a; // sign bits of add_sub<i>a
reg [17:0] p1a, p2a, p3a, p4a; // signed products
wire [35:0] p1a_all, p2a_all, p3a_all, p4a_all; // unsigned magnitude products
reg toggleA; // 1 - add, 0 - subtract in the adder/subtractor block
reg [18:0] z_out_int1, z_out_int2; // partial sums of product pairs
reg [18:0] z_out_int; // sum of all four products
wire [15:0] z_out_prelatch; // rounded result before the posedge clk output register
reg [ 2:0] indexi; // selects the coefficient set in the case statement below
/* clks and counters */
reg [ 6:0] wr_cntr_prelatch; // write counter: [5:0] position inside a 64-word block, [6] goes to page_prelatch
/* memory section */
reg done_prelatch;
reg we_prelatch;
wire enwe; // delayed pre_sxregs, starts we_prelatch
wire pre_sxregs; // delayed start
reg sxregs; // latch xa*_in into xa*_reg, restart indexi/toggleA sequence
reg page_prelatch;
// TODO: See if negedge is needed
wire nclk = ~clk; // seems that everything here is running at negedge (and delays too), but not the transpose memory
// to conserve energy by disabling toggleA
wire sxregs_d8; // delayed sxregs, ends the enable_toggle window
reg enable_toggle; // high while toggleA and indexi are allowed to advance
// SRL16_1 i_sxregs_d8 (.Q(sxregs_d8), .A0(1'b1), .A1(1'b1), .A2(1'b1), .A3(1'b0), .CLK(clk),.D(sxregs)); // dly=7+1
dly_16 #(.WIDTH(1)) i_sxregs_d8(.clk(nclk),.rst(1'b0), .dly(4'd7), .din(sxregs), .dout(sxregs_d8)); // dly=7+1
// SRL16_1 i_pre_sxregs (.Q(pre_sxregs), .A0(1'b0), .A1(1'b1), .A2(1'b1), .A3(1'b0), .CLK(clk), .D(start)); // dly=6+1
dly_16 #(.WIDTH(1)) i_pre_sxregs(.clk(nclk),.rst(1'b0), .dly(4'd6), .din(start), .dout(pre_sxregs)); // dly=6+1
// SRL16_1 i_enwe (.Q(enwe), .A0(1'b1), .A1(1'b0), .A2(1'b1), .A3(1'b0), .CLK(clk), .D(pre_sxregs)); // dly=5+1
dly_16 #(.WIDTH(1)) i_enwe(.clk(nclk),.rst(1'b0), .dly(4'd5), .din(pre_sxregs), .dout(enwe)); // dly=5+1
// Control sequencing (all on nclk)
always @ (posedge nclk) begin
// enable_toggle: set by sxregs, held until sxregs_d8, forced low when !en
enable_toggle <= en && (sxregs || (enable_toggle && !sxregs_d8));
done_prelatch<= (wr_cntr_prelatch[5:0]==6'h3f);
// remember which page (counter MSB) was being written when the last word of a block is reached
if (wr_cntr_prelatch[5:0]==6'h3f) page_prelatch <= wr_cntr_prelatch[6];
// write enable: started by enwe, held (while en) until the counter reaches the last word of the block
we_prelatch<= enwe || (en && we_prelatch && (wr_cntr_prelatch[5:0]!=6'h3f));
if (!en) wr_cntr_prelatch <= 7'b0;
else if (we_prelatch) wr_cntr_prelatch <= wr_cntr_prelatch + 1;
// sxregs: first pulse from the delayed start, then once per 8 written words (except in the last group of 8)
sxregs <= pre_sxregs || ((wr_cntr_prelatch[2:0]==3'h1) && (wr_cntr_prelatch[5:3]!=3'h7));
// toggleA is set by sxregs and then alternates every clock while enable_toggle is high
toggleA <= sxregs || (enable_toggle && (~toggleA));
// indexi is loaded with 7 by sxregs and then increments (wrapping to 0) while enable_toggle is high
if (sxregs) indexi <= 3'h7;
else if (enable_toggle) indexi<=indexi+1;
end
/* 1D-DCT BEGIN */
// store 1D-DCT constant coefficient values for multipliers */
// {1'b0,X} is +X, {1'b1,X} is -X (sign kept in bit [16], applied after the multiplication).
// NOTE(review): the trailing "8'dNN" comments look like the older 8-bit coefficient values kept for reference
always @ (posedge nclk) begin
case (indexi)
0 : begin memory1a <= {1'b0,C4}; //8'd91
memory2a <= {1'b0,C4}; //8'd91
memory3a <= {1'b0,C4}; //8'd91
memory4a <= {1'b0,C4}; //8'd91
end
1 : begin memory1a <= {1'b0,S7}; //8'd126;
memory2a <= {1'b0,C3}; //8'd106;
memory3a <= {1'b0,S3}; //8'd71;
memory4a <= {1'b0,C7}; //8'd25;
end
2 : begin memory1a <= {1'b0,S6}; //8'd118;
memory2a <= {1'b0,C6}; //8'd49;
memory3a <= {1'b1,C6}; //-8'd49;
memory4a <= {1'b1,S6}; //-8'd118
end
3 : begin memory1a <= {1'b0,C3}; // 8'd106;
memory2a <= {1'b1,C7}; //-8'd25;
memory3a <= {1'b1,S7}; //-8'd126;
memory4a <= {1'b1,S3}; //-8'd71;
end
4 : begin memory1a <= {1'b0,C4}; // 8'd91;
memory2a <= {1'b1,C4}; //-8'd91;
memory3a <= {1'b1,C4}; //-8'd91;
memory4a <= {1'b0,C4}; // 8'd91;
end
5 : begin memory1a <= {1'b0,S3}; // 8'd71;
memory2a <= {1'b1,S7}; //-8'd126;
memory3a <= {1'b0,C7}; // 8'd25;
memory4a <= {1'b0,C3}; // 8'd106;
end
6 : begin memory1a <= {1'b0,C6}; // 8'd49;
memory2a <= {1'b1,S6}; //-8'd118;
memory3a <= {1'b0,S6}; // 8'd118;
memory4a <= {1'b1,C6}; //-8'd49;
end
7 : begin memory1a <= {1'b0,C7}; // 8'd25;
memory2a <= {1'b1,S3}; //-8'd71;
memory3a <= {1'b0,C3}; // 8'd106;
memory4a <= {1'b1,S7}; //-8'd126;
end
endcase
end
/* 10-bit input shifted 8 times through a shift register*/
// xa0_in will see output registers from posedge, may be replaced by latches if needed - but currently delay is under 5ns
always @ (posedge nclk) begin
xa0_in <= xin;
xa1_in <= xa0_in;
xa2_in <= xa1_in;
xa3_in <= xa2_in;
xa4_in <= xa3_in;
xa5_in <= xa4_in;
xa6_in <= xa5_in;
xa7_in <= xa6_in;
end
/* shifted inputs registered when sxregs is high (once per 8 samples) */
always @ (posedge nclk) if (sxregs) begin
xa0_reg <= xa0_in;
xa1_reg <= xa1_in;
xa2_reg <= xa2_in;
xa3_reg <= xa3_in;
xa4_reg <= xa4_in;
xa5_reg <= xa5_in;
xa6_reg <= xa6_in;
xa7_reg <= xa7_in;
end
/* adder / subtractor block */
// negedge clk is the same edge as posedge nclk used by the other always blocks.
// Operands are sign-extended from 10 to 11 bits; toggleA selects sum (1) or difference (0)
always @ (negedge clk)
if (toggleA == 1'b1) begin
add_sub1a <= {xa7_reg[9],xa7_reg[9:0]} + {xa0_reg[9],xa0_reg[9:0]};
add_sub2a <= {xa6_reg[9],xa6_reg[9:0]} + {xa1_reg[9],xa1_reg[9:0]};
add_sub3a <= {xa5_reg[9],xa5_reg[9:0]} + {xa2_reg[9],xa2_reg[9:0]};
add_sub4a <= {xa4_reg[9],xa4_reg[9:0]} + {xa3_reg[9],xa3_reg[9:0]};
end else begin
add_sub1a <= {xa7_reg[9],xa7_reg[9:0]} - {xa0_reg[9],xa0_reg[9:0]};
add_sub2a <= {xa6_reg[9],xa6_reg[9:0]} - {xa1_reg[9],xa1_reg[9:0]};
add_sub3a <= {xa5_reg[9],xa5_reg[9:0]} - {xa2_reg[9],xa2_reg[9:0]};
add_sub4a <= {xa4_reg[9],xa4_reg[9:0]} - {xa3_reg[9],xa3_reg[9:0]};
end
// First valid add_sub appears at the 10th clk (8 clks for shifting inputs,
// 9th clk for registering shifted input and 10th clk for add_sub
// to synchronize the i value to the add_sub value, i value is incremented
// only after 10 clks
// Adding these wires to get rid of the MSB that is always 0
// (absolute value of add_sub<i>a; only bits [9:0] are registered below)
wire [10:0] addsub1a_comp_w = add_sub1a[10]? (-add_sub1a) : add_sub1a;
wire [10:0] addsub2a_comp_w = add_sub2a[10]? (-add_sub2a) : add_sub2a;
wire [10:0] addsub3a_comp_w = add_sub3a[10]? (-add_sub3a) : add_sub3a;
wire [10:0] addsub4a_comp_w = add_sub4a[10]? (-add_sub4a) : add_sub4a;
// split each sum/difference into sign and magnitude
always @ (posedge nclk) begin
save_sign1a <= add_sub1a[10];
save_sign2a <= add_sub2a[10];
save_sign3a <= add_sub3a[10];
save_sign4a <= add_sub4a[10];
addsub1a_comp <= addsub1a_comp_w[9:0]; //add_sub1a[10]? (-add_sub1a) : add_sub1a;
addsub2a_comp <= addsub2a_comp_w[9:0]; //add_sub2a[10]? (-add_sub2a) : add_sub2a;
addsub3a_comp <= addsub3a_comp_w[9:0]; //add_sub3a[10]? (-add_sub3a) : add_sub3a;
addsub4a_comp <= addsub4a_comp_w[9:0]; //add_sub4a[10]? (-add_sub4a) : add_sub4a;
end
// unsigned magnitude x magnitude products (10 bits x 16 bits)
assign p1a_all = addsub1a_comp * memory1a[15:0]; // [16] is sign!
assign p2a_all = addsub2a_comp * memory2a[15:0];
assign p3a_all = addsub3a_comp * memory3a[15:0];
assign p4a_all = addsub4a_comp * memory4a[15:0];
reg [17:0] p1a_all_r;
reg [17:0] p2a_all_r;
reg [17:0] p3a_all_r;
reg [17:0] p4a_all_r;
reg p1a_sig, p2a_sig, p3a_sig, p4a_sig;
// register product bits [26:9] (9 LSBs dropped) together with the product sign:
// sign of the sum/difference XOR sign of the coefficient
always @ (posedge nclk) begin
p1a_all_r <= p1a_all[26:9];
p2a_all_r <= p2a_all[26:9];
p3a_all_r <= p3a_all[26:9];
p4a_all_r <= p4a_all[26:9];
p1a_sig <= (save_sign1a ^ memory1a[16]);
p2a_sig <= (save_sign2a ^ memory2a[16]);
p3a_sig <= (save_sign3a ^ memory3a[16]);
p4a_sig <= (save_sign4a ^ memory4a[16]);
end
// apply the sign: negate the registered magnitude when the product is negative
always @ (posedge nclk) begin
p1a <= p1a_sig ? (-p1a_all_r) : p1a_all_r;
p2a <= p2a_sig ? (-p2a_all_r) : p2a_all_r;
p3a <= p3a_sig ? (-p3a_all_r) : p3a_all_r;
p4a <= p4a_sig ? (-p4a_all_r) : p4a_all_r;
end
/* Final adder. Adding the ouputs of the 4 multipliers */
// two-level adder tree: pairs first (sign-extended to 19 bits), then the sum of the pairs
always @ (posedge nclk) begin
z_out_int1 <= ({p1a[17],p1a} + {p2a[17],p2a});
z_out_int2 <= ({p3a[17],p3a} + {p4a[17],p4a});
z_out_int <= (z_out_int1 + z_out_int2);
end
// rounding of the value
// drop 3 LSBs, adding the highest dropped bit (round half up)
assign z_out_prelatch[15:0] = z_out_int[18:3]+ z_out_int[2]; // correct rounding
// outputs from output latches to cross clock edge boundary
always @ (posedge clk) begin
z_out[15:0] <= z_out_prelatch[15:0];
// wr_cntr[6:0] <= wr_cntr_prelatch[6:0];
// done <= done_prelatch;
// we <= we_prelatch;
// page <= page_prelatch;
end
// control outputs are moved to the clk domain through a 10-bit wide dly_16 instead of the
// commented-out register assignments above
dly_16 #(.WIDTH(10)) i_delayed_outs(
.clk(clk),
.rst(1'b0),
.dly(4'd1),
.din( {wr_cntr_prelatch[6:0], done_prelatch, we_prelatch, page_prelatch}),
.dout({wr_cntr[6:0], done, we, page}));
/* 1D-DCT END */
endmodule
// dct393r_stage2: second (1-D) pass of the 8x8 DCT.
// After start it reads 64 words from the transpose memory page given by the page input, using a
// transposed address ({rd_page, rd_cntrs[2:0], rd_cntrs[5:3]}), and runs them through the same
// shift / latch / add-sub / multiply / sign / sum pipeline structure as stage 1, but 16 bits wide.
// All logic here is clocked on posedge clk. Output framing (endv, dv) is derived from delayed
// copies of start and of the end-of-read condition.
module dct393r_stage2 ( // increased latency by 2 clocks
input clk, // system clock, posedge
input en, // when low: clears en_started, enable_toggle, pre_dv and dv
input start, // stage 1 finished, data available in transpose memory
input page, // transpose memory page finished, valid at start
output [6:0] rd_cntr, // [6:0] transpose memory read address
output ren, // read enable transpose memory
output reg regen, // register enable in transpose memory
input [15:0] tdin, // [15:0] - data from transpose memory, added 6 bit fractional part
output reg endv, // one cycle ahead of starting (continuing) dv
output reg dv, // data output valid
output reg [12:0] dct2_out);// 13-bit output data
/* constants */
// cos/sin of k*pi/16 scaled by 2^16 and rounded, e.g. C4 = round(65536*cos(4*pi/16)) = 46341
localparam C3= 16'd54491;
localparam S3= 16'd36410;
localparam C4= 16'd46341;
localparam C6= 16'd25080;
localparam S6= 16'd60547;
localparam C7= 16'd12785;
localparam S7= 16'd64277;
// currently selected coefficients: bit [16] is the sign, bits [15:0] the magnitude
reg [16:0] memory1a, memory2a, memory3a, memory4a;
reg [2:0] indexi; // selects the coefficient set in the case statement below
/* 2D section */
reg [15:0] xb0_in, xb1_in, xb2_in, xb3_in, xb4_in, xb5_in, xb6_in, xb7_in; // input shift register
reg [15:0] xb0_reg, xb1_reg, xb2_reg, xb3_reg, xb4_reg, xb5_reg, xb6_reg, xb7_reg; // samples latched on sxregs
reg [16:0] add_sub1b, add_sub2b, add_sub3b, add_sub4b; // signed sums/differences of sample pairs
reg [15:0] addsub1b_comp, addsub2b_comp, addsub3b_comp, addsub4b_comp; // magnitudes of add_sub<i>b (lower 16 bits)
reg save_sign1b, save_sign2b, save_sign3b, save_sign4b; // sign bits of add_sub<i>b
reg [18:0] p1b, p2b, p3b, p4b; // signed products
wire [35:0] p1b_all, p2b_all, p3b_all, p4b_all; // unsigned magnitude products
reg toggleB; // 1 - add, 0 - subtract in the adder/subtractor block
reg [19:0] dct2d_int1, dct2d_int2; // partial sums of product pairs
reg [20:0] dct_2d_int; // sum of all four products
wire [12:0] dct_2d_rnd; // dct_2d_int with 8 LSBs dropped (before adding the rounding bit)
// transpose memory read address
reg [ 5:0] rd_cntrs; // linear read counter, 0..63
reg rd_page; // page captured at start
// start with the same as stage1
wire sxregs; // latch xb*_in into xb*_reg, restart indexi/toggleB sequence
// to conserve energy by disabling toggleB
wire sxregs_d8; // delayed sxregs, ends the enable_toggle window
reg enable_toggle; // high while toggleB and indexi are allowed to advance
reg en_started; // high from start until the read counter reaches 63 (or !en)
wire pre2_endv;
wire pre2_disdv; // AF2015: was missing
reg pre_endv;
reg pre_disdv;
reg pre_dv;
// SRL16 i_endv (.Q(endv), .A0(1'b0), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk), .D(start)); // dly=14+1
// dly_16 #(.WIDTH(1)) i_endv(.clk(clk),.rst(1'b0), .dly(4'd14), .din(start), .dout(endv)); // dly=14+1
dly_16 #(.WIDTH(1)) i_pre2_endv(.clk(clk),.rst(1'b0), .dly(4'd15), .din(start), .dout(pre2_endv)); // dly=15+1
// SRL16 i_disdv (.Q(disdv), .A0(1'b0), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk), .D(rd_cntrs[5:0]==6'h3f)); // dly=14+1
// dly_16 #(.WIDTH(1)) i_disdv(.clk(clk),.rst(1'b0), .dly(4'd14), .din(rd_cntrs[5:0]==6'h3f), .dout(disdv)); // dly=14+1
dly_16 #(.WIDTH(1)) i_pre2_disdv(.clk(clk),.rst(1'b0), .dly(4'd15), .din(rd_cntrs[5:0]==6'h3f), .dout(pre2_disdv)); // dly=15+1
// SRL16 i_sxregs (.Q(sxregs), .A0(1'b0), .A1(1'b0), .A2(1'b0), .A3(1'b1), .CLK(clk),.D((rd_cntr[5:3]==3'h0) && en_started)); // dly=8+1
// dly_16 #(.WIDTH(1)) i_sxregs(.clk(clk),.rst(1'b0), .dly(4'd8), .din((rd_cntr[5:3]==3'h0) && en_started), .dout(sxregs)); // dly=8+1
dly_16 #(.WIDTH(1)) i_sxregs(.clk(clk),.rst(1'b0), .dly(4'd9), .din((rd_cntrs[2:0]==3'h0) && en_started), .dout(sxregs)); // dly=9+1
// SRL16 i_sxregs_d8 (.Q(sxregs_d8), .A0(1'b1), .A1(1'b1), .A2(1'b1), .A3(1'b0), .CLK(clk),.D(sxregs && en_started)); // dly=7+1
dly_16 #(.WIDTH(1)) i_sxregs_d8(.clk(clk),.rst(1'b0), .dly(4'd7), .din(sxregs && en_started), .dout(sxregs_d8)); // dly=7+1
// memory read enable is active for the whole read burst
assign ren = en_started;
// Control sequencing
always @ (posedge clk) begin
// enable_toggle: set by sxregs, held until sxregs_d8, forced low when !en
enable_toggle <= en && (sxregs || (enable_toggle && !sxregs_d8));
// en_started <= en && (start || en_started);
if (!en) en_started <= 0;
else if (start) en_started <= 1;
else if (rd_cntrs[5:0] == 6'h3f) en_started <= 0; // should be after (start) as they happen simultaneously
// memory output register enable follows the read enable by one clock
regen <= en_started;
// endv is pre2_endv (delayed start) delayed by two more clocks
pre_endv <=pre2_endv;
endv <= pre_endv; // output reg
pre_disdv <= pre2_disdv;
// pre_dv: set by pre_endv, cleared by pre_disdv (set has priority), forced low when !en
pre_dv <= en && (pre_endv || (pre_dv && ~pre_disdv));
// dv <= en && (endv || (dv && ~disdv));
dv <= en && pre_dv; // output reg
// toggleB is set by sxregs and then alternates every clock while enable_toggle is high
toggleB <= sxregs || (enable_toggle && (~toggleB));
// indexi is loaded with 7 by sxregs and then increments (wrapping to 0) while enable_toggle is high
if (sxregs) indexi <= 3'h7;
else if (enable_toggle) indexi<=indexi+1;
if (start) rd_page <= page;
// read counter restarts at 0 on start and stops at 63
if (start) rd_cntrs[5:0] <=6'b0; // will always count, but that does not matter- What about saving energy ;-) ? Saved...
else if (rd_cntrs[5:0]!=6'h3f) rd_cntrs[5:0] <= rd_cntrs[5:0]+1;
end
// the two 3-bit halves of the counter are swapped so the block is read in transposed order
assign rd_cntr[6:0]= {rd_page,rd_cntrs[2:0],rd_cntrs[5:3]}; // transposed counter
// duplicate memory<i>a from stage 1
// store 1D-DCT constant coeeficient values for multipliers */
// {1'b0,X} is +X, {1'b1,X} is -X (sign kept in bit [16], applied after the multiplication).
// NOTE(review): the trailing "8'dNN" comments look like the older 8-bit coefficient values kept for reference
always @ (posedge clk) begin
case (indexi)
0 : begin memory1a <= {1'b0,C4}; //8'd91
memory2a <= {1'b0,C4}; //8'd91
memory3a <= {1'b0,C4}; //8'd91
memory4a <= {1'b0,C4}; //8'd91
end
1 : begin memory1a <= {1'b0,S7}; //8'd126;
memory2a <= {1'b0,C3}; //8'd106;
memory3a <= {1'b0,S3}; //8'd71;
memory4a <= {1'b0,C7}; //8'd25;
end
2 : begin memory1a <= {1'b0,S6}; //8'd118;
memory2a <= {1'b0,C6}; //8'd49;
memory3a <= {1'b1,C6}; //-8'd49;
memory4a <= {1'b1,S6}; //-8'd118
end
3 : begin memory1a <= {1'b0,C3}; // 8'd106;
memory2a <= {1'b1,C7}; //-8'd25;
memory3a <= {1'b1,S7}; //-8'd126;
memory4a <= {1'b1,S3}; //-8'd71;
end
4 : begin memory1a <= {1'b0,C4}; // 8'd91;
memory2a <= {1'b1,C4}; //-8'd91;
memory3a <= {1'b1,C4}; //-8'd91;
memory4a <= {1'b0,C4}; // 8'd91;
end
5 : begin memory1a <= {1'b0,S3}; // 8'd71;
memory2a <= {1'b1,S7}; //-8'd126;
memory3a <= {1'b0,C7}; // 8'd25;
memory4a <= {1'b0,C3}; // 8'd106;
end
6 : begin memory1a <= {1'b0,C6}; // 8'd49;
memory2a <= {1'b1,S6}; //-8'd118;
memory3a <= {1'b0,S6}; // 8'd118;
memory4a <= {1'b1,C6}; //-8'd49;
end
7 : begin memory1a <= {1'b0,C7}; // 8'd25;
memory2a <= {1'b1,S3}; //-8'd71;
memory3a <= {1'b0,C3}; // 8'd106;
memory4a <= {1'b1,S7}; //-8'd126;
end
endcase
end
// 16-bit transpose memory data shifted through an 8-deep register chain
always @ (posedge clk) begin
xb0_in <= tdin;
xb1_in <= xb0_in;
xb2_in <= xb1_in;
xb3_in <= xb2_in;
xb4_in <= xb3_in;
xb5_in <= xb4_in;
xb6_in <= xb5_in;
xb7_in <= xb6_in;
end
/* register inputs, latched when sxregs is high (once per 8 samples) */
always @ (posedge clk) if (sxregs) begin
xb0_reg <= xb0_in;
xb1_reg <= xb1_in;
xb2_reg <= xb2_in;
xb3_reg <= xb3_in;
xb4_reg <= xb4_in;
xb5_reg <= xb5_in;
xb6_reg <= xb6_in;
xb7_reg <= xb7_in;
end
// adder / subtractor block: operands are sign-extended from 16 to 17 bits;
// toggleB selects sum (1) or difference (0)
always @ (posedge clk)
if (toggleB == 1'b1) begin
add_sub1b <= {xb7_reg[15],xb7_reg[15:0]} + {xb0_reg[15],xb0_reg[15:0]};
add_sub2b <= {xb6_reg[15],xb6_reg[15:0]} + {xb1_reg[15],xb1_reg[15:0]};
add_sub3b <= {xb5_reg[15],xb5_reg[15:0]} + {xb2_reg[15],xb2_reg[15:0]};
add_sub4b <= {xb4_reg[15],xb4_reg[15:0]} + {xb3_reg[15],xb3_reg[15:0]};
end else begin
add_sub1b <= {xb7_reg[15],xb7_reg[15:0]} - {xb0_reg[15],xb0_reg[15:0]};
add_sub2b <= {xb6_reg[15],xb6_reg[15:0]} - {xb1_reg[15],xb1_reg[15:0]};
add_sub3b <= {xb5_reg[15],xb5_reg[15:0]} - {xb2_reg[15],xb2_reg[15:0]};
add_sub4b <= {xb4_reg[15],xb4_reg[15:0]} - {xb3_reg[15],xb3_reg[15:0]};
end
// Adding these wires to get rid of the MSB that is always 0
// (absolute value of add_sub<i>b; only bits [15:0] are registered below)
wire [16:0] addsub1b_comp_w = add_sub1b[16]? (-add_sub1b) : add_sub1b;
wire [16:0] addsub2b_comp_w = add_sub2b[16]? (-add_sub2b) : add_sub2b;
wire [16:0] addsub3b_comp_w = add_sub3b[16]? (-add_sub3b) : add_sub3b;
wire [16:0] addsub4b_comp_w = add_sub4b[16]? (-add_sub4b) : add_sub4b;
// split each sum/difference into sign and magnitude
always @ (posedge clk) begin
save_sign1b <= add_sub1b[16];
save_sign2b <= add_sub2b[16];
save_sign3b <= add_sub3b[16];
save_sign4b <= add_sub4b[16];
addsub1b_comp <= addsub1b_comp_w[15:0]; // add_sub1b[16]? (-add_sub1b) : add_sub1b;
addsub2b_comp <= addsub2b_comp_w[15:0]; // add_sub2b[16]? (-add_sub2b) : add_sub2b;
addsub3b_comp <= addsub3b_comp_w[15:0]; // add_sub3b[16]? (-add_sub3b) : add_sub3b;
addsub4b_comp <= addsub4b_comp_w[15:0]; // add_sub4b[16]? (-add_sub4b) : add_sub4b;
end
// unsigned magnitude x magnitude products (16 bits x 16 bits)
assign p1b_all = addsub1b_comp * memory1a[15:0]; // MSB [16] is sign!
assign p2b_all = addsub2b_comp * memory2a[15:0];
assign p3b_all = addsub3b_comp * memory3a[15:0];
assign p4b_all = addsub4b_comp * memory4a[15:0];
reg [18:0] p1b_all_r;
reg [18:0] p2b_all_r;
reg [18:0] p3b_all_r;
reg [18:0] p4b_all_r;
reg p1b_sig, p2b_sig, p3b_sig, p4b_sig;
// register product bits [32:14] (14 LSBs dropped) together with the product sign:
// sign of the sum/difference XOR sign of the coefficient
always @ (posedge clk) begin
p1b_all_r <= p1b_all[32:14];
p2b_all_r <= p2b_all[32:14];
p3b_all_r <= p3b_all[32:14];
p4b_all_r <= p4b_all[32:14];
p1b_sig <= (save_sign1b ^ memory1a[16]);
p2b_sig <= (save_sign2b ^ memory2a[16]);
p3b_sig <= (save_sign3b ^ memory3a[16]);
p4b_sig <= (save_sign4b ^ memory4a[16]);
end
// apply the sign: negate the registered magnitude when the product is negative
always @ (posedge clk) begin
p1b[18:0] <= p1b_sig ? (-p1b_all_r) :(p1b_all_r);
p2b[18:0] <= p2b_sig ? (-p2b_all_r) :(p2b_all_r);
p3b[18:0] <= p3b_sig ? (-p3b_all_r) :(p3b_all_r);
p4b[18:0] <= p4b_sig ? (-p4b_all_r) :(p4b_all_r);
end
/* multiply the outputs of the add/sub block with the 8 sets of stored coefficients */
/* Final adder. Adding the ouputs of the 4 multipliers */
// two-level adder tree with sign extension at each level; the output register is only
// updated while pre_dv is high and adds dct_2d_int[7] as the rounding bit (round half up)
always @ (posedge clk) begin
dct2d_int1 <= ({p1b[18],p1b[18:0]} + {p2b[18],p2b[18:0]});
dct2d_int2 <= ({p3b[18],p3b[18:0]} + {p4b[18],p4b[18:0]});
dct_2d_int <= ({dct2d_int1[19],dct2d_int1[19:0]} + {dct2d_int2[19],dct2d_int2[19:0]});
if (pre_dv) dct2_out[12:0] <= dct_2d_rnd[12:0] + dct_2d_int[7];
end
// truncated result: upper 13 bits of the 21-bit sum
assign dct_2d_rnd[12:0] = dct_2d_int[20:8];
// assign dct2_out[12:0] = dct_2d_rnd[12:0] + dct_2d_int[7];
endmodule
/******************************************************************************* /*!
* <b>Module:</b>dct1d_chen * <b>Module:</b>dct1d_chen
* @file dct1d_chen.v * @file dct1d_chen.v
* @date:2016-06-05 * @date 2016-06-05
* @author: Andrey Filippov * @author Andrey Filippov
* *
* @brief: 1d 8-point DCT based on Chen algorithm * @brief 1d 8-point DCT based on Chen algorithm
* *
* @copyright Copyright (c) 2016 Elphel, Inc. * @copyright Copyright (c) 2016 Elphel, Inc.
* *
...@@ -35,17 +35,19 @@ ...@@ -35,17 +35,19 @@
* the combined code. This permission applies to you if the distributed code * the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it * contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs. * with at least one of the Free Software programs.
*******************************************************************************/ */
`timescale 1ns/1ps `timescale 1ns/1ps
module dct1d_chen#( module dct1d_chen#(
parameter WIDTH = 24, parameter WIDTH = 24,
parameter OUT_WIDTH = 24, parameter OUT_WIDTH = 16,
parameter B_WIDTH = 18, parameter B_WIDTH = 18,
parameter A_WIDTH = 25, parameter A_WIDTH = 25,
parameter P_WIDTH = 48, parameter P_WIDTH = 48,
parameter M_WIDTH = 43, // actual multiplier width (== (A_WIDTH +B_WIDTH) // parameter M_WIDTH = 43, // actual multiplier width (== (A_WIDTH +B_WIDTH)
parameter ROUND_OUT = 8, // cut these number of LSBs on the output, round result (in addition to COSINE_SHIFT)
parameter COSINE_SHIFT= 17, parameter COSINE_SHIFT= 17,
parameter COS_1_16 = 128553, // (1<<17) * cos(1*pi/16) parameter COS_1_16 = 128553, // (1<<17) * cos(1*pi/16)
parameter COS_2_16 = 121095, // (1<<17) * cos(2*pi/16) parameter COS_2_16 = 121095, // (1<<17) * cos(2*pi/16)
parameter COS_3_16 = 108982, // (1<<17) * cos(3*pi/16) parameter COS_3_16 = 108982, // (1<<17) * cos(3*pi/16)
...@@ -59,11 +61,13 @@ module dct1d_chen#( ...@@ -59,11 +61,13 @@ module dct1d_chen#(
input en, input en,
input [2 * WIDTH -1:0] d10_32_76_54, // Concatenated input data {x[1],x[0]}/{x[3],x[2]}/ {x[7],x[6]}/{x[5],x[4]} input [2 * WIDTH -1:0] d10_32_76_54, // Concatenated input data {x[1],x[0]}/{x[3],x[2]}/ {x[7],x[6]}/{x[5],x[4]}
input start, // {x[1],x[0]} available next after start, {x[3],x[2]} - second next, then {x[7],x[6]} and {x[5],x[4]} input start, // {x[1],x[0]} available next after start, {x[3],x[2]} - second next, then {x[7],x[6]} and {x[5],x[4]}
output [WIDTH -1:0] dout, output [OUT_WIDTH -1:0] dout,
output reg pre2_start_out, // 2 clock cycle before F4 output, full dout sequence output reg pre2_start_out, // 2 clock cycle before F4 output, full dout sequence
// start_out-X-F4-X-F2-X-F6-F5-F0-F3-X-F1-X-F7 // start_out-X-F4-X-F2-X-F6-F5-F0-F3-X-F1-X-F7
output reg en_out // valid at the same time slot as pre2_start_out (goes active with pre2_start_out) output reg en_out // valid at the same time slot as pre2_start_out (goes active with pre2_start_out)
); );
localparam TOTAL_RSHIFT= COSINE_SHIFT + ROUND_OUT;
localparam BEFORE_SAT_WIDTH = P_WIDTH - TOTAL_RSHIFT;
reg signed [B_WIDTH-1:0] dsp_ma_bin; reg signed [B_WIDTH-1:0] dsp_ma_bin;
wire dsp_ma_ceb1_1; // load b1 register wire dsp_ma_ceb1_1; // load b1 register
wire dsp_ma_ceb2_1; // load b2 register wire dsp_ma_ceb2_1; // load b2 register
...@@ -94,6 +98,7 @@ module dct1d_chen#( ...@@ -94,6 +98,7 @@ module dct1d_chen#(
wire dsp_ma_neg_m_2; // 1 - negate multiplier result wire dsp_ma_neg_m_2; // 1 - negate multiplier result
wire dsp_ma_accum_2; // 0 - use multiplier result, 1 add to accumulator wire dsp_ma_accum_2; // 0 - use multiplier result, 1 add to accumulator
wire signed [P_WIDTH-1:0] dsp_ma_p_2; wire signed [P_WIDTH-1:0] dsp_ma_p_2;
wire signed [P_WIDTH-1:0] dsp_ma_p_mux;
// Multiplier A/D inputs before shift // Multiplier A/D inputs before shift
wire signed [WIDTH-1:0] dsp_ma_ain24_1; wire signed [WIDTH-1:0] dsp_ma_ain24_1;
...@@ -142,10 +147,25 @@ module dct1d_chen#( ...@@ -142,10 +147,25 @@ module dct1d_chen#(
reg [7:0] phase; reg [7:0] phase;
reg [2:0] phase_cnt; reg [2:0] phase_cnt;
reg [OUT_WIDTH -1:0] dout_r; reg [OUT_WIDTH -1:0] dout_r;
wire [OUT_WIDTH -1:0] dout1_w; // wire [OUT_WIDTH -1:0] dout1_w;
wire [OUT_WIDTH -1:0] dout2_w; // wire [OUT_WIDTH -1:0] dout2_w;
wire dout_round_c;
wire[BEFORE_SAT_WIDTH -1:0] dout_round_w; // after rounding, before (optional) saturation
reg [BEFORE_SAT_WIDTH -1:0] dout_round_r; // after rounding, before (optional) saturation
wire [OUT_WIDTH -1:0] dout_sat_w;
wire[BEFORE_SAT_WIDTH -1:0] dout_round; // after rounding, before (optional) saturation
reg [2:0] per_type; // idle/last:0, first cycle - 1, 2-nd - 2, other - 3,... ~en->6 ->7 -> 0 (to generate pre2_start_out) reg [2:0] per_type; // idle/last:0, first cycle - 1, 2-nd - 2, other - 3,... ~en->6 ->7 -> 0 (to generate pre2_start_out)
// Temporarily adding 1 extra latency cycle for rounding/saturation. TODO: Remove when moved to DSP itself
reg pre3_start_out; // 3 clock cycle before F4 output, full dout sequence
// start_out-X-F4-X-F2-X-F6-F5-F0-F3-X-F1-X-F7
reg pre_en_out; // valid at the same time slot as pre3_start_out (goes active with pre3_start_out), one cycle ahead of en_out
// .ain ({simd_a1,simd_a0}), // input[47:0] // .ain ({simd_a1,simd_a0}), // input[47:0]
// .bin ({simd_b1,simd_b0}), // input[47:0] // .bin ({simd_b1,simd_b0}), // input[47:0]
// dsp_addsub_simd1_i input connections // dsp_addsub_simd1_i input connections
...@@ -233,7 +253,7 @@ module dct1d_chen#( ...@@ -233,7 +253,7 @@ module dct1d_chen#(
assign dsp_ma_ced_2 = phase[1] | phase[6]; assign dsp_ma_ced_2 = phase[1] | phase[6];
assign dsp_ma_sela_2 = phase[1] | phase[6]; assign dsp_ma_sela_2 = phase[1] | phase[6];
assign dsp_ma_seld_2 = phase[0] | phase[2] | phase[5] | phase[7]; assign dsp_ma_seld_2 = phase[0] | phase[2] | phase[5] | phase[7];
assign dsp_ma_neg_m_2 = phase[6]; assign dsp_ma_neg_m_2 = phase[1] | phase[6];
assign dsp_ma_accum_2 = phase[0] | phase[2] | phase[4] | phase[6]; assign dsp_ma_accum_2 = phase[0] | phase[2] | phase[4] | phase[6];
// dsp_ma2_i data input connections // dsp_ma2_i data input connections
assign dsp_ma_ain24_2 = simd_p5; assign dsp_ma_ain24_2 = simd_p5;
...@@ -255,10 +275,37 @@ module dct1d_chen#( ...@@ -255,10 +275,37 @@ module dct1d_chen#(
// assign dout1_w = dsp_ma_p_1[M_WIDTH -: WIDTH]; // adding one bit for adder (two MPY outputs are added) // assign dout1_w = dsp_ma_p_1[M_WIDTH -: WIDTH]; // adding one bit for adder (two MPY outputs are added)
// assign dout2_w = dsp_ma_p_2[M_WIDTH -: WIDTH]; // adding one bit for adder (two MPY outputs are added) // assign dout2_w = dsp_ma_p_2[M_WIDTH -: WIDTH]; // adding one bit for adder (two MPY outputs are added)
assign dout1_w = dsp_ma_p_1[COSINE_SHIFT +: WIDTH]; // adding one bit for adder (two MPY outputs are added) assign dsp_ma_p_mux = phase_cnt[0] ? dsp_ma_p_1 : dsp_ma_p_2;
assign dout2_w = dsp_ma_p_2[COSINE_SHIFT +: WIDTH]; // adding one bit for adder (two MPY outputs are added)
// assign dout1_w = dsp_ma_p_1[COSINE_SHIFT +: OUT_WIDTH]; // adding one bit for adder (two MPY outputs are added)
// assign dout2_w = dsp_ma_p_2[COSINE_SHIFT +: OUT_WIDTH]; // adding one bit for adder (two MPY outputs are added)
assign dout_round_c = dsp_ma_p_mux[TOTAL_RSHIFT-1];
assign dout_round_w = dsp_ma_p_mux[TOTAL_RSHIFT +: BEFORE_SAT_WIDTH] + dout_round_c;
// Saturation (only if BEFORE_SAT_WIDTH > OUT_WIDTH)
localparam TRIM_MSB = BEFORE_SAT_WIDTH - OUT_WIDTH;
generate
if (TRIM_MSB < 0) begin // should never happen
assign dout_sat_w = { {(-TRIM_MSB){dout_round[BEFORE_SAT_WIDTH-1]}},dout_round };
end else if (TRIM_MSB == 0) begin
assign dout_sat_w = dout_round[0 +: OUT_WIDTH];
end else begin //! saturate. TODO: Maybe (and also symmetric rounding) can be done in DSP itself using masks?
assign dout_sat_w = (dout_round[BEFORE_SAT_WIDTH-1 -: TRIM_MSB] == {TRIM_MSB{dout_round[BEFORE_SAT_WIDTH-1]}})?
dout_round[0 +: OUT_WIDTH]:
{dout_round[BEFORE_SAT_WIDTH-1], {OUT_WIDTH-1{~dout_round[BEFORE_SAT_WIDTH-1]}}};
end
endgenerate
// to possibly remove registers with generate
assign dout_round= dout_round_r;
//BEFORE_SAT_WIDTH
// wire dout_round_c;
// wire [OUT_WIDTH -1:0] dout_round_w;
//ROUND_OUT
//phase_cnt[0] ? dout1_w : dout2_w;
assign dout = dout_r; assign dout = dout_r;
always @ (posedge clk) begin always @ (posedge clk) begin
...@@ -284,16 +331,24 @@ module dct1d_chen#( ...@@ -284,16 +331,24 @@ module dct1d_chen#(
3'h6: dsp_ma_bin <= COS_4_16; 3'h6: dsp_ma_bin <= COS_4_16;
3'h7: dsp_ma_bin <= COS_6_16; 3'h7: dsp_ma_bin <= COS_6_16;
endcase endcase
dout_r <= phase_cnt[0] ? dout1_w : dout2_w; // dout_r <= phase_cnt[0] ? dout1_w : dout2_w;
dout_round_r <= dout_round_w;
dout_r <= dout_sat_w;
if (rst) pre3_start_out <= 0;
else pre3_start_out <= (per_type == 2) && phase[3];
if (rst) pre2_start_out <= 0; pre2_start_out <=pre3_start_out;
else pre2_start_out <= (per_type == 2) && phase[3];
if (rst || !(en || (|phase))) en_out <= 0;
if (rst || !(en || (|phase))) pre_en_out <= 0;
else if (phase[3]) begin else if (phase[3]) begin
if (per_type == 2) en_out <= 1; if (per_type == 2) pre_en_out <= 1;
else if (per_type[2]) en_out <= 0; else if (per_type[2]) pre_en_out <= 0;
end end
en_out <= pre_en_out;
end end
dsp_addsub_simd #( dsp_addsub_simd #(
......
/******************************************************************************* /*!
* <b>Module:</b>dct1d_chen_reorder_in * <b>Module:</b>dct1d_chen_reorder_in
* @file dct1d_chen_reorder_in.v * @file dct1d_chen_reorder_in.v
* @date:2016-06-08 * @date 2016-06-08
* @author: Andrey Filippov * @author Andrey Filippov
* *
* @brief: Reorder scan-line pixel stream for dct1d_chen module * @brief Reorder scan-line pixel stream for dct1d_chen module
* *
* @copyright Copyright (c) 2016 Elphel, Inc. * @copyright Copyright (c) 2016 Elphel, Inc.
* *
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code * the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it * contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs. * with at least one of the Free Software programs.
*******************************************************************************/ */
`timescale 1ns/1ps `timescale 1ns/1ps
module dct1d_chen_reorder_in#( module dct1d_chen_reorder_in#(
......
/******************************************************************************* /*!
* <b>Module:</b>dct1d_chen_reorder_out * <b>Module:</b>dct1d_chen_reorder_out
* @file dct1d_chen_reorder_out.v * @file dct1d_chen_reorder_out.v
* @date:2016-06-08 * @date 2016-06-08
* @author: Andrey Filippov * @author Andrey Filippov
* *
* @brief: Reorder data from dct1d_chen output to natural sequence * @brief Reorder data from dct1d_chen output to natural sequence
* *
* @copyright Copyright (c) 2016 Elphel, Inc. * @copyright Copyright (c) 2016 Elphel, Inc.
* *
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code * the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it * contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs. * with at least one of the Free Software programs.
*******************************************************************************/ */
`timescale 1ns/1ps `timescale 1ns/1ps
module dct1d_chen_reorder_out#( module dct1d_chen_reorder_out#(
...@@ -62,6 +62,7 @@ module dct1d_chen_reorder_out#( ...@@ -62,6 +62,7 @@ module dct1d_chen_reorder_out#(
reg [2:0] per_type; // idle/last:0, first cycle - 1, 2-nd - 2, other - 3,... ~en->6 ->7 -> 0 (to generate pre2_start_out) reg [2:0] per_type; // idle/last:0, first cycle - 1, 2-nd - 2, other - 3,... ~en->6 ->7 -> 0 (to generate pre2_start_out)
reg start_out_r; reg start_out_r;
reg en_out_r; reg en_out_r;
wire stop_out; // qualify with en
assign dout = dout_r; assign dout = dout_r;
assign start_out = start_out_r; assign start_out = start_out_r;
assign en_out = en_out_r; assign en_out = en_out_r;
...@@ -98,16 +99,30 @@ module dct1d_chen_reorder_out#( ...@@ -98,16 +99,30 @@ module dct1d_chen_reorder_out#(
if ((per_type == 2) && (cntr_in == 1)) raddr <= {~cntr_in[3], 3'b0}; if ((per_type == 2) && (cntr_in == 1)) raddr <= {~cntr_in[3], 3'b0};
else if ((raddr[2:0] != 0) || (per_type !=0)) raddr <= raddr + 1; else if ((raddr[2:0] != 0) || (per_type !=0)) raddr <= raddr + 1;
dout_r <= reord_buf_ram[raddr]; if (en_out_r) dout_r <= reord_buf_ram[raddr];
start_out_r <= (per_type == 2) && (cntr_in == 1); start_out_r <= (per_type == 2) && (cntr_in == 1);
if (rst ||(per_type == 0) ) en_out_r <= 0; if (rst ||(per_type == 0) ) en_out_r <= 0;
else if (cntr_in == 1) en_out_r <= (per_type == 2) || !per_type[2]; // else if (cntr_in == 1) en_out_r <= (per_type == 2) || !per_type[2];
else if ((cntr_in == 1) && (per_type == 2)) en_out_r <= 1;
else if (stop_out && !en) en_out_r <= 0;
//stop_out
dv <= en_out_r;
if (rst) dv <= 0; // if (rst) dv <= 0;
else if (start_out_r) dv <= 1; // else if (start_out_r) dv <= 1;
else if ((raddr[2:0] == 0) && !en_out_r) dv <= 0; // else if ((raddr[2:0] == 0) && !en_out_r) dv <= 0;
end end
dly01_16 dly01_16_i (
.clk (clk), // input
.rst (rst), // input
.dly (4'd8), // input[3:0]
.din ((&cntr_in[2:0]) && !en), // input
.dout (stop_out) // output
);
endmodule endmodule
/******************************************************************************* /*!
* <b>Module:</b>dct2d8x8_chen * <b>Module:</b>dct2d8x8_chen
* @file dct2d8x8_chen.v * @file dct2d8x8_chen.v
* @date:2016-06-10 * @date 2016-06-10
* @author: Andrey Filippov * @author Andrey Filippov
* *
* @brief: 2-d DCT implementation of Chen algorithm * @brief 2-d DCT implementation of Chen algorithm
* *
* @copyright Copyright (c) 2016 Elphel, Inc. * @copyright Copyright (c) 2016 Elphel, Inc.
* *
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code * the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it * contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs. * with at least one of the Free Software programs.
*******************************************************************************/ */
`timescale 1ns/1ps `timescale 1ns/1ps
module dct2d8x8_chen#( module dct2d8x8_chen#(
...@@ -45,13 +45,13 @@ module dct2d8x8_chen#( ...@@ -45,13 +45,13 @@ module dct2d8x8_chen#(
parameter STAGE2_SAFE_BITS = 3, // leave this number of extra bits on DCT1D input to prevent output saturation parameter STAGE2_SAFE_BITS = 3, // leave this number of extra bits on DCT1D input to prevent output saturation
parameter TRANSPOSE_WIDTH = 16, // transpose memory width parameter TRANSPOSE_WIDTH = 16, // transpose memory width
parameter TRIM_STAGE_1 = 1, // Trim these MSBs from the stage1 results (1 - matches old DCT) parameter TRIM_STAGE_1 = 1, // Trim these MSBs from the stage1 results (1 - matches old DCT)
parameter TRIM_STAGE_2 = 2, // Trim these MSBs from the stage2 results TODO: put real value parameter TRIM_STAGE_2 = 0, // Trim these MSBs from the stage2 results
parameter DSP_WIDTH = 24, parameter DSP_WIDTH = 24,
parameter DSP_OUT_WIDTH = 24, // parameter DSP_OUT_WIDTH = 24,
parameter DSP_B_WIDTH = 18, parameter DSP_B_WIDTH = 18,
parameter DSP_A_WIDTH = 25, parameter DSP_A_WIDTH = 25,
parameter DSP_P_WIDTH = 48, parameter DSP_P_WIDTH = 48
parameter DSP_M_WIDTH = 43 // actual multiplier width (== (A_WIDTH +B_WIDTH) // parameter DSP_M_WIDTH = 43 // actual multiplier width (== (A_WIDTH +B_WIDTH)
) ( ) (
input clk, /// system clock, posedge input clk, /// system clock, posedge
input rst, // sync reset input rst, // sync reset
...@@ -68,6 +68,8 @@ module dct2d8x8_chen#( ...@@ -68,6 +68,8 @@ module dct2d8x8_chen#(
localparam REPLICATE_IN_STAGE2 = STAGE2_SAFE_BITS; localparam REPLICATE_IN_STAGE2 = STAGE2_SAFE_BITS;
localparam PAD_IN_STAGE2 = DSP_WIDTH - TRANSPOSE_WIDTH - STAGE2_SAFE_BITS ; localparam PAD_IN_STAGE2 = DSP_WIDTH - TRANSPOSE_WIDTH - STAGE2_SAFE_BITS ;
localparam ROUND_STAGE1 = DSP_WIDTH - TRANSPOSE_WIDTH - TRIM_STAGE_1;
localparam ROUND_STAGE2 = DSP_WIDTH - OUTPUT_WIDTH - TRIM_STAGE_2;
reg signed [INPUT_WIDTH-1:0] xin_r; reg signed [INPUT_WIDTH-1:0] xin_r;
...@@ -82,7 +84,7 @@ module dct2d8x8_chen#( ...@@ -82,7 +84,7 @@ module dct2d8x8_chen#(
wire signed [DSP_WIDTH-1:0] dct1in_pad_h; wire signed [DSP_WIDTH-1:0] dct1in_pad_h;
wire signed [DSP_WIDTH-1:0] dct1in_pad_l; wire signed [DSP_WIDTH-1:0] dct1in_pad_l;
wire signed [DSP_OUT_WIDTH-1:0] dct1_out; wire signed [TRANSPOSE_WIDTH-1:0] dct1_out;
wire stage1_pre2_start_out; wire stage1_pre2_start_out;
// wire stage1_pre2_en_out; // wire stage1_pre2_en_out;
...@@ -94,20 +96,43 @@ module dct2d8x8_chen#( ...@@ -94,20 +96,43 @@ module dct2d8x8_chen#(
wire signed [DSP_WIDTH-1:0] dct2in_pad_h; wire signed [DSP_WIDTH-1:0] dct2in_pad_h;
wire signed [DSP_WIDTH-1:0] dct2in_pad_l; wire signed [DSP_WIDTH-1:0] dct2in_pad_l;
wire signed [DSP_OUT_WIDTH-1:0] dct2_out; wire signed [OUTPUT_WIDTH-1:0] dct2_out;
wire stage2_pre2_start_out; wire stage2_pre2_start_out;
wire stage2_pre2_en_out; wire stage2_pre2_en_out;
wire signed [OUTPUT_WIDTH-1:0] dct2_trimmed; // wire signed [OUTPUT_WIDTH-1:0] dct2_trimmed;
assign dct1in_pad_h = {{REPLICATE_IN_STAGE1{dct1in_h[INPUT_WIDTH-1]}}, dct1in_h, {PAD_IN_STAGE1{1'b0}}}; assign dct1in_pad_h = {{REPLICATE_IN_STAGE1{dct1in_h[INPUT_WIDTH-1]}}, dct1in_h, {PAD_IN_STAGE1{1'b0}}};
assign dct1in_pad_l = {{REPLICATE_IN_STAGE1{dct1in_l[INPUT_WIDTH-1]}}, dct1in_l, {PAD_IN_STAGE1{1'b0}}}; assign dct1in_pad_l = {{REPLICATE_IN_STAGE1{dct1in_l[INPUT_WIDTH-1]}}, dct1in_l, {PAD_IN_STAGE1{1'b0}}};
assign transpose_din = dct1_out[DSP_OUT_WIDTH-1-TRIM_STAGE_1 -:TRANSPOSE_WIDTH]; assign transpose_din = dct1_out;
/*
generate
if (TRIM_STAGE_1 == 0) begin
assign transpose_din = dct1_out[DSP_OUT_WIDTH-1 -:TRANSPOSE_WIDTH];
end else begin //! saturate. TODO: Maybe (and also symmetric rounding) can be done in DSP itself using masks?
assign transpose_din = (dct1_out[DSP_OUT_WIDTH-1 -: TRIM_STAGE_1] == {TRIM_STAGE_1{dct1_out[DSP_OUT_WIDTH-1]}})?
dct1_out[DSP_OUT_WIDTH-1-TRIM_STAGE_1 -: TRANSPOSE_WIDTH]:
{dct1_out[DSP_OUT_WIDTH-1], {TRANSPOSE_WIDTH-1{~dct1_out[DSP_OUT_WIDTH-1]}}};
end
endgenerate
*/
assign dct2in_pad_h = {{REPLICATE_IN_STAGE2{transpose_douth[TRANSPOSE_WIDTH-1]}}, transpose_douth, {PAD_IN_STAGE2{1'b0}}}; assign dct2in_pad_h = {{REPLICATE_IN_STAGE2{transpose_douth[TRANSPOSE_WIDTH-1]}}, transpose_douth, {PAD_IN_STAGE2{1'b0}}};
assign dct2in_pad_l = {{REPLICATE_IN_STAGE2{transpose_doutl[TRANSPOSE_WIDTH-1]}}, transpose_doutl, {PAD_IN_STAGE2{1'b0}}}; assign dct2in_pad_l = {{REPLICATE_IN_STAGE2{transpose_doutl[TRANSPOSE_WIDTH-1]}}, transpose_doutl, {PAD_IN_STAGE2{1'b0}}};
assign dct2_trimmed = dct2_out[DSP_OUT_WIDTH-1-TRIM_STAGE_2 -:OUTPUT_WIDTH]; // assign dct2_trimmed = dct2_out;
/*
generate
if (TRIM_STAGE_2 == 0) begin
assign dct2_trimmed = dct2_out[DSP_OUT_WIDTH-1 -: OUTPUT_WIDTH];
end else begin //! saturate. Maybe (and also symmetric rounding) can be done in DSP itself using masks?
assign dct2_trimmed = (dct2_out[DSP_OUT_WIDTH-1 -: TRIM_STAGE_2] == {TRIM_STAGE_2{dct2_out[DSP_OUT_WIDTH-1]}})?
dct2_out[DSP_OUT_WIDTH-1-TRIM_STAGE_2 -:OUTPUT_WIDTH]:
{dct2_out[DSP_OUT_WIDTH-1], {OUTPUT_WIDTH-1{~dct2_out[DSP_OUT_WIDTH-1]}}};
end
endgenerate
*/
always @(posedge clk) begin always @(posedge clk) begin
start_in_r <= start; start_in_r <= start;
...@@ -141,11 +166,11 @@ module dct2d8x8_chen#( ...@@ -141,11 +166,11 @@ module dct2d8x8_chen#(
wire dbg_stage1_pre2_en_out; wire dbg_stage1_pre2_en_out;
dct1d_chen #( dct1d_chen #(
.WIDTH (DSP_WIDTH), .WIDTH (DSP_WIDTH),
.OUT_WIDTH (DSP_OUT_WIDTH), .OUT_WIDTH (TRANSPOSE_WIDTH), // DSP_OUT_WIDTH),
.B_WIDTH (DSP_B_WIDTH), .B_WIDTH (DSP_B_WIDTH),
.A_WIDTH (DSP_A_WIDTH), .A_WIDTH (DSP_A_WIDTH),
.P_WIDTH (DSP_P_WIDTH), .P_WIDTH (DSP_P_WIDTH),
.M_WIDTH (DSP_M_WIDTH) .ROUND_OUT (ROUND_STAGE1) // cut these number of LSBs on the output, round result (in addition to COSINE_SHIFT)
) dct1d_chen_stage1_i ( ) dct1d_chen_stage1_i (
.clk (clk), // input .clk (clk), // input
.rst (rst), // input .rst (rst), // input
...@@ -170,12 +195,12 @@ module dct2d8x8_chen#( ...@@ -170,12 +195,12 @@ module dct2d8x8_chen#(
); );
dct1d_chen #( dct1d_chen #(
.WIDTH(DSP_WIDTH), .WIDTH (DSP_WIDTH),
.OUT_WIDTH(DSP_OUT_WIDTH), .OUT_WIDTH (OUTPUT_WIDTH),
.B_WIDTH(DSP_B_WIDTH), .B_WIDTH (DSP_B_WIDTH),
.A_WIDTH(DSP_A_WIDTH), .A_WIDTH (DSP_A_WIDTH),
.P_WIDTH(DSP_P_WIDTH), .P_WIDTH (DSP_P_WIDTH),
.M_WIDTH(DSP_M_WIDTH) .ROUND_OUT (ROUND_STAGE2) // cut these number of LSBs on the output, round result (in addition to COSINE_SHIFT)
) dct1d_chen_stage2_i ( ) dct1d_chen_stage2_i (
.clk (clk), // input .clk (clk), // input
.rst (rst), // input .rst (rst), // input
...@@ -193,7 +218,7 @@ module dct2d8x8_chen#( ...@@ -193,7 +218,7 @@ module dct2d8x8_chen#(
.clk (clk), // input .clk (clk), // input
.rst (rst), // input .rst (rst), // input
.en (stage2_pre2_en_out), // input .en (stage2_pre2_en_out), // input
.din (dct2_trimmed), // input[23:0] .din (dct2_out), // input[23:0]
.pre2_start (stage2_pre2_start_out), // input .pre2_start (stage2_pre2_start_out), // input
.dout (d_out), // output[23:0] .dout (d_out), // output[23:0]
.start_out (pre_first_out), // output reg .start_out (pre_first_out), // output reg
...@@ -202,13 +227,16 @@ module dct2d8x8_chen#( ...@@ -202,13 +227,16 @@ module dct2d8x8_chen#(
); );
// Just for debugging/comparing with old 1-d DCT: // Just for debugging/comparing with old 1-d DCT:
wire [DSP_WIDTH-1:0] dbg_d_out; `ifdef SIMULATION // no sense to synthesize it
`ifdef DEBUG_DCT1D
wire [TRANSPOSE_WIDTH-1:0] dbg_d_out;
//wire [15:0] dbg_d_out13=dbg_d_out[7 +: 16] ;
wire dbg_dv; wire dbg_dv;
wire dbg_en_out; wire dbg_en_out;
wire dbg_pre_first_out; wire dbg_pre_first_out;
dct1d_chen_reorder_out #( dct1d_chen_reorder_out #(
.WIDTH (DSP_WIDTH) .WIDTH (TRANSPOSE_WIDTH)
) dct1d_chen_reorder_out_dbg_i ( ) dct1d_chen_reorder_out_dbg_i (
.clk (clk), // input .clk (clk), // input
.rst (rst), // input .rst (rst), // input
...@@ -220,5 +248,7 @@ wire dbg_pre_first_out; ...@@ -220,5 +248,7 @@ wire dbg_pre_first_out;
.dv (dbg_dv), // output reg .dv (dbg_dv), // output reg
.en_out (dbg_en_out) // output reg .en_out (dbg_en_out) // output reg
); );
`endif
`endif
endmodule endmodule
/******************************************************************************* /*!
* <b>Module:</b>dct_chen_transpose * <b>Module:</b>dct_chen_transpose
* @file dct_chen_transpose.v * @file dct_chen_transpose.v
* @date:2016-06-09 * @date 2016-06-09
* @author: Andrey Filippov * @author Andrey Filippov
* *
* @brief: Reorder+transpose data between two 1-d DCT passes * @brief Reorder+transpose data between two 1-d DCT passes
* *
* @copyright Copyright (c) 2016 Elphel, Inc. * @copyright Copyright (c) 2016 Elphel, Inc.
* *
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code * the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it * contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs. * with at least one of the Free Software programs.
*******************************************************************************/ */
`timescale 1ns/1ps `timescale 1ns/1ps
module dct_chen_transpose#( module dct_chen_transpose#(
...@@ -70,6 +70,7 @@ module dct_chen_transpose#( ...@@ -70,6 +70,7 @@ module dct_chen_transpose#(
reg [2*WIDTH-1:0] ram_reg2; reg [2*WIDTH-1:0] ram_reg2;
wire pre_rstart_w = wcntr[5:0] == 61; wire pre_rstart_w = wcntr[5:0] == 61;
reg [1:0] rstop_r; reg [1:0] rstop_r;
reg first_after_pause; // first block after pause - do not write 2 items to the "past"
assign wpage = wcntr[6] ^ wrow_mod[3]; // previous page for row 0, col 1 & 3 assign wpage = wcntr[6] ^ wrow_mod[3]; // previous page for row 0, col 1 & 3
assign wrow_mod = {1'b0, wrow} - wcol13; assign wrow_mod = {1'b0, wrow} - wcol13;
...@@ -93,7 +94,7 @@ module dct_chen_transpose#( ...@@ -93,7 +94,7 @@ module dct_chen_transpose#(
else if (pre_we_r) wcntr <= wcntr + 1; // including page, should be before 'if (pre2_start)' else if (pre_we_r) wcntr <= wcntr + 1; // including page, should be before 'if (pre2_start)'
else if (pre2_start) wcntr <= {wcntr[6], 6'b0}; // if happens during pre_we_r - will be ignored, otherwise (after pause) will zero in-page address else if (pre2_start) wcntr <= {wcntr[6], 6'b0}; // if happens during pre_we_r - will be ignored, otherwise (after pause) will zero in-page address
we_r <= pre_we_r; we_r <= pre_we_r && (!first_after_pause || !wcol13 || (|wrow)); // do not write first after pause to the "past"
if (we_r) transpose_ram[waddr] <= din; if (we_r) transpose_ram[waddr] <= din;
...@@ -118,6 +119,11 @@ module dct_chen_transpose#( ...@@ -118,6 +119,11 @@ module dct_chen_transpose#(
if (rst) en_out <= 0; if (rst) en_out <= 0;
else if (rcntr == 1) en_out <= 1; else if (rcntr == 1) en_out <= 1;
else if (rstop_r[1]) en_out <= 0; else if (rstop_r[1]) en_out <= 0;
if (rst) first_after_pause <= 0;
else if (pre2_start && !we_r) first_after_pause <= 1;
else if (&wcntr[5:0]) first_after_pause <= 0;
end end
dly01_16 dly01_16_stop_i ( dly01_16 dly01_16_stop_i (
......
/******************************************************************************* /*!
* <b>Module:</b>dsp_addsub_simd * <b>Module:</b>dsp_addsub_simd
* @file dsp_addsub_simd.v * @file dsp_addsub_simd.v
* @date:2016-06-05 * @date 2016-06-05
* @author: Andrey Filippov * @author Andrey Filippov
* *
* @brief: SIMD adder/subtracter * @brief SIMD adder/subtracter
* *
* @copyright Copyright (c) 2016 Elphel, Inc. * @copyright Copyright (c) 2016 Elphel, Inc.
* *
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code * the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it * contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs. * with at least one of the Free Software programs.
*******************************************************************************/ */
`timescale 1ns/1ps `timescale 1ns/1ps
module dsp_addsub_simd#( module dsp_addsub_simd#(
...@@ -70,7 +70,7 @@ module dsp_addsub_simd#( ...@@ -70,7 +70,7 @@ module dsp_addsub_simd#(
DSP48E1 #( DSP48E1 #(
.ACASCREG (1), .ACASCREG (1),
.ADREG (0), // (1), .ADREG (1),
.ALUMODEREG (1), .ALUMODEREG (1),
.AREG (1), // (1) .AREG (1), // (1)
.AUTORESET_PATDET ("NO_RESET"), .AUTORESET_PATDET ("NO_RESET"),
...@@ -81,7 +81,7 @@ module dsp_addsub_simd#( ...@@ -81,7 +81,7 @@ module dsp_addsub_simd#(
.CARRYINREG (1), .CARRYINREG (1),
.CARRYINSELREG (1), .CARRYINSELREG (1),
.CREG (1), //(1), .CREG (1), //(1),
.DREG (0), //(1), .DREG (1),
.INMODEREG (1), .INMODEREG (1),
.IS_ALUMODE_INVERTED (4'b0), .IS_ALUMODE_INVERTED (4'b0),
.IS_CARRYIN_INVERTED (1'b0), .IS_CARRYIN_INVERTED (1'b0),
...@@ -131,7 +131,7 @@ module dsp_addsub_simd#( ...@@ -131,7 +131,7 @@ module dsp_addsub_simd#(
.CECTRL (1'b1), // input .CECTRL (1'b1), // input
.CED (1'b0), // input .CED (1'b0), // input
.CEINMODE (1'b1), // input .CEINMODE (1'b1), // input
.CEM (1'b1), // input .CEM (1'b0), // input
.CEP (cep), // input .CEP (cep), // input
.CLK (clk), // input .CLK (clk), // input
.D (25'h1ffffff),// input[24:0] .D (25'h1ffffff),// input[24:0]
...@@ -145,9 +145,9 @@ module dsp_addsub_simd#( ...@@ -145,9 +145,9 @@ module dsp_addsub_simd#(
.RSTB (rst), // input .RSTB (rst), // input
.RSTC (rst), // input .RSTC (rst), // input
.RSTCTRL (rst), // input .RSTCTRL (rst), // input
.RSTD (rst), // input .RSTD (1'b0), // input
.RSTINMODE (rst), // input .RSTINMODE (rst), // input
.RSTM (rst), // input .RSTM (1'b0), // input
.RSTP (rst) // input .RSTP (rst) // input
); );
`else `else
......
/******************************************************************************* /*!
* dsp_ma * dsp_ma
* @file dsp_ma.v * @file dsp_ma.v
* @date:2016-06-05 * @date 2016-06-05
* @author: Andrey Filippov * @author Andrey Filippov
* *
* @brief: DSP with multi-input multiplier and accumulator * @brief DSP with multi-input multiplier and accumulator
* *
* @copyright Copyright (c) 2016 Elphel, Inc. * @copyright Copyright (c) 2016 Elphel, Inc.
* *
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code * the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it * contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs. * with at least one of the Free Software programs.
*******************************************************************************/ */
`timescale 1ns/1ps `timescale 1ns/1ps
module dsp_ma #( module dsp_ma #(
......
/******************************************************************************* /*!
* dsp_ma_preadd * dsp_ma_preadd
* @file dsp_ma_preadd.v * @file dsp_ma_preadd.v
* @date:2016-06-05 * @date 2016-06-05
* @author: Andrey Filippov * @author Andrey Filippov
* *
* @brief: DSP with multi-input multiplier and accumulator with pre-adder * @brief DSP with multi-input multiplier and accumulator with pre-adder
* *
* @copyright Copyright (c) 2016 Elphel, Inc. * @copyright Copyright (c) 2016 Elphel, Inc.
* *
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
* the combined code. This permission applies to you if the distributed code * the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it * contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs. * with at least one of the Free Software programs.
*******************************************************************************/ */
`timescale 1ns/1ps `timescale 1ns/1ps
module dsp_ma_preadd #( module dsp_ma_preadd #(
......
...@@ -35,7 +35,9 @@ ...@@ -35,7 +35,9 @@
* contains all the components and scripts required to completely simulate it * contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs. * with at least one of the Free Software programs.
*/ */
parameter FPGA_VERSION = 32'h03930096; // serial, next parameter FPGA_VERSION = 32'h03930098; // serial, trying dct_chen - works, removing old completely
// parameter FPGA_VERSION = 32'h03930097; // serial, trying dct_chen - works
// parameter FPGA_VERSION = 32'h03930096; // serial, next (before changing DCT)
// parameter FPGA_VERSION = 32'h03930095; // parallel -0.068/-0.342/5 82.38% // parameter FPGA_VERSION = 32'h03930095; // parallel -0.068/-0.342/5 82.38%
// parameter FPGA_VERSION = 32'h03930094; // hispi, disabling debug -0.187/-1.252/16 84.14% // parameter FPGA_VERSION = 32'h03930094; // hispi, disabling debug -0.187/-1.252/16 84.14%
// parameter FPGA_VERSION = 32'h03930093; // hispi, masking sensor data to memory buffer, debug still on // parameter FPGA_VERSION = 32'h03930093; // hispi, masking sensor data to memory buffer, debug still on
......
...@@ -1104,6 +1104,10 @@ write_sensor_i2c 0 1 0 0x302e0010 ...@@ -1104,6 +1104,10 @@ write_sensor_i2c 0 1 0 0x302e0010
#Exposure 0x800 lines #Exposure 0x800 lines
write_sensor_i2c 0 1 0 0x30120800 write_sensor_i2c 0 1 0 0x30120800
#test - running 8, 8-bit
write_sensor_i2c 0 1 0 0x30700101
################## Serial - chn3 #################### ################## Serial - chn3 ####################
cd /usr/local/verilog/; test_mcntrl.py @hargs cd /usr/local/verilog/; test_mcntrl.py @hargs
bitstream_set_path /usr/local/verilog/x393_hispi.bit bitstream_set_path /usr/local/verilog/x393_hispi.bit
...@@ -1124,7 +1128,7 @@ write_sensor_i2c 3 1 0 0x3028000a ...@@ -1124,7 +1128,7 @@ write_sensor_i2c 3 1 0 0x3028000a
write_sensor_i2c 3 1 0 0x302c000d write_sensor_i2c 3 1 0 0x302c000d
write_sensor_i2c 3 1 0 0x302e0010 write_sensor_i2c 3 1 0 0x302e0010
#exposure #exposure
write_sensor_i2c 3 1 0 0x30120200 write_sensor_i2c 3 1 0 0x30120800
compressor_control 3 2 compressor_control 3 2
......
...@@ -42,6 +42,8 @@ ...@@ -42,6 +42,8 @@
`define SYSTEM_DEFINES `define SYSTEM_DEFINES
// TODO: Later compare instantiate/infer // TODO: Later compare instantiate/infer
`define INSTANTIATE_DSP48E1 `define INSTANTIATE_DSP48E1
`define DEBUG_DCT1D // undefine after debugging is over
// `define USE_OLD_DCT
// Parameters from x393_sata project // Parameters from x393_sata project
`define USE_DRP `define USE_DRP
......
...@@ -38,7 +38,7 @@ PROJECT_NUMBER = 1.0 ...@@ -38,7 +38,7 @@ PROJECT_NUMBER = 1.0
# If a relative path is entered, it will be relative to the location # If a relative path is entered, it will be relative to the location
# where doxygen was started. If left blank the current directory will be used. # where doxygen was started. If left blank the current directory will be used.
OUTPUT_DIRECTORY = OUTPUT_DIRECTORY = x393_docs
# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
# 4096 sub-directories (in 2 levels) under the output directory of each output # 4096 sub-directories (in 2 levels) under the output directory of each output
......
[*] [*]
[*] GTKWave Analyzer v3.3.66 (w)1999-2015 BSI [*] GTKWave Analyzer v3.3.66 (w)1999-2015 BSI
[*] Sun Jun 12 10:04:58 2016 [*] Mon Jun 13 02:28:45 2016
[*] [*]
[dumpfile] "/home/andrey/git/x393/simulation/x393_testbench03-20160612033213998.fst" [dumpfile] "/home/andrey/git/x393/simulation/x393_testbench03-20160612183504062.fst"
[dumpfile_mtime] "Sun Jun 12 09:48:19 2016" [dumpfile_mtime] "Mon Jun 13 00:51:06 2016"
[dumpfile_size] 85326946 [dumpfile_size] 85539825
[savefile] "/home/andrey/git/x393/x393_testbench04.sav" [savefile] "/home/andrey/git/x393/x393_testbench04.sav"
[timestart] 90696800 [timestart] 74900000
[size] 1823 1180 [size] 1823 1180
[pos] 0 0 [pos] 0 0
*-15.073349 90841667 209370000 209396667 209423333 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 *-22.194141 94601000 209370000 209396667 209423333 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
[treeopen] x393_testbench03. [treeopen] x393_testbench03.
[treeopen] x393_testbench03.read_compressor_frame_irq. [treeopen] x393_testbench03.read_compressor_frame_irq.
[treeopen] x393_testbench03.read_contol_register_irq. [treeopen] x393_testbench03.read_contol_register_irq.
...@@ -33,6 +33,11 @@ ...@@ -33,6 +33,11 @@
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i. [treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct393_i. [treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct393_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1]. [treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.xdct393_i.
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[2]. [treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[2].
[treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[3]. [treeopen] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[3].
[treeopen] x393_testbench03.x393_i.compressor393_i.genblk3. [treeopen] x393_testbench03.x393_i.compressor393_i.genblk3.
...@@ -62,10 +67,10 @@ ...@@ -62,10 +67,10 @@
[treeopen] x393_testbench03.x393_i.sensors393_i.sensor_channel_block[3]. [treeopen] x393_testbench03.x393_i.sensors393_i.sensor_channel_block[3].
[treeopen] x393_testbench03.x393_i.sensors393_i.sensor_channel_block[3].sensor_channel_i. [treeopen] x393_testbench03.x393_i.sensors393_i.sensor_channel_block[3].sensor_channel_i.
[treeopen] x393_testbench03.x393_i.sensors393_i.sensor_channel_block[3].sensor_channel_i.sensor_i2c_io_i. [treeopen] x393_testbench03.x393_i.sensors393_i.sensor_channel_block[3].sensor_channel_i.sensor_i2c_io_i.
[sst_width] 238 [sst_width] 395
[signals_width] 260 [signals_width] 338
[sst_expanded] 1 [sst_expanded] 1
[sst_vpaned_height] 420 [sst_vpaned_height] 421
@820 @820
x393_testbench03.TEST_TITLE[639:0] x393_testbench03.TEST_TITLE[639:0]
@c00200 @c00200
...@@ -1600,7 +1605,164 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct ...@@ -1600,7 +1605,164 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct393_i.dv x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct393_i.dv
@420 @420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct393_i.d_out[12:0] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.xdct393_i.d_out[12:0]
@200
-dct_chen_out
@420
[color] 2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.d_out[12:0]
@800200
-chn1
@200
-xdct
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.xdct393_i.start
@420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.xdct393_i.tm_di[15:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.xdct393_i.d_out[12:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.xdct_dout_debug[12:0]
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.xdct393_i.dv
@200
-dct_chen
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.start
@420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.d_out[12:0]
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dv
@c08420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(4)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(7)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(8)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(9)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(10)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(11)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
(12)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_dout_diff_debug[12:0]
@1401200
-group_end
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dbg_stage1_pre2_en_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.pre2_start_out
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1_out[15:0]
@c00022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(4)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
(7)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase[7:0]
@1401200
-group_end
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.phase_cnt[2:0]
@800200
-transpose
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.pre2_start
@c00022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(4)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
@1401200
-group_end
@c00022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
(4)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.waddr[6:0]
@1401200
-group_end
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.we_r
@420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.din[15:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.dout_10_32_76_54[31:0]
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.en_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.rstop_r[1:0]
@c00022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wrow_mod[3:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wrow_mod[3:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wrow_mod[3:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wrow_mod[3:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wrow_mod[3:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcol13
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wrow[2:0]
@1401200
-group_end
@c00022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
(4)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.raddr[5:0]
@1401200
-group_end
@800200
-debug_start_stop
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start4_first
@29
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.buf_ready_w
@8022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.dct_pipeline_delay_cntr[5:0]
@800022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(4)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(7)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
(8)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.cmprs_macroblock_buf_iface_i.mb_pre_start[8:0]
@1001200
-group_end
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.mb_pre_end
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.mb_release_buf
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.mb_pre_start
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.mb_pre2_first_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.mb_pre_first_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.yc_pre_first_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[1].jp_channel_i.dct_start
@200
-
@1000200 @1000200
-debug_start_stop
-transpose
-chn1
-xdct -xdct
@800200 @800200
-dct_chen -dct_chen
...@@ -2169,9 +2331,6 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2 ...@@ -2169,9 +2331,6 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
(23)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.simd_b4[23:0] (23)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.simd_b4[23:0]
@1401200 @1401200
-group_end -group_end
@420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dout1_w[23:0]
@1401200
-ma1 -ma1
@c00200 @c00200
-ma2 -ma2
...@@ -2195,8 +2354,6 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2 ...@@ -2195,8 +2354,6 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dsp_ma_seld_2 x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dsp_ma_seld_2
@420 @420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dsp_ma_p_2[47:0] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dsp_ma_p_2[47:0]
[color] 2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dout2_w[23:0]
@800200 @800200
-dsp48e1 -dsp48e1
@420 @420
...@@ -2243,6 +2400,25 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2 ...@@ -2243,6 +2400,25 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.start x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.start
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.en_in_r x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.en_in_r
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1_en x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1_en
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.dv
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.pre2_start
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.cntr_in[3:0]
@800022
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.raddr[3:0]
@28
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.raddr[3:0]
(1)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.raddr[3:0]
(2)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.raddr[3:0]
(3)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.raddr[3:0]
@1001200
-group_end
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.start_out_r
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.en
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.en_out
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_i.per_type[2:0]
@c00200 @c00200
-reorder_in -reorder_in
@28 @28
...@@ -2270,15 +2446,9 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2 ...@@ -2270,15 +2446,9 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
-stage1_dbg -stage1_dbg
@28 @28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.start x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.start
@420
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dout1_w[23:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_stage1_i.dout2_w[23:0]
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.pre2_start x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.pre2_start
@420
[color] 3
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.din[23:0]
@22 @22
[color] 3
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.cntr_in[3:0] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.cntr_in[3:0]
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.waddr[3:0] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1d_chen_reorder_out_dbg_i.waddr[3:0]
@28 @28
...@@ -2289,13 +2459,8 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2 ...@@ -2289,13 +2459,8 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_pre_first_out x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_pre_first_out
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_dv x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_dv
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_en_out x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_en_out
@420
[color] 6 [color] 6
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dbg_d_out[23:0]
@28
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.stage1_pre2_start_out x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.stage1_pre2_start_out
@22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct1_out[23:0]
@200 @200
- -
@1000200 @1000200
...@@ -2316,10 +2481,10 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2 ...@@ -2316,10 +2481,10 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.we_r x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.we_r
@22 @22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.pre_rstart_w x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.pre_rstart_w
@800023 @c00022
[color] 2 [color] 2
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0] x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
@29 @28
[color] 2 [color] 2
(0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0] (0)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
[color] 2 [color] 2
...@@ -2334,7 +2499,7 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2 ...@@ -2334,7 +2499,7 @@ x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2
(5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0] (5)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
[color] 2 [color] 2
(6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0] (6)x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wcntr[6:0]
@1001201 @1401200
-group_end -group_end
@22 @22
x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wpage x393_testbench03.x393_i.compressor393_i.cmprs_channel_block[0].jp_channel_i.dct2d8x8_chen_i.dct_chen_transpose_i.wpage
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment