Commit 30ad4d73 authored by Andrey Filippov

debugging simulation

parent bef1c131
......@@ -314,15 +314,15 @@ module dtt_iv_8x8_ad#(
if (rst) dctv_en0 <= 0;
else if (dctv_start_0_w) dctv_en0 <= 1;
else if (!transpose_out_run[2]) dctv_en0 <= 0; // maybe get rid of this signal and send satrt for each 8?
else if (!transpose_out_run[2]) dctv_en0 <= 0; // maybe get rid of this signal and send start for each 8?
if (rst) dctv_en1 <= 0;
else if (dctv_start_1_w) dctv_en1 <= 1;
else if (dctv_phin[6]) dctv_en1 <= 0; // maybe get rid of this signal and send satrt for each 8?
else if (dctv_phin[6]) dctv_en1 <= 0; // maybe get rid of this signal and send start for each 8?
pre_dstv <= dctv_en_out0 ? pre2_dstv[0] : pre2_dstv[1];
if (t_we0 || t_we1) $display("%d %d",transpose_rcntr-2, transpose_out) ;
// if (t_we0 || t_we1) $display("%d %d",transpose_rcntr-2, transpose_out) ;
//write vertical dct input reorder memory
if (t_we0) t_ram0[t_wa[2:0]] <= transpose_out;
......
......@@ -64,11 +64,12 @@ module mclt16x16#(
input [3:0] bayer, // bayer mask (0 bits - skip pixel, valid @ start)
// TODO may modify/remove delay dly_pixel_data_i and use this 3-cycle delay for address calculation. If using 18x18 (or 20x20) full tile
// instead of 16x16 use different shifts in the case of lateral chromatic aberration
output mpixel_re, //!< pixel read enable (sync with mpixel_a)
output mpixel_page, //!< increment pixel page after this
output [7:0] mpixel_a, //!< pixel address {y,x} of the input tile
input [PIXEL_WIDTH-1:0] mpixel_d, //!< pixel data, latency = 2 from pixel address
output pre_busy, //!< start should come each 256-th cycle (next after pre_last_in), and not after pre_busy)
output reg pre_last_in, //!< may increment page
output pre_last_in, //!< may increment page
output pre_first_out,//!< next will output first of DCT/DCT coefficients
output pre_last_out, //!< next will be last output of DST/DST coefficients
output [7:0] out_addr, //!< address to save coefficients, 2 MSBs - mode (CC,SC,CS,SS), others - down first
......@@ -137,9 +138,13 @@ module mclt16x16#(
wire [DTT_IN_WIDTH-1:0] dtt_r_data = dtt_r_data_w[DTT_IN_WIDTH-1:0];
reg pre_last_out_r;
reg pre_last_in_r;
wire mpixel_prepage; // before common delay
assign pre_last_out = pre_last_out_r;
assign pre_busy = pre_busy_r;
assign pre_busy = pre_busy_r || start || (!pre_last_in_r && in_busy[0]);
assign pre_last_in = pre_last_in_r;
// assign mpixel_re = in_busy[6];
always @ (posedge clk) begin
if (start) begin
......@@ -157,10 +162,9 @@ module mclt16x16#(
if (rst) in_busy <= 0;
else in_busy <= {in_busy[15:0], start | (in_busy[0] & ~(&in_cntr))};
if (start) in_cntr <= 0;
else if (in_busy[0]) in_cntr[7:0] <= in_cntr[7:0] + 1;
if (!in_busy[0]) in_cntr <= 0;
else in_cntr[7:0] <= in_cntr[7:0] + 1;
pre_last_in <= in_cntr[7:0] == 8'hfe;
if (in_busy[8]) begin
mpixel_d_r <= mpixel_d;
......@@ -212,7 +216,8 @@ module mclt16x16#(
if (!dtt_r_re) dtt_r_cntr <= 0;
else dtt_r_cntr <= dtt_r_cntr + 1;
dtt_start <= dtt_r_cntr[5:0] == 0;
/// dtt_start <= dtt_r_cntr[5:0] == 0;
dtt_start <= (dtt_r_cntr[5:0] == 0) && dtt_r_re;
......@@ -275,16 +280,27 @@ D11 - negate for mode 3 (SS)
.wnd_out (window_w) // output[17:0] valid with in_busy[8]
);
dly_var #(
.WIDTH(1),
.DLY_WIDTH(4)
) dly_prepage_i (
.clk (clk), // input
.rst (rst), // input
.dly (4'h3), // input[3:0] Delay for external memory latency = 2, reduce for higher
.din (pre_last_in_r), // input[0:0]
.dout (mpixel_prepage) // output[0:0]
);
// Matching window latency with pixel data latency
dly_var #(
.WIDTH(8),
.WIDTH(10),
.DLY_WIDTH(4)
) dly_pixel_data_i (
.clk (clk), // input
.rst (rst), // input
.dly (4'h2), // input[3:0] Delay for external memory latency = 2, reduce for higher
.din (mpix_a_w), // input[0:0]
.dout (mpixel_a) // output[0:0]
.din ({mpixel_prepage, in_busy[3], mpix_a_w}), // input[9:0]
.dout ({mpixel_page, mpixel_re, mpixel_a}) // output[9:0]
);
dly_var #(
......@@ -405,7 +421,7 @@ D11 - negate for mode 3 (SS)
// data to be input to phase rotator
wire signed [OUT_WIDTH-1:0] dtt_rd_data = dtt_rd_data_w[OUT_WIDTH-1:0]; // valid with dtt_rd_regen_dv[2]
wire dtt_first_quad_out = ~dtt_out_ram_cntr[3] & ~dtt_out_ram_cntr[2];
// reg [9:0] dtt_out_ram_cntr;
always @(posedge clk) begin
......@@ -413,9 +429,9 @@ D11 - negate for mode 3 (SS)
else if (dtt_inc16) dtt_out_ram_cntr <= dtt_out_ram_cntr + 1;
dtt_out_ram_wah <= dtt_out_ram_cntr - dtt_sub16;
if (rst) dtt_dly_cntr <= 0;
else if (dtt_start_fill) dtt_dly_cntr <= DTT_OUT_DELAY;
else if (|dtt_dly_cntr) dtt_dly_cntr <= dtt_dly_cntr - 1;
if (rst) dtt_dly_cntr <= 0;
else if (dtt_start_fill & dtt_first_quad_out) dtt_dly_cntr <= DTT_OUT_DELAY;
else if (|dtt_dly_cntr) dtt_dly_cntr <= dtt_dly_cntr - 1;
dtt_start_out <= dtt_dly_cntr == 1;
......@@ -510,9 +526,12 @@ D11 - negate for mode 3 (SS)
pre_last_out_r <= out_addr_r == 8'hfe;
if (rst) pre_busy_r <= 0;
else if (pre_last_out_r) pre_busy_r <= 1;
else if (pre_last_in_r) pre_busy_r <= 1;
else if (dead_cntr == 0) pre_busy_r <= 0;
pre_last_in_r <= in_cntr[7:0] == 8'hfd;
if (~pre_busy_r) dead_cntr <= DEAD_CYCLES;
else dead_cntr <= dead_cntr - 1;
......
......@@ -38,6 +38,8 @@
* with at least one of the Free Software programs.
*/
`timescale 1ns/1ps
`define INSTANTIATE_DSP48E1
`define PRELOAD_BRAMS
module mclt_test_01 ();
`ifdef IVERILOG
`ifdef NON_VDT_ENVIROMENT
......@@ -58,8 +60,7 @@ module mclt_test_01 ();
`endif // IVERILOG
parameter CLK_PERIOD = 10; // ns
parameter WIDTH = 25; //4; // input data width
// parameter WIDTH = 25; //4; // input data width
parameter SHIFT_WIDTH = 7; // bits in shift (7 bits - fractional)
parameter COORD_WIDTH = 10; // bits in full coordinate 10 for 18K RAM
parameter PIXEL_WIDTH = 16; // input pixel width (unsigned)
......@@ -74,169 +75,145 @@ module mclt_test_01 ();
parameter DSP_P_WIDTH = 48;
parameter DEAD_CYCLES = 14; // start next block immediately, or with longer pause
parameter DCT_GAP = 16; // between runs
//parameter DCT_GAP = 16; // between runs
parameter SAME_BITS=4; // (3) to match 24-bit widths
//parameter SAME_BITS=4; // (3) to match 24-bit widths
reg RST = 1'b1;
reg CLK = 1'b0;
reg [3:0] phase_in;
reg [3:0] phase_out;
reg run_in;
reg run_out;
reg run_out_d;
reg en_x = 0;
// reg end_x = 0;
reg [2:0] x_ra;
wire [2:0] x_wa = phase_in[2:0];
wire x_we = !phase_in[3] && run_in;
reg [WIDTH-1:0] x_in;
reg [WIDTH-1:0] x_in_2d;
reg [WIDTH-1:0] x_out;
reg [WIDTH-1:0] x_ram[0:7];
wire [WIDTH-1:0] x_out_w = x_ram[x_ra];
reg start = 0;
reg start2 = 0; // second start for 2d
reg [1:0] mode_in= 0; // 3; // [0] - vertical pass 0: dct, 1 - dst, [1] - horizontal pass
wire [1:0] mode_out; // [0] - vertical pass 0: dct, 1 - dst, [1] - horizontal pass
wire [OUT_WIDTH-1:0] y_dct;
wire pre2_start_out;
wire en_out;
reg y_pre_we;
reg y_we;
reg [3:0] phase_y=8;
reg [2:0] y_wa;
reg [2:0] y_ra;
reg y_dv=0;
reg signed [OUT_WIDTH-1:0] y_ram[0:7];
wire signed [OUT_WIDTH-1:0] y_out = y_ram[y_ra]; // SuppressThisWarning VEditor - simulation only
reg signed [WIDTH-1:0] data_in[0:63];
reg signed [OUT_WIDTH-1:0] data_out[0:63];
wire pre_last_in_2d; // SuppressThisWarning VEditor - simulation only
wire pre_first_out_2d; // SuppressThisWarning VEditor - simulation only
wire pre_busy_2d; // SuppressThisWarning VEditor - simulation only
wire dv_2d; // SuppressThisWarning VEditor - simulation only
// wire signed [OUT_WIDTH-1:0] d_out_2d;
wire pre_last_in_2dr; // SuppressThisWarning VEditor - simulation only
wire pre_first_out_2dr; // SuppressThisWarning VEditor - simulation only
wire pre_busy_2dr; // SuppressThisWarning VEditor - simulation only
wire dv_2dr; // SuppressThisWarning VEditor - simulation only
wire signed [OUT_WIDTH-1:0] d_out_2dr; // SuppressThisWarning VEditor - simulation only
integer i,j, i1, ir;
reg [PIXEL_WIDTH-1 : 0] tile_shift[0:258]; // SuppressThisWarning VEditor : assigned in $readmem() system task
reg [PIXEL_WIDTH-1 : 0] tiles[0:1023];
reg [SHIFT_WIDTH-1 : 0] shifts_x[0:3];
reg [SHIFT_WIDTH-1 : 0] shifts_y[0:3];
reg [3 : 0] bayer[0:3];
integer i, n, n_out;
initial begin
for (i=0; i<64; i=i+1) begin
`ifdef DCT_INPUT_UNITY
data_in[i] = (i[2:0] == (i[5:3] ^ 3'h0)) ? {2'b1,{WIDTH-2{1'b0}}} : 0;
ir= (i[2:0] == (i[5:3] ^ 3'h1)) ? {2'b1,{WIDTH-2{1'b0}}} : 0;
data_in[i] = ir;
`else
ir = $random;
data_in[i] = ((i[5:3] == 0) || (i[5:3] == 7) || (i[2:0] == 0) || (i[2:0] == 7))? 0:
{{SAME_BITS{ir[WIDTH -SAME_BITS - 1]}},ir[WIDTH -SAME_BITS-1:0]};
`endif
$readmemh("input_data/tile_01.dat",tile_shift);
shifts_x[0] = tile_shift[0][SHIFT_WIDTH-1:0];
shifts_y[0] = tile_shift[1][SHIFT_WIDTH-1:0];
bayer[0] = tile_shift[2][3:0];
for (i=0; i<256; i=i+1) begin
tiles['h000 + i] = tile_shift[i+3];
end
$readmemh("input_data/tile_02.dat",tile_shift);
shifts_x[1] = tile_shift[0][SHIFT_WIDTH-1:0];
shifts_y[1] = tile_shift[1][SHIFT_WIDTH-1:0];
bayer[1] = tile_shift[2][3:0];
for (i=0; i<256; i=i+1) begin
tiles['h100 + i] = tile_shift[i+3];
end
$readmemh("input_data/tile_03.dat",tile_shift);
shifts_x[2] = tile_shift[0][SHIFT_WIDTH-1:0];
shifts_y[2] = tile_shift[1][SHIFT_WIDTH-1:0];
bayer[2] = tile_shift[2][3:0];
for (i=0; i<256; i=i+1) begin
tiles['h200 + i] = tile_shift[i+3];
end
$display("Input data in line-scan order:");
for (i=0; i<64; i=i+8) begin
$display ("%d, %d, %d, %d, %d, %d, %d, %d",data_in[i+0],data_in[i+1],data_in[i+2],data_in[i+3],
data_in[i+4],data_in[i+5],data_in[i+6],data_in[i+7]);
$readmemh("input_data/tile_04.dat",tile_shift);
shifts_x[3] = tile_shift[0][SHIFT_WIDTH-1:0];
shifts_y[3] = tile_shift[1][SHIFT_WIDTH-1:0];
bayer[3] = tile_shift[2][3:0];
for (i=0; i<256; i=i+1) begin
tiles['h300 + i] = tile_shift[i+3];
end
$display("");
$display("Input data - transposed:");
j=0;
for (i=0; i < 8; i=i+1) begin
$display ("%d, %d, %d, %d, %d, %d, %d, %d",data_in[i+ 0],data_in[i+ 8],data_in[i+16],data_in[i+24],
data_in[i+32],data_in[i+40],data_in[i+48],data_in[i+56]);
for (n=0;n<4;n=n+1) begin
$display("Tile %d: shift x = %h, shift_y = %h, bayer = %h", 0, shifts_x[n], shifts_y[n], bayer[n]);
for (i = 256 * n; i < 256 * (n + 1); i = i + 16) begin
$display ("%h, %h, %h, %h, %h, %h, %h, %h, %h, %h, %h, %h, %h, %h, %h, %h",
tiles[i+ 0],tiles[i+ 1],tiles[i+ 2],tiles[i+ 3],
tiles[i+ 4],tiles[i+ 5],tiles[i+ 6],tiles[i+ 7],
tiles[i+ 8],tiles[i+ 9],tiles[i+10],tiles[i+11],
tiles[i+12],tiles[i+13],tiles[i+14],tiles[i+15]);
end
$display("");
end
$display("");
end
end
reg start;
reg [SHIFT_WIDTH-1:0] x_shft;
reg [SHIFT_WIDTH-1:0] y_shft;
reg [3:0] bayer_r;
reg [1:0] page_in;
wire pre_busy_w;
wire pre_busy;
reg LATE = 0;
wire mpixel_re;
wire mpixel_page;
reg mpixel_reg;
reg mpixel_valid;
wire [7:0] mpixel_a;
reg [PIXEL_WIDTH-1 : 0] pixel_r;
reg [PIXEL_WIDTH-1 : 0] pixel_r2;
wire [PIXEL_WIDTH-1 : 0] mpixel_d = mpixel_valid ? pixel_r2 : {PIXEL_WIDTH{1'bz}};
wire pre_last_in; // SuppressThisWarning VEditor - output only
wire pre_first_out; // SuppressThisWarning VEditor - output only
wire pre_last_out; // SuppressThisWarning VEditor - output only
wire [7:0] out_addr; // SuppressThisWarning VEditor - output only
wire dv; // SuppressThisWarning VEditor - output only
wire [OUT_WIDTH-1:0] dout; // SuppressThisWarning VEditor - output only
assign #(1) pre_busy = pre_busy_w;
always #(CLK_PERIOD/2) CLK = ~CLK;
initial begin
$dumpfile(fstname);
$dumpvars(0,mclt_test_01); // SuppressThisWarning VEditor
#100;
start = 0;
page_in = 0;
LATE = 0;
RST = 0;
#100;
repeat (10) @(posedge CLK);
#1 en_x = 1;
for (i = 0; i < 64; i = i+1) begin
@(posedge CLK);
#1;
x_in = data_in[i]; // >>x_wa;
if (i==63) begin
en_x = 0;
// #1;
for (n = 0; n < 4; n = n+1) begin
if (n>2) LATE = 1;
while (pre_busy || LATE) begin
if (!pre_busy) LATE = 0;
@(posedge CLK);
#1;
end
if (&i[2:0]) repeat (8) @(posedge CLK);
end
#1 x_in = 0;
repeat (64) @(posedge CLK);
$display("");
$display("output data - transposed:");
for (i=0; i<64; i=i+8) begin
$display ("%d, %d, %d, %d, %d, %d, %d, %d",data_out[i+0],data_out[i+1],data_out[i+2],data_out[i+3],
data_out[i+4],data_out[i+5],data_out[i+6],data_out[i+7]);
end
// repeat (64) @(posedge CLK);
// $finish;
end
initial begin
wait (!RST);
while (!start) begin
@(posedge CLK);
#1;
end
for (i1 = 0; i1 < 192; i1 = i1+1) begin
@(posedge CLK);
#1;
x_in_2d = data_in[i1 & 63];
if ((i1 & 63) == 0) mode_in = mode_in+1;
start2 = (i1 & 63) == 63;
end
for (i1 = 0; i1 < 64; i1 = i1+1) begin
start = 1;
x_shft = shifts_x[n];
y_shft = shifts_y[n];
bayer_r = bayer[n];
@(posedge CLK);
#1;
start2 = 0;
x_in_2d = data_in[i1];
end
repeat (DCT_GAP) @(posedge CLK);
#1;
start2 = 1;
for (i1 = 0; i1 < 64; i1 = i1+1) begin
start = 0;
x_shft = 'bz;
y_shft = 'bz;
bayer_r = 'bz;
@(posedge CLK);
#1;
start2 = 0;
x_in_2d = data_in[63-i1];
end
repeat (300) @(posedge CLK);
$finish;
// #1;
end
// emergency finish
repeat (1024) @(posedge CLK);
$finish;
//pre_last_out
end
always @ (posedge CLK) if (!RST) begin
mpixel_reg <= mpixel_re;
mpixel_valid <= mpixel_reg;
if (mpixel_re) pixel_r <= tiles[{page_in,mpixel_a}];
if (mpixel_reg) pixel_r2 <= pixel_r;
if (mpixel_page) page_in <= page_in + 1;
if (pre_last_out) n_out <= n_out + 1;
initial j = 0;
always @ (posedge CLK) begin
if (y_dv) begin
//$display (" y[0x%x] => 0x%x %d, j=%d @%t",y_ra,y_out,y_out,j,$time);
data_out[{j[2:0],j[5:3]}] = y_out; // transpose array
#1 j = j+1;
end
end
initial begin
n_out = 0;
while (n_out < 4) @(posedge CLK);
repeat (32) @(posedge CLK);
$finish;
end
mclt16x16 #(
.SHIFT_WIDTH (SHIFT_WIDTH),
......@@ -253,23 +230,24 @@ module mclt_test_01 ();
.DSP_P_WIDTH (DSP_P_WIDTH),
.DEAD_CYCLES (DEAD_CYCLES)
) mclt16x16_i (
.clk (CLK), // input
.rst (RST), // input
.start (), // input
.x_shft (), // input[6:0]
.y_shft (), // input[6:0]
.bayer (), // input[3:0]
.mpixel_a (), // output[7:0]
.mpixel_d (), // input[15:0]
.pre_busy (), // output
.pre_last_in (), // output reg
.pre_first_out (), // output
.pre_last_out (), // output
.out_addr (), // output[7:0]
.dv (), // output
.dout () // output[24:0] signed
.clk (CLK), // input
.rst (RST), // input
.start (start), // input
.x_shft (x_shft), // input[6:0]
.y_shft (y_shft), // input[6:0]
.bayer (bayer_r), // input[3:0]
.mpixel_re (mpixel_re), // output
.mpixel_page (mpixel_page), // output //!< increment pixel page after this
.mpixel_a (mpixel_a), // output[7:0]
.mpixel_d (mpixel_d), // input[15:0]
.pre_busy (pre_busy_w), // output
.pre_last_in (pre_last_in), // output
.pre_first_out (pre_first_out), // output
.pre_last_out (pre_last_out), // output
.out_addr (out_addr), // output[7:0]
.dv (dv), // output
.dout (dout) // output[24:0] signed
);
endmodule
......@@ -76,7 +76,7 @@ module mclt_wnd_mul#(
.coord (x_in), // input[3:0]
.shift (x_shft), // input[2:0] signed
.coord_out (x_full), // output[4:0] reg
.zero (x_zero) // output reg
.zero (x_zero) // output reg
);
mclt_full_shift #(
......
......@@ -153,7 +153,7 @@ module phase_rotator#(
dly_var #(
.WIDTH(1),
.DLY_WIDTH(4)
) dly_cntrv_i (
) dly_sign_cs_d_i (
.clk (clk), // input
.rst (rst), // input
.dly (4'h2), // input[3:0]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment