diff --git a/dct_tests_04.sav b/dct_tests_04.sav new file mode 100644 index 0000000000000000000000000000000000000000..51c159e14e2e4ac41b23d33ad09d9e508d027083 --- /dev/null +++ b/dct_tests_04.sav @@ -0,0 +1,231 @@ +[*] +[*] GTKWave Analyzer v3.3.78 (w)1999-2016 BSI +[*] Wed Dec 13 06:00:34 2017 +[*] +[dumpfile] "/home/eyesis/nc393/elphel393/fpga-elphel/x393_branch_dct/simulation/dct_tests_04-20171212192023266.fst" +[dumpfile_mtime] "Wed Dec 13 02:20:23 2017" +[dumpfile_size] 219185 +[savefile] "/home/eyesis/nc393/elphel393/fpga-elphel/x393_branch_dct/dct_tests_04.sav" +[timestart] 1588000 +[size] 1814 1171 +[pos] 0 40 +*-15.492632 1795000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 +[treeopen] dct_tests_03. +[treeopen] dct_tests_03.dtt_iv_8x8_i. +[treeopen] dct_tests_03.dtt_iv_8x8r_i. +[sst_width] 318 +[signals_width] 284 +[sst_expanded] 1 +[sst_vpaned_height] 344 +@420 +dct_tests_03.i +dct_tests_03.i1 +dct_tests_03.j +@28 +dct_tests_03.CLK +dct_tests_03.RST +[color] 2 +dct_tests_03.start +[color] 2 +dct_tests_03.start2 +@22 +dct_tests_03.mode_in[1:0] +@8420 +dct_tests_03.x_in_2d[24:0] +@22 +dct_tests_03.mode_out[1:0] +@8420 +dct_tests_03.d_out_2dr[24:0] +@800200 +-dtt_iv8x8_direct +@28 +dct_tests_03.dtt_iv_8x8_i.rst +dct_tests_03.dtt_iv_8x8_i.clk +dct_tests_03.dtt_iv_8x8_i.start +@22 +dct_tests_03.dtt_iv_8x8_i.mode[1:0] +@28 +dct_tests_03.dtt_iv_8x8_i.pre_last_in +dct_tests_03.dtt_iv_8x8_i.pre_busy +@c00200 +-debug +@28 +dct_tests_03.dtt_iv_8x8_i.transpose_start +@22 +dct_tests_03.dtt_iv_8x8_i.transpose_debug_di[7:0] +@8022 +dct_tests_03.dtt_iv_8x8_i.transpose_debug_di[7:0] +@c00022 +dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +@28 +(0)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(1)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(2)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(3)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(4)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(5)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] 
+(6)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(7)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +@1401200 +-group_end +@c08022 +dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +@28 +(0)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(1)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(2)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(3)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(4)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(5)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(6)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +(7)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0] +@1401200 +-group_end +@28 +(0)dct_tests_03.dtt_iv_8x8_i.transpose_we[1:0] +dct_tests_03.dtt_iv_8x8_i.pre_dsth +@8022 +dct_tests_03.dtt_iv_8x8_i.transpose_cntr[6:0] +@22 +dct_tests_03.dtt_iv_8x8_i.transpose_ra[7:0] +@28 +dct_tests_03.dtt_iv_8x8_i.dctv_start_0_r +dct_tests_03.dtt_iv_8x8_i.dctv_start_1_r +@800028 +dct_tests_03.dtt_iv_8x8_i.pre2_dstv[1:0] +@28 +(0)dct_tests_03.dtt_iv_8x8_i.pre2_dstv[1:0] +(1)dct_tests_03.dtt_iv_8x8_i.pre2_dstv[1:0] +@1001200 +-group_end +@c00028 +[color] 2 +dct_tests_03.dtt_iv_8x8_i.dctv_out_we[1:0] +@28 +(0)dct_tests_03.dtt_iv_8x8_i.dctv_out_we[1:0] +(1)dct_tests_03.dtt_iv_8x8_i.dctv_out_we[1:0] +@1401200 +-group_end +@28 +dct_tests_03.dtt_iv_8x8_i.pre_dstv +@200 +-alt +@28 +dct_tests_03.dtt_iv_8x8_i.dstv +dct_tests_03.dtt_iv_8x8_i.out_sel +dct_tests_03.dtt_iv_8x8_i.out_run +@22 +dct_tests_03.dtt_iv_8x8_i.out_cntr[6:0] +@28 +dct_tests_03.dtt_iv_8x8_i.out_sel +@c00022 +dct_tests_03.dtt_iv_8x8_i.out_wa[3:0] +@28 +(0)dct_tests_03.dtt_iv_8x8_i.out_wa[3:0] +(1)dct_tests_03.dtt_iv_8x8_i.out_wa[3:0] +(2)dct_tests_03.dtt_iv_8x8_i.out_wa[3:0] +(3)dct_tests_03.dtt_iv_8x8_i.out_wa[3:0] +@1401200 +-group_end +@28 +dct_tests_03.dtt_iv_8x8_i.out_we +dct_tests_03.dtt_iv_8x8_i.sub16 +dct_tests_03.dtt_iv_8x8_i.inc16 +dct_tests_03.dtt_iv_8x8_i.start_out +@200 +-top +@22 +dct_tests_03.out_ram_wa[4:0] +@28 +dct_tests_03.out_ram_cntr +dct_tests_03.out_ram_wah +@22 +dct_tests_03.out_wa[3:0] +@28 
+dct_tests_03.out_we +dct_tests_03.out_ram_ren +dct_tests_03.out_ram_regen +@22 +dct_tests_03.out_ram_ra[5:0] +@28 +dct_tests_03.out_pre_first +dct_tests_03.out_ram_dv +@1401200 +-debug +@22 +dct_tests_03.dtt_iv_8x8_i.mode_out[1:0] +@28 +dct_tests_03.dtt_iv_8x8_i.pre_busy +@c00200 +-direct_internal +@28 +dct_tests_03.dtt_iv_8x8_i.dcth_en0 +dct_tests_03.dtt_iv_8x8_i.dcth_en1 +dct_tests_03.dtt_iv_8x8_i.dcth_start_0_r +dct_tests_03.dtt_iv_8x8_i.dcth_start_1_r +@22 +dct_tests_03.dtt_iv_8x8_i.mode[1:0] +dct_tests_03.dtt_iv_8x8_i.mode_h[1:0] +dct_tests_03.dtt_iv_8x8_i.mode_h_late[1:0] +dct_tests_03.dtt_iv_8x8_i.mode_v[1:0] +dct_tests_03.dtt_iv_8x8_i.mode_out[1:0] +@28 +dct_tests_03.dtt_iv_8x8_i.dctv_start_0_w +@22 +dct_tests_03.dtt_iv_8x8_i.dctv_start_1_w +@800028 +dct_tests_03.dtt_iv_8x8_i.pre2_dsth[1:0] +@28 +(0)dct_tests_03.dtt_iv_8x8_i.pre2_dsth[1:0] +(1)dct_tests_03.dtt_iv_8x8_i.pre2_dsth[1:0] +@1001200 +-group_end +@200 +- +@800028 +dct_tests_03.dtt_iv_8x8_i.pre2_dstv[1:0] +@28 +(0)dct_tests_03.dtt_iv_8x8_i.pre2_dstv[1:0] +(1)dct_tests_03.dtt_iv_8x8_i.pre2_dstv[1:0] +@800200 +-g3 +@28 +dct_tests_03.dtt_iv_8x8_i.dct_iv8_1d_pass2_0_i.start +dct_tests_03.dtt_iv_8x8_i.dct_iv8_1d_pass2_0_i.dst_in +dct_tests_03.dtt_iv_8x8_i.dct_iv8_1d_pass2_0_i.dst_out +@1000200 +-g3 +@28 +dct_tests_03.dtt_iv_8x8_i.dct_iv8_1d_pass2_1_i.start +dct_tests_03.dtt_iv_8x8_i.dct_iv8_1d_pass2_1_i.dst_in +dct_tests_03.dtt_iv_8x8_i.dct_iv8_1d_pass2_1_i.dst_out +@200 +- +@1001200 +-group_end +@1401200 +-direct_internal +@1000200 +-dtt_iv8x8_direct +@800200 +-dtt_iv8x8_inv +@29 +dct_tests_03.dtt_iv_8x8r_i.clk +@28 +dct_tests_03.dtt_iv_8x8r_i.start +dct_tests_03.dtt_iv_8x8r_i.mode[1:0] +dct_tests_03.dtt_iv_8x8r_i.mode_out[1:0] +@800200 +-inv_internals +@200 +- +@1000200 +-inv_internals +@200 +- +@1000200 +-dtt_iv8x8_inv +@200 +-dbg +[pattern_trace] 1 +[pattern_trace] 0 diff --git a/dsp/dct_tests_04.tf b/dsp/dct_tests_04.tf new file mode 100644 index 
0000000000000000000000000000000000000000..5ca21aaab80e7fb35ec7a20a2562a001d28d251a --- /dev/null +++ b/dsp/dct_tests_04.tf @@ -0,0 +1,449 @@ +/*! + * Module:dct_tests_03 + * @file dct_tests_04.tf + * @date 2016-12-02 + * @author Andrey Filippov + * + * @brief 1d 8-point DCT type IV for lapped mdct 16->8, operates in 16 clock cycles + * Uses 2 DSP blocks + * + * @copyright Copyright (c) 2016 Elphel, Inc. + * + * License: + * + * dct_tests_04.tf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * dct_tests_04.tf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Additional permission under GNU GPL version 3 section 7: + * If you modify this Program, or any covered work, by linking or combining it + * with independent modules provided by the FPGA vendor only (this permission + * does not extend to any 3-rd party modules, "soft cores" or macros) under + * different license terms solely for the purpose of generating binary "bitstream" + * files and/or simulating the code, the copyright holders of this Program give + * you the right to distribute the covered work without those independent modules + * as long as the source code for them is available from the FPGA vendor free of + * charge, and there is no dependence on any encrypted modules for simulating of + * the combined code. This permission applies to you if the distributed code + * contains all the components and scripts required to completely simulate it + * with at least one of the Free Software programs. 
+ */ +`timescale 1ns/1ps +// No saturation here, and no rounding as we do not need to match decoder (be bit-precise), skipping rounding adder +// will reduce needed resources +//`define DCT_INPUT_UNITY +module dct_tests_03 (); +// parameter fstname="dct_tests_03.fst"; +`ifdef IVERILOG + `ifdef NON_VDT_ENVIROMENT + parameter fstname="dct_tests_03.fst"; + `else + `include "IVERILOG_INCLUDE.v" + `endif // NON_VDT_ENVIROMENT +`else // IVERILOG + `ifdef CVC + `ifdef NON_VDT_ENVIROMENT + parameter fstname = "x393.fst"; + `else // NON_VDT_ENVIROMENT + `include "IVERILOG_INCLUDE.v" + `endif // NON_VDT_ENVIROMENT + `else + parameter fstname = "dct_tests_03.fst"; + `endif // CVC +`endif // IVERILOG + + parameter CLK_PERIOD = 10; // ns + parameter WIDTH = 25; //4; // input data width + parameter OUT_WIDTH = 25; //4; // output data width + parameter TRANSPOSE_WIDTH = 25; //4; // width of the transpose memory (intermediate results) + parameter OUT_RSHIFT = 2; // overall right shift of the result from input, aligned by MSB (>=3 will never cause saturation) + parameter OUT_RSHIFT2 = 0; // overall right shift for the second (vertical) pass + + parameter DCT_GAP = 16; // between runs + + parameter SAME_BITS=4; // (3) to match 24-bit widths + + reg RST = 1'b1; + reg CLK = 1'b0; + reg [3:0] phase_in; + reg [3:0] phase_out; + reg run_in; + reg run_out; + reg run_out_d; + + reg en_x = 0; +// reg end_x = 0; + reg [2:0] x_ra; + wire [2:0] x_wa = phase_in[2:0]; + + + wire x_we = !phase_in[3] && run_in; + reg [WIDTH-1:0] x_in; + reg [WIDTH-1:0] x_in_2d; + reg [WIDTH-1:0] x_out; + reg [WIDTH-1:0] x_ram[0:7]; + wire [WIDTH-1:0] x_out_w = x_ram[x_ra]; + + reg start = 0; + reg start2 = 0; // second start for 2d + reg [1:0] mode_in= 0; // 3; // [0] - vertical pass 0: dct, 1 - dst, [1] - horizontal pass + wire [1:0] mode_out; // [0] - vertical pass 0: dct, 1 - dst, [1] - horizontal pass + + wire [OUT_WIDTH-1:0] y_dct; + wire pre2_start_out; + wire en_out; + + reg y_pre_we; + reg y_we; + reg 
[3:0] phase_y=8; + reg [2:0] y_wa; + reg [2:0] y_ra; + reg y_dv=0; + reg signed [OUT_WIDTH-1:0] y_ram[0:7]; + wire signed [OUT_WIDTH-1:0] y_out = y_ram[y_ra]; // SuppressThisWarning VEditor - simulation only + reg signed [WIDTH-1:0] data_in[0:63]; + reg signed [OUT_WIDTH-1:0] data_out[0:63]; + + wire pre_last_in_2d; // SuppressThisWarning VEditor - simulation only + wire pre_first_out_2d; // SuppressThisWarning VEditor - simulation only + wire pre_busy_2d; // SuppressThisWarning VEditor - simulation only + wire dv_2d; // SuppressThisWarning VEditor - simulation only +// wire signed [OUT_WIDTH-1:0] d_out_2d; + + wire pre_last_in_2dr; // SuppressThisWarning VEditor - simulation only + wire pre_first_out_2dr; // SuppressThisWarning VEditor - simulation only + wire pre_busy_2dr; // SuppressThisWarning VEditor - simulation only + wire dv_2dr; // SuppressThisWarning VEditor - simulation only + wire signed [OUT_WIDTH-1:0] d_out_2dr; // SuppressThisWarning VEditor - simulation only + + + integer i,j, i1, ir; + initial begin + for (i=0; i<64; i=i+1) begin + `ifdef DCT_INPUT_UNITY + data_in[i] = (i[2:0] == (i[5:3] ^ 3'h0)) ? {2'b1,{WIDTH-2{1'b0}}} : 0; + ir= (i[2:0] == (i[5:3] ^ 3'h1)) ? {2'b1,{WIDTH-2{1'b0}}} : 0; + data_in[i] = ir; + `else + ir = $random; + data_in[i] = ((i[5:3] == 0) || (i[5:3] == 7) || (i[2:0] == 0) || (i[2:0] == 7))? 
0: + {{SAME_BITS{ir[WIDTH -SAME_BITS - 1]}},ir[WIDTH -SAME_BITS-1:0]}; + `endif + end + $display("Input data in line-scan order:"); + for (i=0; i<64; i=i+8) begin + $display ("%d, %d, %d, %d, %d, %d, %d, %d",data_in[i+0],data_in[i+1],data_in[i+2],data_in[i+3], + data_in[i+4],data_in[i+5],data_in[i+6],data_in[i+7]); + end + $display(""); + $display("Input data - transposed:"); + j=0; + for (i=0; i < 8; i=i+1) begin + $display ("%d, %d, %d, %d, %d, %d, %d, %d",data_in[i+ 0],data_in[i+ 8],data_in[i+16],data_in[i+24], + data_in[i+32],data_in[i+40],data_in[i+48],data_in[i+56]); + end + $display(""); + + end + + always #(CLK_PERIOD/2) CLK = ~CLK; + initial begin + $dumpfile(fstname); + $dumpvars(0,dct_tests_03); // SuppressThisWarning VEditor + #100; + RST = 0; + #100; + repeat (10) @(posedge CLK); +#1 en_x = 1; + for (i = 0; i < 64; i = i+1) begin + @(posedge CLK); + #1; + x_in = data_in[i]; // >>x_wa; + if (i==63) begin + en_x = 0; + end + if (&i[2:0]) repeat (8) @(posedge CLK); + end + #1 x_in = 0; + repeat (64) @(posedge CLK); + + $display(""); + $display("output data - transposed:"); + for (i=0; i<64; i=i+8) begin + $display ("%d, %d, %d, %d, %d, %d, %d, %d",data_out[i+0],data_out[i+1],data_out[i+2],data_out[i+3], + data_out[i+4],data_out[i+5],data_out[i+6],data_out[i+7]); + end + +// repeat (64) @(posedge CLK); +// $finish; + end + + initial begin + wait (!RST); + while (!start) begin + @(posedge CLK); + #1; + end + for (i1 = 0; i1 < 192; i1 = i1+1) begin + @(posedge CLK); + #1; + x_in_2d = data_in[i1 & 63]; + if ((i1 & 63) == 0) mode_in = mode_in+1; + start2 = (i1 & 63) == 63; + end + for (i1 = 0; i1 < 64; i1 = i1+1) begin + @(posedge CLK); + #1; + start2 = 0; + x_in_2d = data_in[i1]; + end + + repeat (DCT_GAP) @(posedge CLK); + #1; + start2 = 1; + for (i1 = 0; i1 < 64; i1 = i1+1) begin + @(posedge CLK); + #1; + start2 = 0; + x_in_2d = data_in[63-i1]; + end + + repeat (300) @(posedge CLK); + $finish; + + end + + + initial j = 0; + always @ (posedge CLK) begin + if 
(y_dv) begin +//$display (" y[0x%x] => 0x%x %d, j=%d @%t",y_ra,y_out,y_out,j,$time); + data_out[{j[2:0],j[5:3]}] = y_out; // transpose array + #1 j = j+1; + end + end + + + + always @ (posedge CLK) begin + if (RST) run_in <= 0; + else if (en_x) run_in <= 1; + else if (phase_in == 15) run_in <= 0; + + if (RST) run_out <= 0; + else if ((phase_in == 5) || (phase_out==15)) run_out <= run_in; + + if (!run_in) phase_in <= 0; + else phase_in <= phase_in + 1; + + if (!run_out) phase_out <= 0; + else phase_out <= phase_out + 1; + + run_out_d <= run_out; + + if (RST) start <= 0; + else start <= run_out & !run_out_d; + + {y_we,y_pre_we} <= {y_pre_we, en_out}; + + if (RST) phase_y <= 8; + else if (pre2_start_out) phase_y <= 0; + else if (y_pre_we) phase_y <= phase_y + 1; + + if (RST) y_dv <= 0; + else if ((phase_y == 6) && y_we) y_dv <= 1; + else if (y_ra == 7) y_dv <= 0; + + if (!y_dv) y_ra <= 0; + else y_ra <= y_ra + 1; + + if (y_we) y_ram[y_wa] <= y_dct; + + + if (x_we) x_ram[x_wa] <= x_in; + + x_out <= x_out_w; +//X2-X7-X3-X4-X5-X6-X0-X1-*-X3-X5-X4-*-X1-X7-* + case (phase_out) + 4'h0: x_ra <= 2; + 4'h1: x_ra <= 7; + 4'h2: x_ra <= 3; + 4'h3: x_ra <= 4; + 4'h4: x_ra <= 5; + 4'h5: x_ra <= 6; + 4'h6: x_ra <= 0; + 4'h7: x_ra <= 1; + 4'h8: x_ra <= 'bx; + 4'h9: x_ra <= 3; + 4'ha: x_ra <= 5; + 4'hb: x_ra <= 4; + 4'hc: x_ra <= 'bx; + 4'hd: x_ra <= 6; + 4'he: x_ra <= 7; + 4'hf: x_ra <= 'bx; + endcase + + case (phase_y[2:0]) + 3'h0: y_wa <= 0; + 3'h1: y_wa <= 7; + 3'h2: y_wa <= 4; + 3'h3: y_wa <= 3; + 3'h4: y_wa <= 1; + 3'h5: y_wa <= 6; + 3'h6: y_wa <= 2; + 3'h7: y_wa <= 5; + endcase + + end + + dtt_iv8_1d #( + .WIDTH (WIDTH), + .OUT_WIDTH (OUT_WIDTH), + .OUT_RSHIFT (OUT_RSHIFT), + .B_WIDTH (18), + .A_WIDTH (25), + .P_WIDTH (48), + .COSINE_SHIFT (17), + .COS_01_32 (130441), + .COS_03_32 (125428), + .COS_04_32 (121095), + .COS_05_32 (115595), + .COS_07_32 (101320), + .COS_08_32 (92682), + .COS_09_32 (83151), + .COS_11_32 (61787), + .COS_12_32 (50159), + .COS_13_32 (38048), + 
.COS_15_32 (12847) + ) dtt_iv8_1d_i ( + .clk (CLK), // input + .rst (RST), // input + .en (run_in), // input + .dst_in (mode_in[1]), // input + .d_in (x_out), // input[23:0] + .start (start), // input + .dout (y_dct), // output[15:0] + .pre2_start_out (pre2_start_out), // output reg + .en_out (en_out), // output reg + .dst_out (), // output + .y_index () // output[2:0] reg + ); + parameter ODEPTH = 5; + reg signed [OUT_WIDTH-1:0] out_ram[0: ((1<