Commit 91b09d25 authored by Andrey Filippov's avatar Andrey Filippov

Trying alternative DTT output for external buffer

parent d8a31b5f
[*]
[*] GTKWave Analyzer v3.3.78 (w)1999-2016 BSI
[*] Wed Dec 6 02:19:47 2017
[*] Sat Dec 9 01:30:06 2017
[*]
[dumpfile] "/home/eyesis/nc393/elphel393/fpga-elphel/x393_branch_dct/simulation/dct_tests_02-20171205191817720.fst"
[dumpfile_mtime] "Wed Dec 6 02:18:18 2017"
[dumpfile_size] 244333
[dumpfile] "/home/eyesis/nc393/elphel393/fpga-elphel/x393_branch_dct/simulation/dct_tests_02-20171206113130304.fst"
[dumpfile_mtime] "Wed Dec 6 18:31:30 2017"
[dumpfile_size] 222572
[savefile] "/home/eyesis/nc393/elphel393/fpga-elphel/x393_branch_dct/dct_tests_02.sav"
[timestart] 0
[timestart] 1319200
[size] 1814 1171
[pos] 0 40
*-20.492632 1605000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
*-15.492632 1451000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
[treeopen] dct_tests_02.
[treeopen] dct_tests_02.dtt_iv_8x8_i.
[treeopen] dct_tests_02.dtt_iv_8x8r_i.
......@@ -136,8 +136,6 @@ dct_tests_02.dtt_iv_8x8_i.pre2_dstv[1:0]
(1)dct_tests_02.dtt_iv_8x8_i.pre2_dstv[1:0]
@1001200
-group_end
@28
(0)dct_tests_02.dtt_iv_8x8_i.dctv_out_we_1[1:0]
@420
[color] 3
dct_tests_02.dtt_iv_8x8_i.dctv_dout0[23:0]
......@@ -151,28 +149,41 @@ dct_tests_02.dtt_iv_8x8_i.debug_dctv_dout[23:0]
@8420
[color] 5
dct_tests_02.dtt_iv_8x8_i.debug_dctv_dout[23:0]
@22
@c00022
[color] 6
dct_tests_02.dtt_iv_8x8_i.dctv_out_cntr[6:0]
dct_tests_02.dtt_iv_8x8_i.dctv_out_wa_1[4:0]
@8022
dct_tests_02.dtt_iv_8x8_i.dctv_out_wa_1[4:0]
@28
[color] 6
(0)dct_tests_02.dtt_iv_8x8_i.dctv_out_cntr[6:0]
[color] 6
(1)dct_tests_02.dtt_iv_8x8_i.dctv_out_cntr[6:0]
[color] 6
(2)dct_tests_02.dtt_iv_8x8_i.dctv_out_cntr[6:0]
[color] 6
(3)dct_tests_02.dtt_iv_8x8_i.dctv_out_cntr[6:0]
[color] 6
(4)dct_tests_02.dtt_iv_8x8_i.dctv_out_cntr[6:0]
[color] 6
(5)dct_tests_02.dtt_iv_8x8_i.dctv_out_cntr[6:0]
[color] 6
(6)dct_tests_02.dtt_iv_8x8_i.dctv_out_cntr[6:0]
@1401200
-group_end
@28
[color] 7
dct_tests_02.dtt_iv_8x8_i.dctv_out_sel
[color] 2
(0)dct_tests_02.dtt_iv_8x8_i.dctv_out_we_1[1:0]
dct_tests_02.dtt_iv_8x8_i.dctv_out_run
dct_tests_02.dtt_iv_8x8_i.dctv_out_start_1
dct_tests_02.dtt_iv_8x8_i.dctv_out_run_1
@22
[color] 3
dct_tests_02.dtt_iv_8x8_i.dctv_out_ra_1[6:0]
@420
[color] 2
dct_tests_02.dtt_iv_8x8_i.dctv_out_reg_1[23:0]
@8420
dct_tests_02.dtt_iv_8x8_i.dctv_out_reg_1[23:0]
dct_tests_02.dtt_iv_8x8_i.dctv_out_wa[4:0]
@c00029
dct_tests_02.dtt_iv_8x8_i.dctv_out_we[1:0]
@28
(0)dct_tests_02.dtt_iv_8x8_i.dctv_out_we[1:0]
(1)dct_tests_02.dtt_iv_8x8_i.dctv_out_we[1:0]
@1401201
-group_end
@1000200
-debug
@22
......@@ -219,13 +230,6 @@ dct_tests_02.dtt_iv_8x8_i.pre2_dstv[1:0]
(1)dct_tests_02.dtt_iv_8x8_i.pre2_dstv[1:0]
@22
dct_tests_02.dtt_iv_8x8_i.dctv_out_cntr[6:0]
@28
(0)dct_tests_02.dtt_iv_8x8_i.dctv_out_we_1[1:0]
dct_tests_02.dtt_iv_8x8_i.dctv_out_run_1
@22
dct_tests_02.dtt_iv_8x8_i.dctv_out_ra_1[6:0]
@8420
dct_tests_02.dtt_iv_8x8_i.dctv_out_reg_1[23:0]
@800200
-g3
@28
......@@ -265,7 +269,6 @@ dct_tests_02.dtt_iv_8x8r_i.transpose_start
@8420
dct_tests_02.dtt_iv_8x8r_i.dcth_dout0[23:0]
dct_tests_02.dtt_iv_8x8r_i.dcth_dout1[23:0]
@8421
dct_tests_02.dtt_iv_8x8r_i.transpose_out[23:0]
@200
-
......
[*]
[*] GTKWave Analyzer v3.3.78 (w)1999-2016 BSI
[*] Sat Dec 9 05:38:48 2017
[*]
[dumpfile] "/home/eyesis/nc393/elphel393/fpga-elphel/x393_branch_dct/simulation/dct_tests_03-20171208223621417.fst"
[dumpfile_mtime] "Sat Dec 9 05:36:24 2017"
[dumpfile_size] 225832
[savefile] "/home/eyesis/nc393/elphel393/fpga-elphel/x393_branch_dct/dct_tests_03.sav"
[timestart] 0
[size] 1920 1171
[pos] -1920 40
*-20.492632 1460700 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
[treeopen] dct_tests_03.
[treeopen] dct_tests_03.dtt_iv_8x8_i.
[treeopen] dct_tests_03.dtt_iv_8x8r_i.
[sst_width] 299
[signals_width] 287
[sst_expanded] 1
[sst_vpaned_height] 344
@420
dct_tests_03.i
dct_tests_03.i1
dct_tests_03.j
@28
dct_tests_03.CLK
dct_tests_03.RST
[color] 2
dct_tests_03.start
[color] 2
dct_tests_03.start2
@22
dct_tests_03.mode_in[1:0]
@420
dct_tests_03.x_in_2d[23:0]
@8420
dct_tests_03.x_in_2d[23:0]
@420
dct_tests_03.d_out_2d[23:0]
@8420
dct_tests_03.d_out_2d[23:0]
@22
dct_tests_03.mode_out[1:0]
@420
dct_tests_03.d_out_2dr[23:0]
@8420
dct_tests_03.d_out_2dr[23:0]
@800200
-dtt_iv8x8_direct
@28
dct_tests_03.dtt_iv_8x8_i.rst
dct_tests_03.dtt_iv_8x8_i.clk
dct_tests_03.dtt_iv_8x8_i.start
@22
dct_tests_03.dtt_iv_8x8_i.mode[1:0]
@420
dct_tests_03.dtt_iv_8x8_i.xin[23:0]
@8420
dct_tests_03.dtt_iv_8x8_i.xin[23:0]
@28
dct_tests_03.dtt_iv_8x8_i.pre_last_in
dct_tests_03.dtt_iv_8x8_i.pre_first_out
dct_tests_03.dtt_iv_8x8_i.dv
@420
dct_tests_03.dtt_iv_8x8_i.d_out[23:0]
@8420
dct_tests_03.dtt_iv_8x8_i.d_out[23:0]
@800200
-debug
@28
dct_tests_03.dtt_iv_8x8_i.transpose_start
@420
dct_tests_03.dtt_iv_8x8_i.dcth_dout0[23:0]
@8420
dct_tests_03.dtt_iv_8x8_i.dcth_dout0[23:0]
@420
dct_tests_03.dtt_iv_8x8_i.dcth_dout1[23:0]
@8420
dct_tests_03.dtt_iv_8x8_i.dcth_dout1[23:0]
@22
dct_tests_03.dtt_iv_8x8_i.transpose_debug_di[7:0]
@8022
dct_tests_03.dtt_iv_8x8_i.transpose_debug_di[7:0]
@420
dct_tests_03.dtt_iv_8x8_i.transpose_di[23:0]
@8420
dct_tests_03.dtt_iv_8x8_i.transpose_di[23:0]
@c00022
dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
@28
(0)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(1)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(2)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(3)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(4)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(5)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(6)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(7)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
@1401200
-group_end
@c08022
dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
@28
(0)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(1)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(2)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(3)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(4)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(5)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(6)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
(7)dct_tests_03.dtt_iv_8x8_i.transpose_wa[7:0]
@1401200
-group_end
@28
(0)dct_tests_03.dtt_iv_8x8_i.transpose_we[1:0]
dct_tests_03.dtt_iv_8x8_i.pre_dsth
@8022
dct_tests_03.dtt_iv_8x8_i.transpose_cntr[6:0]
@22
dct_tests_03.dtt_iv_8x8_i.transpose_ra[7:0]
dct_tests_03.dtt_iv_8x8_i.transpose_reg[23:0]
@420
dct_tests_03.dtt_iv_8x8_i.transpose_out[23:0]
@8420
dct_tests_03.dtt_iv_8x8_i.transpose_out[23:0]
@22
dct_tests_03.dtt_iv_8x8_i.dctv_xin0[23:0]
dct_tests_03.dtt_iv_8x8_i.dctv_xin1[23:0]
@28
dct_tests_03.dtt_iv_8x8_i.dctv_start_0_r
dct_tests_03.dtt_iv_8x8_i.dctv_start_1_r
dct_tests_03.dtt_iv_8x8_i.dctv_out_sel
@800028
dct_tests_03.dtt_iv_8x8_i.pre2_dstv[1:0]
@28
(0)dct_tests_03.dtt_iv_8x8_i.pre2_dstv[1:0]
(1)dct_tests_03.dtt_iv_8x8_i.pre2_dstv[1:0]
@1001200
-group_end
@420
[color] 3
dct_tests_03.dtt_iv_8x8_i.dctv_dout0[23:0]
[color] 2
dct_tests_03.dtt_iv_8x8_i.dctv_dout1[23:0]
@8420
dct_tests_03.dtt_iv_8x8_i.dctv_dout0[23:0]
dct_tests_03.dtt_iv_8x8_i.dctv_dout1[23:0]
@420
dct_tests_03.dtt_iv_8x8_i.debug_dctv_dout[23:0]
@8420
[color] 5
dct_tests_03.dtt_iv_8x8_i.debug_dctv_dout[23:0]
@c00022
[color] 6
dct_tests_03.dtt_iv_8x8_i.dctv_out_cntr[6:0]
@28
[color] 6
(0)dct_tests_03.dtt_iv_8x8_i.dctv_out_cntr[6:0]
[color] 6
(1)dct_tests_03.dtt_iv_8x8_i.dctv_out_cntr[6:0]
[color] 6
(2)dct_tests_03.dtt_iv_8x8_i.dctv_out_cntr[6:0]
[color] 6
(3)dct_tests_03.dtt_iv_8x8_i.dctv_out_cntr[6:0]
[color] 6
(4)dct_tests_03.dtt_iv_8x8_i.dctv_out_cntr[6:0]
[color] 6
(5)dct_tests_03.dtt_iv_8x8_i.dctv_out_cntr[6:0]
[color] 6
(6)dct_tests_03.dtt_iv_8x8_i.dctv_out_cntr[6:0]
@1401200
-group_end
@28
[color] 7
dct_tests_03.dtt_iv_8x8_i.dctv_out_sel
[color] 2
dct_tests_03.dtt_iv_8x8_i.dctv_out_run
dct_tests_03.dtt_iv_8x8_i.dctv_out_start_1
@22
dct_tests_03.dtt_iv_8x8_i.dctv_out_wa[4:0]
@c00028
dct_tests_03.dtt_iv_8x8_i.dctv_out_we[1:0]
@28
(0)dct_tests_03.dtt_iv_8x8_i.dctv_out_we[1:0]
(1)dct_tests_03.dtt_iv_8x8_i.dctv_out_we[1:0]
@1401200
-group_end
@22
dct_tests_03.dtt_iv_8x8_i.debug_dctv_dout[23:0]
@28
dct_tests_03.dtt_iv_8x8_i.dctv_out_sel
dct_tests_03.dtt_iv_8x8_i.pre_dstv
@200
-alt
@28
dct_tests_03.dtt_iv_8x8_i.dstv
dct_tests_03.dtt_iv_8x8_i.out_sel
dct_tests_03.dtt_iv_8x8_i.out_run
@22
dct_tests_03.dtt_iv_8x8_i.out_cntr[6:0]
@28
dct_tests_03.dtt_iv_8x8_i.out_sel
@22
dct_tests_03.dtt_iv_8x8_i.out_wd[23:0]
@c00022
dct_tests_03.dtt_iv_8x8_i.out_wa[3:0]
@28
(0)dct_tests_03.dtt_iv_8x8_i.out_wa[3:0]
(1)dct_tests_03.dtt_iv_8x8_i.out_wa[3:0]
(2)dct_tests_03.dtt_iv_8x8_i.out_wa[3:0]
(3)dct_tests_03.dtt_iv_8x8_i.out_wa[3:0]
@1401200
-group_end
@28
dct_tests_03.dtt_iv_8x8_i.out_we
dct_tests_03.dtt_iv_8x8_i.sub16
dct_tests_03.dtt_iv_8x8_i.inc16
dct_tests_03.dtt_iv_8x8_i.start64
@200
-top
@22
dct_tests_03.out_ram_wa[4:0]
@28
dct_tests_03.out_ram_cntr
dct_tests_03.out_ram_wah
@22
dct_tests_03.out_wa[3:0]
dct_tests_03.out_wd[23:0]
@28
dct_tests_03.out_we
dct_tests_03.out_ram_ren
dct_tests_03.out_ram_regen
@22
dct_tests_03.out_ram_ra[5:0]
dct_tests_03.out_ram_r[23:0]
@23
dct_tests_03.out_ram_r2[23:0]
@1000200
-debug
@22
dct_tests_03.dtt_iv_8x8_i.mode_out[1:0]
@28
dct_tests_03.dtt_iv_8x8_i.pre_first_out_w
@8420
dct_tests_03.dtt_iv_8x8_i.d_out[23:0]
@28
dct_tests_03.dtt_iv_8x8_i.dv
dct_tests_03.dtt_iv_8x8_i.pre_first_out
dct_tests_03.dtt_iv_8x8_i.pre_busy
dct_tests_03.dtt_iv_8x8_i.pre_first_out
@c00200
-direct_internal
@28
dct_tests_03.dtt_iv_8x8_i.dcth_en0
dct_tests_03.dtt_iv_8x8_i.dcth_en1
dct_tests_03.dtt_iv_8x8_i.dcth_start_0_r
dct_tests_03.dtt_iv_8x8_i.dcth_start_1_r
@22
dct_tests_03.dtt_iv_8x8_i.mode[1:0]
dct_tests_03.dtt_iv_8x8_i.mode_h[1:0]
dct_tests_03.dtt_iv_8x8_i.mode_h_late[1:0]
dct_tests_03.dtt_iv_8x8_i.mode_v[1:0]
dct_tests_03.dtt_iv_8x8_i.mode_out[1:0]
@28
dct_tests_03.dtt_iv_8x8_i.dctv_start_0_w
@22
dct_tests_03.dtt_iv_8x8_i.dctv_start_1_w
@800028
dct_tests_03.dtt_iv_8x8_i.pre2_dsth[1:0]
@28
(0)dct_tests_03.dtt_iv_8x8_i.pre2_dsth[1:0]
(1)dct_tests_03.dtt_iv_8x8_i.pre2_dsth[1:0]
@1001200
-group_end
@200
-
@800028
dct_tests_03.dtt_iv_8x8_i.pre2_dstv[1:0]
@28
(0)dct_tests_03.dtt_iv_8x8_i.pre2_dstv[1:0]
(1)dct_tests_03.dtt_iv_8x8_i.pre2_dstv[1:0]
@22
dct_tests_03.dtt_iv_8x8_i.dctv_out_cntr[6:0]
@800200
-g3
@28
dct_tests_03.dtt_iv_8x8_i.dct_iv8_1d_pass2_0_i.start
dct_tests_03.dtt_iv_8x8_i.dct_iv8_1d_pass2_0_i.dst_in
dct_tests_03.dtt_iv_8x8_i.dct_iv8_1d_pass2_0_i.dst_out
@1000200
-g3
@28
dct_tests_03.dtt_iv_8x8_i.dct_iv8_1d_pass2_1_i.start
dct_tests_03.dtt_iv_8x8_i.dct_iv8_1d_pass2_1_i.dst_in
dct_tests_03.dtt_iv_8x8_i.dct_iv8_1d_pass2_1_i.dst_out
@200
-
@1001200
-group_end
@1401200
-direct_internal
@1000200
-dtt_iv8x8_direct
@800200
-dtt_iv8x8_inv
@28
dct_tests_03.dtt_iv_8x8r_i.clk
dct_tests_03.dtt_iv_8x8r_i.start
dct_tests_03.dtt_iv_8x8r_i.mode[1:0]
@420
dct_tests_03.dtt_iv_8x8r_i.xin[23:0]
@8420
dct_tests_03.dtt_iv_8x8r_i.xin[23:0]
@28
dct_tests_03.dtt_iv_8x8r_i.mode_out[1:0]
@800200
-inv_internals
@28
dct_tests_03.dtt_iv_8x8r_i.transpose_start
@8420
dct_tests_03.dtt_iv_8x8r_i.dcth_dout0[23:0]
dct_tests_03.dtt_iv_8x8r_i.dcth_dout1[23:0]
dct_tests_03.dtt_iv_8x8r_i.transpose_out[23:0]
@200
-
@1000200
-inv_internals
@200
-
@1000200
-dtt_iv8x8_inv
@200
-dbg
[pattern_trace] 1
[pattern_trace] 0
/*!
* <b>Module:</b>dct_tests_03
* @file dct_tests_03.tf
* @date 2016-12-02
* @author Andrey Filippov
*
* @brief 1d 8-point DCT type IV for lapped mdct 16->8, operates in 16 clock cycles
* Uses 2 DSP blocks
*
* @copyright Copyright (c) 2016 Elphel, Inc.
*
* <b>License:</b>
*
*dct_tests_03.tf is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dct_tests_03.tf is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/> .
*
* Additional permission under GNU GPL version 3 section 7:
* If you modify this Program, or any covered work, by linking or combining it
* with independent modules provided by the FPGA vendor only (this permission
* does not extend to any 3-rd party modules, "soft cores" or macros) under
* different license terms solely for the purpose of generating binary "bitstream"
* files and/or simulating the code, the copyright holders of this Program give
* you the right to distribute the covered work without those independent modules
* as long as the source code for them is available from the FPGA vendor free of
* charge, and there is no dependence on any encrypted modules for simulating of
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
*/
`timescale 1ns/1ps
// No saturation here, and no rounding as we do not need to match decoder (be bit-precise), skipping rounding adder
// will reduce needed resources
//`define DCT_INPUT_UNITY
module dct_tests_03 ();
// parameter fstname="dct_tests_03.fst";
// fstname is the waveform dump file handed to $dumpfile(). When NON_VDT_ENVIROMENT
// is not defined it is expected to be provided by IVERILOG_INCLUDE.v (presumably a
// generated file - confirm). Every stand-alone branch uses the same, testbench-specific
// file name.
`ifdef IVERILOG
    `ifdef NON_VDT_ENVIROMENT
        parameter fstname = "dct_tests_03.fst";
    `else
        `include "IVERILOG_INCLUDE.v"
    `endif // NON_VDT_ENVIROMENT
`else // IVERILOG
    `ifdef CVC
        `ifdef NON_VDT_ENVIROMENT
            // Was "x393.fst" (apparently copied from another testbench); now consistent
            // with the IVERILOG and default branches.
            parameter fstname = "dct_tests_03.fst";
        `else // NON_VDT_ENVIROMENT
            `include "IVERILOG_INCLUDE.v"
        `endif // NON_VDT_ENVIROMENT
    `else
        parameter fstname = "dct_tests_03.fst";
    `endif // CVC
`endif // IVERILOG

// ---- Testbench parameters ----
parameter CLK_PERIOD =      10; // ns
parameter WIDTH =           24; // input data width
// parameter OUT_WIDTH =    16; // output data width
parameter OUT_WIDTH =       24; // output data width
parameter TRANSPOSE_WIDTH = 24; // width of the transpose memory (intermediate results)
parameter OUT_RSHIFT =       2; // overall right shift of the result from input, aligned by MSB (>=3 will never cause saturation)
parameter OUT_RSHIFT2 =      0; // overall right shift for the second (vertical) pass
parameter DCT_GAP =         16; // between runs
parameter SAME_BITS =        3; // number of MSBs of the random test data that repeat the sign bit

reg RST = 1'b1; // held active from time 0, released by the stimulus
reg CLK = 1'b0;

// ---- Input sequencing for the 1-D transform (dtt_iv8_1d_i) ----
reg [3:0] phase_in;   // counts while run_in is set (input reorder memory write phase)
reg [3:0] phase_out;  // counts while run_out is set (input reorder memory read phase)
reg run_in;
reg run_out;
reg run_out_d;        // run_out delayed by one clock, used to detect its rising edge
reg en_x = 0;         // raised by the stimulus while input words are being supplied
// reg end_x = 0;
reg [2:0] x_ra;                        // reorder memory read address
wire [2:0] x_wa = phase_in[2:0];       // reorder memory write address
wire x_we = !phase_in[3] && run_in;    // write only during phases 0..7 of each 16
reg [WIDTH-1:0] x_in;                  // 1-D stimulus word
reg [WIDTH-1:0] x_in_2d;               // 2-D stimulus word
reg [WIDTH-1:0] x_out;                 // registered reorder memory output, feeds dtt_iv8_1d_i
reg [WIDTH-1:0] x_ram[0:7];            // input reorder memory
wire [WIDTH-1:0] x_out_w = x_ram[x_ra];
reg start = 0;
reg start2 = 0; // second start for 2d
reg [1:0] mode_in= 0; // 3; // [0] - vertical pass 0: dct, 1 - dst, [1] - horizontal pass
wire [1:0] mode_out; // [0] - vertical pass 0: dct, 1 - dst, [1] - horizontal pass

// ---- Output capture for the 1-D transform ----
wire [OUT_WIDTH-1:0] y_dct;
wire pre2_start_out;
wire en_out;
reg y_pre_we;         // en_out delayed by one clock
reg y_we;             // en_out delayed by two clocks, write enable of y_ram
reg [3:0] phase_y=8;
reg [2:0] y_wa;
reg [2:0] y_ra;
reg y_dv=0;           // y_out carries a valid word to be stored into data_out[]
reg signed [OUT_WIDTH-1:0] y_ram[0:7];
wire signed [OUT_WIDTH-1:0] y_out = y_ram[y_ra]; // SuppressThisWarning VEditor - simulation only

// ---- Test vectors ----
reg signed [WIDTH-1:0] data_in[0:63];
reg signed [OUT_WIDTH-1:0] data_out[0:63];

// ---- Outputs of the direct 2-D transform (dtt_iv_8x8_i) ----
wire pre_last_in_2d; // SuppressThisWarning VEditor - simulation only
wire pre_first_out_2d; // SuppressThisWarning VEditor - simulation only
wire pre_busy_2d; // SuppressThisWarning VEditor - simulation only
wire dv_2d; // SuppressThisWarning VEditor - simulation only
wire signed [OUT_WIDTH-1:0] d_out_2d;

// ---- Outputs of the second 2-D transform (dtt_iv_8x8r_i) fed by the first one ----
wire pre_last_in_2dr; // SuppressThisWarning VEditor - simulation only
wire pre_first_out_2dr; // SuppressThisWarning VEditor - simulation only
wire pre_busy_2dr; // SuppressThisWarning VEditor - simulation only
wire dv_2dr; // SuppressThisWarning VEditor - simulation only
wire signed [OUT_WIDTH-1:0] d_out_2dr; // SuppressThisWarning VEditor - simulation only

// Loop counters / scratch shared by the stimulus processes
integer i,j, i1, ir;
// Build the 8x8 test block in data_in[] and print it in line-scan and transposed order.
// Default stimulus: one $random word is drawn for every element (border elements included,
// so the random sequence does not depend on the masking), sign-extended from bit
// WIDTH-SAME_BITS-1; the first/last row and the first/last column are forced to zero.
// With DCT_INPUT_UNITY defined a single non-zero element per row is generated instead.
initial begin
    for (i = 0; i < 64; i = i + 1) begin
`ifdef DCT_INPUT_UNITY
        // NOTE: the first pattern is overwritten immediately by the second one below
        data_in[i] = (i[2:0] == (i[5:3] ^ 3'h0)) ? {2'b1,{WIDTH-2{1'b0}}} : 0;
        ir = (i[2:0] == (i[5:3] ^ 3'h1)) ? {2'b1,{WIDTH-2{1'b0}}} : 0;
        data_in[i] = ir;
`else
        ir = $random;
        if ((i[5:3] == 0) || (i[5:3] == 7) || (i[2:0] == 0) || (i[2:0] == 7)) begin
            data_in[i] = 0; // border row or column
        end else begin
            data_in[i] = {{SAME_BITS{ir[WIDTH -SAME_BITS - 1]}},ir[WIDTH -SAME_BITS-1:0]};
        end
`endif
    end

    // One text line per row of 8 consecutive elements
    $display("Input data in line-scan order:");
    for (i=0; i<64; i=i+8) begin
        $display ("%d, %d, %d, %d, %d, %d, %d, %d",
                  data_in[i+0], data_in[i+1], data_in[i+2], data_in[i+3],
                  data_in[i+4], data_in[i+5], data_in[i+6], data_in[i+7]);
    end
    $display("");

    // One text line per column (elements 8 apart)
    $display("Input data - transposed:");
    j=0;
    for (i=0; i < 8; i=i+1) begin
        $display ("%d, %d, %d, %d, %d, %d, %d, %d",
                  data_in[i+ 0], data_in[i+ 8], data_in[i+16], data_in[i+24],
                  data_in[i+32], data_in[i+40], data_in[i+48], data_in[i+56]);
    end
    $display("");
end
// Free-running clock: CLK toggles every CLK_PERIOD/2 time units.
// The division is an integer one, so CLK_PERIOD is expected to be even.
always #(CLK_PERIOD/2) CLK = ~CLK;
// Stimulus for the 1-D transform path: opens the waveform dump, releases reset and
// then presents the 64 data_in[] words on x_in in bursts of 8 words, each burst
// followed by 8 idle clocks. Finally prints the data_out[] array collected by the
// capture process.
initial begin
$dumpfile(fstname);
$dumpvars(0,dct_tests_03); // SuppressThisWarning VEditor
// RST is initialised to 1; keep it active for the first 100 time units
#100;
RST = 0;
#100;
repeat (10) @(posedge CLK);
// all stimulus changes are made 1 time unit after the clock edge
#1 en_x = 1;
for (i = 0; i < 64; i = i+1) begin
@(posedge CLK);
#1;
x_in = data_in[i]; // >>x_wa;
// drop the enable together with the last word
if (i==63) begin
en_x = 0;
end
// after every 8th word (i[2:0] == 7) wait 8 more clocks before the next burst
if (&i[2:0]) repeat (8) @(posedge CLK);
end
#1 x_in = 0;
// give the results time to be written to data_out[] before printing
repeat (64) @(posedge CLK);
$display("");
$display("output data - transposed:");
for (i=0; i<64; i=i+8) begin
$display ("%d, %d, %d, %d, %d, %d, %d, %d",data_out[i+0],data_out[i+1],data_out[i+2],data_out[i+3],
data_out[i+4],data_out[i+5],data_out[i+6],data_out[i+7]);
end
// The simulation is ended by the 2-D stimulus process, not here
// repeat (64) @(posedge CLK);
// $finish;
end
// Stimulus for the 2-D transform path (x_in_2d, mode_in, start2); also ends the simulation.
// Sequence: wait for the first 'start' pulse, send the 64-word block three times with
// mode_in incremented at the beginning of each block, send it a fourth time without
// changing mode_in, pause for DCT_GAP clocks, then send the block in reverse order.
initial begin
wait (!RST);
// 'start' is produced by the 1-D sequencing logic; poll it 1 time unit after each clock edge
while (!start) begin
@(posedge CLK);
#1;
end
// 3 x 64 words back to back
for (i1 = 0; i1 < 192; i1 = i1+1) begin
@(posedge CLK);
#1;
x_in_2d = data_in[i1 & 63];
// new mode for each block: mode_in starts at 0, so the blocks use 1, 2 and 3
if ((i1 & 63) == 0) mode_in = mode_in+1;
// start2 is high together with the last word of a block, i.e. one cycle before the next block
start2 = (i1 & 63) == 63;
end
// fourth block, started by the start2 pulse left from the loop above
for (i1 = 0; i1 < 64; i1 = i1+1) begin
@(posedge CLK);
#1;
start2 = 0;
x_in_2d = data_in[i1];
end
// idle gap, then a separately started block with the data in reverse order
repeat (DCT_GAP) @(posedge CLK);
#1;
start2 = 1;
for (i1 = 0; i1 < 64; i1 = i1+1) begin
@(posedge CLK);
#1;
start2 = 0;
x_in_2d = data_in[63-i1];
end
// let the pipelines drain before finishing
repeat (300) @(posedge CLK);
$finish;
end
// Collects the 1-D transform results: while y_dv is set, each clock stores y_out into
// data_out[] at the index obtained by swapping the two 3-bit halves of the counter j.
initial j = 0;
always @ (posedge CLK) begin
if (y_dv) begin
//$display (" y[0x%x] => 0x%x %d, j=%d @%t",y_ra,y_out,y_out,j,$time);
data_out[{j[2:0],j[5:3]}] = y_out; // transpose array
// j is advanced 1 time unit after the clock edge; the process then waits for the next edge
#1 j = j+1;
end
end
// Sequencing, reorder memories and result capture around the 1-D transform instance
// (dtt_iv8_1d_i). Every register here is updated with a nonblocking assignment on the
// rising edge of CLK; the logic is grouped by function into three processes.

// --- run flags, phase counters and the start pulse ---
always @ (posedge CLK) begin
    if (RST) begin
        run_in  <= 0;
        run_out <= 0;
        start   <= 0;
    end else begin
        // input side: set while en_x is high, cleared when phase_in reaches 15 with en_x low
        if      (en_x)           run_in <= 1;
        else if (phase_in == 15) run_in <= 0;

        // output side: copies run_in at phase_in == 5 and whenever phase_out reaches 15
        if ((phase_in == 5) || (phase_out==15)) run_out <= run_in;

        // single-cycle pulse on the rising edge of run_out
        start <= run_out & !run_out_d;
    end

    run_out_d <= run_out;

    if (!run_in) phase_in <= 0;
    else         phase_in <= phase_in + 1;

    if (!run_out) phase_out <= 0;
    else          phase_out <= phase_out + 1;
end

// --- input reorder memory: written in arrival order, read in the order listed below ---
always @ (posedge CLK) begin
    if (x_we) x_ram[x_wa] <= x_in;
    x_out <= x_out_w;

    //X2-X7-X3-X4-X5-X6-X0-X1-*-X3-X5-X4-*-X1-X7-*
    // NOTE(review): the sequence comment above lists X1 for phase 0xd while the table
    // reads address 6 there - confirm against the input order expected by dtt_iv8_1d
    case (phase_out)
        4'h0: x_ra <= 2;
        4'h1: x_ra <= 7;
        4'h2: x_ra <= 3;
        4'h3: x_ra <= 4;
        4'h4: x_ra <= 5;
        4'h5: x_ra <= 6;
        4'h6: x_ra <= 0;
        4'h7: x_ra <= 1;
        4'h8: x_ra <= 'bx; // unused slot
        4'h9: x_ra <= 3;
        4'ha: x_ra <= 5;
        4'hb: x_ra <= 4;
        4'hc: x_ra <= 'bx; // unused slot
        4'hd: x_ra <= 6;
        4'he: x_ra <= 7;
        4'hf: x_ra <= 'bx; // unused slot
    endcase
end

// --- output reorder memory and data-valid generation ---
always @ (posedge CLK) begin
    // two-stage delay of en_out: y_pre_we, then y_we
    y_pre_we <= en_out;
    y_we     <= y_pre_we;

    if (RST) begin
        phase_y <= 8;
        y_dv    <= 0;
    end else begin
        if      (pre2_start_out) phase_y <= 0;
        else if (y_pre_we)       phase_y <= phase_y + 1;

        // start reading out when the word of phase 6 is being written, stop after address 7
        if      ((phase_y == 6) && y_we) y_dv <= 1;
        else if (y_ra == 7)              y_dv <= 0;
    end

    if (!y_dv) y_ra <= 0;
    else       y_ra <= y_ra + 1;

    if (y_we) y_ram[y_wa] <= y_dct;

    // write address as a function of the output phase
    case (phase_y[2:0])
        3'h0: y_wa <= 0;
        3'h1: y_wa <= 7;
        3'h2: y_wa <= 4;
        3'h3: y_wa <= 3;
        3'h4: y_wa <= 1;
        3'h5: y_wa <= 6;
        3'h6: y_wa <= 2;
        3'h7: y_wa <= 5;
    endcase
end
// 1-D transform under test. It is fed from the input reorder memory (x_out) and its
// output y_dct is captured into y_ram using pre2_start_out / en_out.
// The module itself is defined outside this file, so port widths and directions noted
// below are taken from the connected testbench signals, not from the module source.
dtt_iv8_1d #(
.WIDTH (WIDTH),
.OUT_WIDTH (OUT_WIDTH),
.OUT_RSHIFT (OUT_RSHIFT),
.B_WIDTH (18), // presumably DSP operand/result widths (B, A, P) - confirm against dtt_iv8_1d
.A_WIDTH (25),
.P_WIDTH (48),
.COSINE_SHIFT (17),
// Cosine constants below equal the dtt_iv_8x8_ad defaults: int(round((1<<17) * cos(k*pi/32)))
.COS_01_32 (130441),
.COS_03_32 (125428),
.COS_04_32 (121095),
.COS_05_32 (115595),
.COS_07_32 (101320),
.COS_08_32 (92682),
.COS_09_32 (83151),
.COS_11_32 (61787),
.COS_12_32 (50159),
.COS_13_32 (38048),
.COS_15_32 (12847)
) dtt_iv8_1d_i (
.clk (CLK), // input
.rst (RST), // input
.en (run_in), // input
.dst_in (mode_in[1]), // input
.d_in (x_out), // input, x_out is WIDTH (24) bits wide
.start (start), // input
.dout (y_dct), // output, y_dct is OUT_WIDTH (24) bits wide here (an earlier note said [15:0])
.pre2_start_out (pre2_start_out), // output reg
.en_out (en_out), // output reg
.dst_out (), // output, not used by this testbench
.y_index () // output[2:0] reg, not used by this testbench
);
// Signals of the external output buffer model that is driven by the
// out_wd/out_wa/out_we/sub16/inc16/start64 outputs of dtt_iv_8x8_i.
parameter ODEPTH = 5; // address width of the buffer (the same value as the dtt_iv_8x8_ad default)
reg signed [OUT_WIDTH-1:0] out_ram[0: ((1<<ODEPTH)-1)]; // [0:31];
wire signed [OUT_WIDTH-1:0] out_wd; // write data from the DUT
wire signed [3:0] out_wa; // 4 LSBs of the write address from the DUT
wire out_we; // write enable from the DUT
wire sub16; // DUT request: use (counter - 1) as the high part of the write address
wire inc16; // DUT request: increment the high part of the write address
wire start64; // DUT indication: start reading the buffer
reg [ODEPTH-5:0] out_ram_cntr; // high address counter (1 bit wide for ODEPTH == 5)
reg [ODEPTH-5:0] out_ram_wah; // registered high part of the write address
wire [ODEPTH-1:0] out_ram_wa = {out_ram_wah,out_wa}; // full write address
reg out_ram_ren; // buffer read enable
reg out_ram_regen; // out_ram_ren delayed by one clock, enables the second output register
reg [5:0] out_ram_ra; // read counter, only bits [4:0] address the buffer
reg signed [OUT_WIDTH-1:0] out_ram_r; // first read register
reg signed [OUT_WIDTH-1:0] out_ram_r2; // second read register
// Model of the external output buffer attached to dtt_iv_8x8_i.

// --- write side: the DUT supplies the 4 address LSBs, the high part is kept here ---
always @ (posedge CLK) begin
    if      (RST)   out_ram_cntr <= 0;
    else if (inc16) out_ram_cntr <= out_ram_cntr + 1;

    // sub16 selects the previous group of 16 words
    out_ram_wah <= out_ram_cntr - sub16;

    if (out_we) out_ram[out_ram_wa] <= out_wd;
end

// --- read side: start64 triggers a readout of 64 consecutive clocks ---
always @ (posedge CLK) begin
    if      (RST)         out_ram_ren <= 1'b0;
    else if (start64)     out_ram_ren <= 1'b1;
    else if (&out_ram_ra) out_ram_ren <= 1'b0; // read counter reached its last value

    out_ram_regen <= out_ram_ren;

    if (!out_ram_ren) out_ram_ra <= 0;
    else              out_ram_ra <= out_ram_ra + 1;

    // two register stages on the read data path
    if (out_ram_ren)   out_ram_r  <= out_ram[out_ram_ra[4:0]];
    if (out_ram_regen) out_ram_r2 <= out_ram_r;
end
// Direct 2-D transform under test (variant with outputs for an external buffer).
// It is started by either the 1-D 'start' pulse or the stimulus-driven 'start2'.
// ODEPTH and the cosine constants are not overridden here, so the module defaults apply.
dtt_iv_8x8_ad #(
.INPUT_WIDTH (WIDTH),
.OUT_WIDTH (OUT_WIDTH),
.OUT_RSHIFT1 (OUT_RSHIFT),
.OUT_RSHIFT2 (OUT_RSHIFT2),
.TRANSPOSE_WIDTH (TRANSPOSE_WIDTH),
.DSP_B_WIDTH (18),
.DSP_A_WIDTH (25),
.DSP_P_WIDTH (48)
) dtt_iv_8x8_i (
.clk (CLK), // input
.rst (RST), // input
.start (start || start2), // input
.mode (mode_in), // input[1:0]
.xin (x_in_2d), // input signed [INPUT_WIDTH-1:0], 24 bits with the parameters above
.pre_last_in (pre_last_in_2d), // output
.pre_first_out (pre_first_out_2d), // output reg
.dv (dv_2d), // output reg
.d_out (d_out_2d), // output signed [OUT_WIDTH-1:0], 24 bits with the parameters above
.mode_out (mode_out), // output[1:0] reg
.pre_busy (pre_busy_2d), // output reg
// interface to the external output buffer modelled by out_ram
.out_wd (out_wd), // output reg [OUT_WIDTH-1:0], 24 bits with the parameters above
.out_wa (out_wa), // output[3:0] reg
.out_we (out_we), // output reg
.sub16 (sub16), // output reg
.inc16 (inc16), // output reg
.start64 (start64) // output reg
);
// Second 2-D transform, chained to the output of dtt_iv_8x8_i: it is started by
// pre_first_out_2d and consumes d_out_2d. The two mode bits are swapped because
// the first stage delivers its result transposed.
// dtt_iv_8x8 is defined outside this file; port widths noted below follow the
// connected testbench signals (24 bits), not the module source - confirm.
dtt_iv_8x8 #(
.INPUT_WIDTH (WIDTH),
.OUT_WIDTH (OUT_WIDTH),
.OUT_RSHIFT1 (OUT_RSHIFT),
.OUT_RSHIFT2 (OUT_RSHIFT2),
.TRANSPOSE_WIDTH (TRANSPOSE_WIDTH),
.DSP_B_WIDTH (18),
.DSP_A_WIDTH (25),
.DSP_P_WIDTH (48)
) dtt_iv_8x8r_i (
.clk (CLK), // input
.rst (RST), // input
.start (pre_first_out_2d), // input
.mode ({mode_out[0],mode_out[1]}), // input[1:0] // result is transposed
.xin (d_out_2d), // input signed, driven by a 24-bit signal here
.pre_last_in (pre_last_in_2dr), // output reg
.pre_first_out (pre_first_out_2dr), // output
.dv (dv_2dr), // output
.d_out (d_out_2dr), // output signed, connected to a 24-bit signal here
.mode_out (), // output[1:0] reg, not used by this testbench
.pre_busy (pre_busy_2dr) // output reg
);
endmodule
/*!
* <b>Module:</b>dtt_iv_8x8_ad
* @file dtt_iv_8x8_ad.v
* @date 2016-12-08
* @author Andrey Filippov
*
* @brief 2-d DCT-IV implementation, 1 clock/data word. Input in scanline order, output (a-signed, d, we) - transposed
*
* @copyright Copyright (c) 2016 Elphel, Inc.
*
* <b>License:</b>
*
*dtt_iv_8x8_ad.v is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dtt_iv_8x8_ad.v is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/> .
*
* Additional permission under GNU GPL version 3 section 7:
* If you modify this Program, or any covered work, by linking or combining it
* with independent modules provided by the FPGA vendor only (this permission
* does not extend to any 3-rd party modules, "soft cores" or macros) under
* different license terms solely for the purpose of generating binary "bitstream"
* files and/or simulating the code, the copyright holders of this Program give
* you the right to distribute the covered work without those independent modules
* as long as the source code for them is available from the FPGA vendor free of
* charge, and there is no dependence on any encrypted modules for simulating of
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
*/
`timescale 1ns/1ps
module dtt_iv_8x8_ad#(
parameter INPUT_WIDTH = 25,
parameter OUT_WIDTH = 25,
parameter OUT_RSHIFT1 = 1, // overall right shift of the result from input, aligned by MSB for pass1 (>=3 will never cause saturation)
parameter OUT_RSHIFT2 = 1, // if sum OUT_RSHIFT1+OUT_RSHIFT2 == 2, direct*reverse == ident (may use 3, -1) or 3,0 with wider output and saturate
parameter TRANSPOSE_WIDTH = 25, // transpose memory width
parameter DSP_B_WIDTH = 18,
parameter DSP_A_WIDTH = 25,
parameter DSP_P_WIDTH = 48,
parameter COSINE_SHIFT= 17,
parameter ODEPTH = 5, // output buffer depth (bits). Here 5, put can use more if used as a full block buffer
parameter COS_01_32 = 130441, // int(round((1<<17) * cos( 1*pi/32)))
parameter COS_03_32 = 125428, // int(round((1<<17) * cos( 3*pi/32)))
parameter COS_04_32 = 121095, // int(round((1<<17) * cos( 4*pi/32)))
parameter COS_05_32 = 115595, // int(round((1<<17) * cos( 5*pi/32)))
parameter COS_07_32 = 101320, // int(round((1<<17) * cos( 7*pi/32)))
parameter COS_08_32 = 92682, // int(round((1<<17) * cos( 8*pi/32)))
parameter COS_09_32 = 83151, // int(round((1<<17) * cos( 9*pi/32)))
parameter COS_11_32 = 61787, // int(round((1<<17) * cos(11*pi/32)))
parameter COS_12_32 = 50159, // int(round((1<<17) * cos(12*pi/32)))
parameter COS_13_32 = 38048, // int(round((1<<17) * cos(13*pi/32)))
parameter COS_15_32 = 12847 // int(round((1<<17) * cos(15*pi/32)))
) (
input clk, //!< system clock, posedge
input rst, //!< sync reset
input start, //!< single-cycle start pulse that goes 1 cycle before first data
input [1:0] mode, //!< DCT/DST: [1] - first (horizontal) pass, [0] - second (vertical) pass. 0 - DCT, 1 - DST
// Next data should be sent in bursts of 8, pause of 8 - total 128 cycles
input signed [INPUT_WIDTH-1:0] xin, //!< input data
output pre_last_in, //!< output high during input of the pre-last of 64 pixels in a 8x8 block (next can be start
output reg pre_first_out, //!< 1 cycle ahead of the first output in a 64 block
output reg dv, //!< data output valid. WAS: Will go high on the 94-th cycle after the start
output signed [OUT_WIDTH-1:0] d_out, //!< output data
output reg [1:0] mode_out, //!< copy of mode input, valid @ pre_first_out
output reg pre_busy, //!< start should come each 64-th cycle (next after pre_last_in), and not after pre_busy)
output reg [OUT_WIDTH-1:0] out_wd, //!< output data to write to external output buffer memory
output reg [3:0] out_wa, //!< 4 LSBs of the output address (may subtract 16 !)
// external output buffer control: the buffer should hold at least 32 words
output reg out_we, //!< output data valid (write to external buffer
output reg sub16, //!< Subtract 16 from the full output address when true
output reg inc16, //!< increment full output address by 16
output reg start64); //!< may start output readout, 1 entry per clock, vertically
// 1. Two 16xINPUT_WIDTH memories to feed two of the 'horizontal' 1-dct - they should provide outputs shifted by 1 clock
// 2. Two of the horizontal DCTs
// 3. common transpose memory plus 2 input reorder memory for each of the vertical DCT
// 4. 2 of the vertical DCTs
// 5. small memory to combine/reorder outputs (2 stages as 1 x16 memory is not enough)
// TODO make a version that uses common transpose memory (twice width) and simultaneously calculates dst-iv (invert time sequence, alternate sign)
// That can be used for lateral chromatic aberration (shift in time domain). Reverse transform does not need it - will always be just dct-iv
reg x_run;
reg [5:0] x_wa;
wire dcth_phin_start = x_run && (x_wa[5:0] == 6);
reg dcth_phin_run;
reg dcth_en0;
reg dcth_en1;
reg [6:0] dcth_phin;
reg [2:0] x_ra0;
reg [2:0] x_ra1;
reg signed [INPUT_WIDTH-1:0] x_ram0[0:7];
reg signed [INPUT_WIDTH-1:0] x_ram1[0:7];
reg signed [INPUT_WIDTH-1:0] dcth_xin0;
reg signed [INPUT_WIDTH-1:0] dcth_xin1;
wire signed [TRANSPOSE_WIDTH-1:0] dcth_dout0;
wire signed [TRANSPOSE_WIDTH-1:0] dcth_dout1;
wire dcth_en_out0;
wire dcth_en_out1;
wire dcth_start_0_w = dcth_phin_run && (dcth_phin [6:0] ==0);
wire dcth_start_1_w = dcth_phin_run && (dcth_phin [6:0] ==9);
reg dcth_start_0_r;
reg dcth_start_1_r;
reg [1:0] transpose_w_page;
reg [6:0] transpose_cntr; // transpose memory counter, [6] == 1 when the last page is being finished
reg transpose_in_run;
wire transpose_start = dcth_phin_run && (dcth_phin [6:0] == 7'h11);
reg [2:0] transpose_wa_low; // [2:0] transpose memory low address bits, [3] - other group (of 16)
reg [4:0] transpose_wa_high; // high bits of transpose memory write address
wire [7:0] transpose_wa = {transpose_wa_high,transpose_wa_low};
wire transpose_wa_decr = (transpose_cntr[0] & ~transpose_cntr[3]);
reg [1:0] transpose_we; // [1]
wire [TRANSPOSE_WIDTH-1:0] transpose_di = transpose_cntr[0]? dcth_dout0: dcth_dout1;
reg [TRANSPOSE_WIDTH-1:0] transpose_ram[0:255];
wire [2:0] dcth_yindex0;
wire [2:0] dcth_yindex1;
wire [7:0] transpose_debug_di= {transpose_wa_high, transpose_cntr[0]? dcth_yindex0: dcth_yindex1};
reg [7:0] transpose_debug_ram[0:255];
reg [6:0] transpose_rcntr; // transpose read memory counter, [6] == 1 when the last page is being finished
reg [2:0] transpose_out_run;
wire transpose_out_start = transpose_in_run && (transpose_cntr[6:0] == 7'h35); // 7'h33 is actual minimum
reg [1:0] transpose_r_page;
reg signed [TRANSPOSE_WIDTH-1:0] transpose_reg; // internal BRAM register
reg signed [TRANSPOSE_WIDTH-1:0] transpose_out; // output BRAM register
reg [7:0] transpose_debug_reg; // internal BRAM register
reg [7:0] transpose_debug_out; // output BRAM register
wire [7:0] transpose_ra = {transpose_r_page, transpose_rcntr[2:0], transpose_rcntr[5:3]};
reg [3:0] t_wa;
wire t_we0 = transpose_out_run[2] && !t_wa[3];
// ---------------------------------------------------------------------------
// Declarations for the second (vertical) pass, the output reordering stage
// and the DCT/DST mode pipeline. Signals referenced here but not declared in
// this section (transpose_out_run, transpose_rcntr, t_wa, d_out, pre_last_in,
// ...) are declared elsewhere in the module.
// ---------------------------------------------------------------------------
// Write enable of t_ram1/t_debug_ram1: third stage of transpose_out_run and t_wa[3] set
wire t_we1 = transpose_out_run[2] && t_wa[3];
// Two 8-entry reorder memories between the transpose memory and the two
// vertical 1-D units: written from transpose_out, read into dctv_xin0/dctv_xin1
reg signed [TRANSPOSE_WIDTH-1:0] t_ram0[0:7];
reg signed [TRANSPOSE_WIDTH-1:0] t_ram1[0:7];
reg signed [TRANSPOSE_WIDTH-1:0] dctv_xin0;
reg signed [TRANSPOSE_WIDTH-1:0] dctv_xin1;
// Debug companions of t_ram0/t_ram1: written from transpose_debug_out with the same enables/addresses
reg signed [7:0] t_debug_ram0[0:7];
reg signed [7:0] t_debug_ram1[0:7];
reg signed [7:0] dctv_debug_xin0; // SuppressThisWarning VEditor - simulation only
reg signed [7:0] dctv_debug_xin1; // SuppressThisWarning VEditor - simulation only
// Outputs of the two vertical-pass dtt_iv8_1d instances (dct_iv8_1d_pass2_0_i / dct_iv8_1d_pass2_1_i)
wire signed [OUT_WIDTH-1:0] dctv_dout0;
wire signed [OUT_WIDTH-1:0] dctv_dout1;
wire dctv_en_out0;
wire dctv_en_out1;
wire [2:0] dctv_yindex0;
wire [2:0] dctv_yindex1;
// Starts the vertical-pass phase counter when transpose_rcntr[5:0] reaches 8.
// NOTE(review): transpose_out_run is used without a bit select here although it is
// indexed as [2:0] elsewhere, so the logical && tests "any bit set" - confirm this is intended.
wire dctv_phin_start = transpose_out_run && (transpose_rcntr[5:0] == 8);
reg dctv_phin_run; // set by dctv_phin_start, cleared when dctv_phin reaches 7'h48
reg dctv_en0; // .en of vertical unit 0
reg dctv_en1; // .en of vertical unit 1
reg [6:0] dctv_phin; // vertical-pass phase counter: 0 while idle or at start, otherwise increments
reg [2:0] t_ra0; // read address of t_ram0/t_debug_ram0, looked up from dctv_phin[3:0]
reg [2:0] t_ra1; // read address of t_ram1/t_debug_ram1, looked up from dctv_phin[3:0]
// Start requests for the two vertical units: unit 0 at phase 0, unit 1 at phase 9
wire dctv_start_0_w = dctv_phin_run && (dctv_phin [6:0] ==0);
wire dctv_start_1_w = dctv_phin_run && (dctv_phin [6:0] ==9);
reg dctv_start_0_r; // dctv_start_0_w delayed by one clock, drives .start of vertical unit 0
reg dctv_start_1_r; // dctv_start_1_w delayed by one clock, drives .start of vertical unit 1
reg pre_last_in_r; // registered (x_run && x_wa[5:0] == 'h3d); drives the pre_last_in output
reg [6:0] dctv_out_cntr; // count output data from second (vertical) pass (bit 6 - stopping)
reg dctv_out_run; // set by dctv_out_start, cleared when dctv_out_cntr reaches 'h47
wire dctv_out_start = dctv_phin [6:0] == 'h11;
// Alternative output path: same counter/run scheme, started one phase later ('h12)
reg [6:0] out_cntr; // count output data from second (vertical) pass (bit 6 - stopping)
reg out_run; // set by out_start, cleared when out_cntr reaches 'h47
wire out_start = dctv_phin [6:0] == 'h12;
reg out_sel; // which of the 2 output channels to select
reg [ODEPTH-1:0] dctv_out_wa; // write address of dctv_out_ram/dctv_out_debug_ram
reg [1:0] dctv_out_we; // shift register of (dctv_en_out0 | dctv_en_out1); bit [1] is the RAM write enable
reg dctv_out_sel; // select DCTv channel output;
// First stage of output reordering: data RAM and a parallel RAM holding the y_index of each sample
reg signed [OUT_WIDTH-1:0] dctv_out_ram[0: ((1<<ODEPTH)-1)]; // [0:31];
reg [2:0] dctv_out_debug_ram[0:((1<<ODEPTH)-1)]; // [0:31];
reg [6:0] dctv_out_ra; // read counter for dctv_out_ram; low bits form the read address, &[5:0] ends the read burst
wire dctv_out_start_1 = dctv_out_cntr[6:0] == 'h0e; // 'h0b;
reg pre_dv; // read enable of dctv_out_ram; dv is pre_dv delayed by one clock
reg signed [OUT_WIDTH-1:0] dctv_out_reg; // RAM read register, drives d_out
reg [2:0] dctv_out_debug_reg; // SuppressThisWarning VEditor - simulation only
// Mode pipeline: the mode word is re-registered at each stage boundary
reg [1:0] mode_h; // registered at start, [1] used for hor (first) pass
reg [1:0] mode_h_late; // mode_h registered @ pre_last_in
reg [1:0] mode_v; // mode_h_late registered @ transpose_out_start ([0] used for vert pass)
// mode_out (not declared in this section): mode_v registered @ pre_first_out_w
wire [1:0] pre2_dsth; // 2 cycles before horizontal output data is valid, 0 - dct, 1 - dst
wire [1:0] pre2_dstv; // 2 cycles before vertical output data is valid, 0 - dct, 1 - dst
reg pre_dsth; // 1 cycle before horizontal output data is valid, 0 - dct, 1 - dst
reg pre_dstv; // 1 cycle before vertical output data is valid, 0 - dct, 1 - dst
reg dstv; // when vertical output data is valid, 0 - dct, 1 - dst
wire pre_first_out_w = dctv_out_start_1;
// Currently selected vertical-unit output, for waveform inspection
wire [OUT_WIDTH-1:0] debug_dctv_dout = dctv_out_sel? dctv_dout1: dctv_dout0; // SuppressThisWarning VEditor - simulation only
assign d_out = dctv_out_reg;
assign pre_last_in = pre_last_in_r;
// ---------------------------------------------------------------------------
// Main sequencer of the 2-D transform. Everything below is registered on clk.
// Stages, in data-flow order:
//   1. input write/read of x_ram0/x_ram1 feeding the horizontal 1-D units
//   2. write of the transpose memory (address permuted, inverted for DST)
//   3. read of the transpose memory into t_ram0/t_ram1 feeding the vertical units
//   4. first-stage output reordering through dctv_out_ram (d_out/dv path)
//   5. "alternative option": write address/data generation for an external buffer
// Non-blocking assignments only; if/else-if chains give rst the highest priority.
// ---------------------------------------------------------------------------
always @ (posedge clk) begin
    // ---- stage 1: input sequencing ------------------------------------------
    // x_run: set by start, cleared by rst or when the low 6 bits of x_wa are all ones
    if      (rst)        x_run <= 0;
    else if (start)      x_run <= 1;
    else if (&x_wa[5:0]) x_run <= 0;

    // Mode pipeline: the mode word follows the data through the stages
    if (start)               mode_h      <= mode;
    if (pre_last_in)         mode_h_late <= mode_h;
    if (transpose_out_start) mode_v      <= mode_h_late;
    if (pre_first_out_w)     mode_out    <= mode_v;

    // Input write address: held at 0 while idle, increments while x_run
    if (!x_run) x_wa <= 0;
    else        x_wa <= x_wa + 1;

    pre_last_in_r <= x_run && (x_wa[5:0] == 'h3d);

    if      (rst)                  pre_busy <= 0;
    else if (pre_last_in_r)        pre_busy <= 1;
    else if (dcth_phin [5:0] == 5) pre_busy <= 0; // check actual?

    // Horizontal-pass phase counter and its run flag
    if      (rst)                        dcth_phin_run <= 0;
    else if (dcth_phin_start)            dcth_phin_run <= 1;
    else if (dcth_phin [6:0] == 7'h48)   dcth_phin_run <= 0; // check actual?

    if (!dcth_phin_run || dcth_phin_start) dcth_phin <= 0;
    else                                   dcth_phin <= dcth_phin + 1;

    // Enables of the two horizontal 1-D units
    if      (rst)            dcth_en0 <= 0;
    else if (dcth_start_0_w) dcth_en0 <= 1;
    else if (!x_run)         dcth_en0 <= 0; // maybe get rid of this signal and send start for each 8?

    if      (rst)            dcth_en1 <= 0;
    else if (dcth_start_1_w) dcth_en1 <= 1;
    else if (dcth_phin [6])  dcth_en1 <= 0; // maybe get rid of this signal and send start for each 8?

    //write input reorder memory (x_wa[3] selects the memory, x_wa[2:0] the word)
    if (x_run && !x_wa[3]) x_ram0[x_wa[2:0]] <= xin;
    if (x_run &&  x_wa[3]) x_ram1[x_wa[2:0]] <= xin;

    //read input reorder memory
    dcth_xin0 <= x_ram0[x_ra0[2:0]];
    dcth_xin1 <= x_ram1[x_ra1[2:0]];
    dcth_start_0_r <= dcth_start_0_w;
    dcth_start_1_r <= dcth_start_1_w;

    // DCT/DST flag of whichever horizontal unit is currently producing output
    pre_dsth <= dcth_en_out0 ? pre2_dsth[0] : pre2_dsth[1];

    // ---- stage 2: transpose memory write ------------------------------------
    if      (rst)                              transpose_in_run <= 0;
    else if (transpose_start)                  transpose_in_run <= 1;
    else if (transpose_cntr [6:0] == 7'h46)    transpose_in_run <= 0; // check actual?

    if (!transpose_in_run || transpose_start) transpose_cntr <= 0;
    else                                      transpose_cntr <= transpose_cntr + 1;

    // Write page advances after every 64 counts of a running write burst
    if      (rst)                                          transpose_w_page <= 0;
    else if (transpose_in_run && (&transpose_cntr[5:0]))   transpose_w_page <= transpose_w_page + 1;

    // Low write-address bits: lookup on transpose_cntr[3:0]; XOR with the replicated
    // pre_dsth bit inverts all three bits when the horizontal pass was a DST
    case (transpose_cntr[3:0])
        4'h0: transpose_wa_low <= 0 ^ {3{pre_dsth}};
        4'h1: transpose_wa_low <= 1 ^ {3{pre_dsth}};
        4'h2: transpose_wa_low <= 7 ^ {3{pre_dsth}};
        4'h3: transpose_wa_low <= 6 ^ {3{pre_dsth}};
        4'h4: transpose_wa_low <= 4 ^ {3{pre_dsth}};
        4'h5: transpose_wa_low <= 2 ^ {3{pre_dsth}};
        4'h6: transpose_wa_low <= 3 ^ {3{pre_dsth}};
        4'h7: transpose_wa_low <= 5 ^ {3{pre_dsth}};
        4'h8: transpose_wa_low <= 1 ^ {3{pre_dsth}};
        4'h9: transpose_wa_low <= 0 ^ {3{pre_dsth}};
        4'ha: transpose_wa_low <= 6 ^ {3{pre_dsth}};
        4'hb: transpose_wa_low <= 7 ^ {3{pre_dsth}};
        4'hc: transpose_wa_low <= 2 ^ {3{pre_dsth}};
        4'hd: transpose_wa_low <= 4 ^ {3{pre_dsth}};
        4'he: transpose_wa_low <= 5 ^ {3{pre_dsth}};
        4'hf: transpose_wa_low <= 3 ^ {3{pre_dsth}};
    endcase
    transpose_wa_high <= {transpose_w_page, transpose_cntr[5:4], transpose_cntr[0]} - {transpose_wa_decr,1'b0};
    // Two-stage delay of "any horizontal unit has valid output"; bit [1] is the write enable
    transpose_we <= {transpose_we[0],dcth_en_out0 | dcth_en_out1};

    // Write transpose memory (data and its debug companion)
    if (transpose_we[1]) transpose_ram[transpose_wa] <= transpose_di;
    if (transpose_we[1]) transpose_debug_ram[transpose_wa] <= transpose_debug_di;
//  if (transpose_we[1]) $display("%d %d @%t",transpose_cntr, transpose_wa, $time) ;

    // ---- stage 3: transpose memory read -------------------------------------
    // transpose_out_run is a 3-stage shift register; [0] is the run flag proper
    if      (rst)                    transpose_out_run[0] <= 0;
    else if (transpose_out_start)    transpose_out_run[0] <= 1;
    else if (&transpose_rcntr[5:0])  transpose_out_run[0] <= 0; // check actual?
    transpose_out_run[2:1] <= transpose_out_run[1:0];

    if (!transpose_out_run[0] || transpose_out_start) transpose_rcntr <= 0;
    else                                              transpose_rcntr <= transpose_rcntr + 1;

    // Read page is a snapshot of the write page taken at the start of the read burst
    if (transpose_out_start) transpose_r_page <= transpose_w_page;

    // Read transpose memory to 2 small reorder memories, use BRAM register
    if (transpose_out_run[0]) transpose_reg <= transpose_ram[transpose_ra];
    if (transpose_out_run[1]) transpose_out <= transpose_reg;
    if (transpose_out_run[0]) transpose_debug_reg <= transpose_debug_ram[transpose_ra];
    if (transpose_out_run[1]) transpose_debug_out <= transpose_debug_reg;

    // Write address of t_ram0/t_ram1, counts while the third read stage is active
    if (!transpose_out_run[2]) t_wa <= 0;
    else                       t_wa <= t_wa+1;

    // Vertical-pass phase counter and its run flag
    if      (rst)                        dctv_phin_run <= 0;
    else if (dctv_phin_start)            dctv_phin_run <= 1;
    else if (dctv_phin [6:0] == 7'h48)   dctv_phin_run <= 0; // check actual?

    if (!dctv_phin_run || dctv_phin_start) dctv_phin <= 0;
    else                                   dctv_phin <= dctv_phin + 1;

    // Enables of the two vertical 1-D units
    if      (rst)                     dctv_en0 <= 0;
    else if (dctv_start_0_w)          dctv_en0 <= 1;
    else if (!transpose_out_run[2])   dctv_en0 <= 0; // maybe get rid of this signal and send start for each 8?

    if      (rst)            dctv_en1 <= 0;
    else if (dctv_start_1_w) dctv_en1 <= 1;
    else if (dctv_phin[6])   dctv_en1 <= 0; // maybe get rid of this signal and send start for each 8?

    // DCT/DST flag of whichever vertical unit is currently producing output
    pre_dstv <= dctv_en_out0 ? pre2_dstv[0] : pre2_dstv[1];

    // Debug trace of every sample written to t_ram0/t_ram1. $display is a simulation
    // system task, so the statement is hidden from synthesis; simulation is unchanged.
    // synthesis translate_off
    if (t_we0 || t_we1) $display("%d %d",transpose_rcntr-2, transpose_out) ;
    // synthesis translate_on

    //write vertical dct input reorder memory
    if (t_we0) t_ram0[t_wa[2:0]] <= transpose_out;
    if (t_we1) t_ram1[t_wa[2:0]] <= transpose_out;
    if (t_we0) t_debug_ram0[t_wa[2:0]] <= transpose_debug_out;
    if (t_we1) t_debug_ram1[t_wa[2:0]] <= transpose_debug_out;

    //read vertical dct input reorder memory
    dctv_xin0 <= t_ram0[t_ra0[2:0]];
    dctv_xin1 <= t_ram1[t_ra1[2:0]];
    dctv_start_0_r <= dctv_start_0_w;
    dctv_start_1_r <= dctv_start_1_w;
    dctv_debug_xin0 <= t_debug_ram0[t_ra0[2:0]];
    dctv_debug_xin1 <= t_debug_ram1[t_ra1[2:0]];

    // ---- stage 4: output reordering through dctv_out_ram --------------------
    // Reordering data from a pair of vertical DCTs - 2 steps, 1 is not enough
    if      (rst)                           dctv_out_run <= 0;
    else if (dctv_out_start)                dctv_out_run <= 1;
    else if (dctv_out_cntr[6:0] == 'h47)    dctv_out_run <= 0;

    if (!dctv_out_run || dctv_out_start) dctv_out_cntr <= 0;
    else                                 dctv_out_cntr <= dctv_out_cntr + 1;

    dctv_out_we  <= {dctv_out_we[0], dctv_en_out0 | dctv_en_out1};
    dctv_out_sel <= dctv_out_cntr[0]; // odd counts take unit 1, even counts unit 0

    // Low 4 bits of the write address: lookup on dctv_out_cntr[3:0]; the XOR inverts
    // bits [2:0] (bit 3 is left alone) when the vertical pass was a DST
    case (dctv_out_cntr[3:0])
        4'h0: dctv_out_wa[3:0] <= 4'h0 ^ {1'b0,{3{pre_dstv}}};
        4'h1: dctv_out_wa[3:0] <= 4'h9 ^ {1'b0,{3{pre_dstv}}};
        4'h2: dctv_out_wa[3:0] <= 4'h7 ^ {1'b0,{3{pre_dstv}}};
        4'h3: dctv_out_wa[3:0] <= 4'he ^ {1'b0,{3{pre_dstv}}};
        4'h4: dctv_out_wa[3:0] <= 4'h4 ^ {1'b0,{3{pre_dstv}}};
        4'h5: dctv_out_wa[3:0] <= 4'ha ^ {1'b0,{3{pre_dstv}}};
        4'h6: dctv_out_wa[3:0] <= 4'h3 ^ {1'b0,{3{pre_dstv}}};
        4'h7: dctv_out_wa[3:0] <= 4'hd ^ {1'b0,{3{pre_dstv}}};
        4'h8: dctv_out_wa[3:0] <= 4'h1 ^ {1'b0,{3{pre_dstv}}};
        4'h9: dctv_out_wa[3:0] <= 4'h8 ^ {1'b0,{3{pre_dstv}}};
        4'ha: dctv_out_wa[3:0] <= 4'h6 ^ {1'b0,{3{pre_dstv}}};
        4'hb: dctv_out_wa[3:0] <= 4'hf ^ {1'b0,{3{pre_dstv}}};
        4'hc: dctv_out_wa[3:0] <= 4'h2 ^ {1'b0,{3{pre_dstv}}};
        4'hd: dctv_out_wa[3:0] <= 4'hc ^ {1'b0,{3{pre_dstv}}};
        4'he: dctv_out_wa[3:0] <= 4'h5 ^ {1'b0,{3{pre_dstv}}};
        4'hf: dctv_out_wa[3:0] <= 4'hb ^ {1'b0,{3{pre_dstv}}};
    endcase
    // High (page) bits of the write address: the counter's page bits, decremented by one
    // on cycles where dctv_out_cntr[3] is 0 and dctv_out_cntr[0] is 1.
    // (Original author's note, left unfinished: "It is possible to fill large output
    // memory buffer, in that case ...")
    dctv_out_wa[ODEPTH-1:4] <= dctv_out_cntr[ODEPTH-1:4] - (~dctv_out_cntr[3] & dctv_out_cntr[0]);

    // write first stage of output reordering
    if (dctv_out_we[1]) dctv_out_ram[dctv_out_wa] <= dctv_out_sel? dctv_dout1: dctv_dout0;
    if (dctv_out_we[1]) dctv_out_debug_ram[dctv_out_wa] <= dctv_out_sel? dctv_yindex1: dctv_yindex0;

    // Read burst: pre_dv is set by dctv_out_start_1 and cleared after 64 reads
    if      (rst)                 pre_dv <= 0;
    else if (dctv_out_start_1)    pre_dv <= 1;
    else if (&dctv_out_ra[5:0])   pre_dv <= 0;

    if (!pre_dv || dctv_out_start_1) dctv_out_ra <= 0;
    else                             dctv_out_ra <= dctv_out_ra + 1;

    // reading first stage of output reorder RAM.
    // The read index uses ODEPTH bits so it spans the same range as dctv_out_wa and the
    // RAM declaration (it was hard-coded as [4:0]; identical when ODEPTH == 5).
    if (pre_dv) dctv_out_reg <= dctv_out_ram[dctv_out_ra[ODEPTH-1:0]];
    if (pre_dv) dctv_out_debug_reg <= dctv_out_debug_ram[dctv_out_ra[ODEPTH-1:0]];
    pre_first_out <= pre_first_out_w;
    dv <= pre_dv;

    // ---- stage 5: alternative option (address/data for an external buffer) --
    // Reordering data from a pair of vertical DCTs - 2 steps, 1 is not enough
    if      (rst)                     out_run <= 0;
    else if (out_start)               out_run <= 1;
    else if (out_cntr[6:0] == 'h47)   out_run <= 0;

    if (!out_run || out_start) out_cntr <= 0;
    else                       out_cntr <= out_cntr + 1;

    // Channel select toggles every clock while running, restarting from 0
    if (out_start || !out_run) out_sel <= 0;
    else                       out_sel <= ~out_sel;

    if (dctv_out_we[1]) out_wd <= out_sel? dctv_dout1: dctv_dout0;
    dstv <= pre_dstv;

    // Same permutation table as dctv_out_wa[3:0], indexed by out_cntr and using dstv
    case (out_cntr[3:0])
        4'h0: out_wa[3:0] <= 4'h0 ^ {1'b0,{3{dstv}}};
        4'h1: out_wa[3:0] <= 4'h9 ^ {1'b0,{3{dstv}}};
        4'h2: out_wa[3:0] <= 4'h7 ^ {1'b0,{3{dstv}}};
        4'h3: out_wa[3:0] <= 4'he ^ {1'b0,{3{dstv}}};
        4'h4: out_wa[3:0] <= 4'h4 ^ {1'b0,{3{dstv}}};
        4'h5: out_wa[3:0] <= 4'ha ^ {1'b0,{3{dstv}}};
        4'h6: out_wa[3:0] <= 4'h3 ^ {1'b0,{3{dstv}}};
        4'h7: out_wa[3:0] <= 4'hd ^ {1'b0,{3{dstv}}};
        4'h8: out_wa[3:0] <= 4'h1 ^ {1'b0,{3{dstv}}};
        4'h9: out_wa[3:0] <= 4'h8 ^ {1'b0,{3{dstv}}};
        4'ha: out_wa[3:0] <= 4'h6 ^ {1'b0,{3{dstv}}};
        4'hb: out_wa[3:0] <= 4'hf ^ {1'b0,{3{dstv}}};
        4'hc: out_wa[3:0] <= 4'h2 ^ {1'b0,{3{dstv}}};
        4'hd: out_wa[3:0] <= 4'hc ^ {1'b0,{3{dstv}}};
        4'he: out_wa[3:0] <= 4'h5 ^ {1'b0,{3{dstv}}};
        4'hf: out_wa[3:0] <= 4'hb ^ {1'b0,{3{dstv}}};
    endcase
//  sub16 <= ~out_cntr[3] & out_cntr[0];
    sub16   <= ~out_cntr[3] & ~out_cntr[0] & out_run;
    inc16   <= out_cntr[3:0] == 'he;
    out_we  <= dctv_out_we[1];
    start64 <= out_cntr[6:0] == 'h0d;
end
// Read-address sequencer for the input reorder memories x_ram0 / x_ram1.
// Both addresses are looked up from the low 4 bits of the horizontal phase
// counter. x_ra1 walks the same address sequence as x_ra0, 9 phases later
// (modulo 16). Phases marked 'bx are don't-care slots.
// Legacy sequence note: X2-X7-X3-X4-X5-X6-X0-X1-*-X3-X5-X4-*-X1-X7-*
// NOTE(review): that note lists X1 at phase 0xd while the table below reads
// address 6 there - confirm against the input order expected by dtt_iv8_1d.
always @ (posedge clk) begin
    case (dcth_phin[3:0])
        //            unit 0           unit 1
        4'h0: begin x_ra0 <= 2;   x_ra1 <= 1;   end
        4'h1: begin x_ra0 <= 7;   x_ra1 <= 'bx; end
        4'h2: begin x_ra0 <= 3;   x_ra1 <= 3;   end
        4'h3: begin x_ra0 <= 4;   x_ra1 <= 5;   end
        4'h4: begin x_ra0 <= 5;   x_ra1 <= 4;   end
        4'h5: begin x_ra0 <= 6;   x_ra1 <= 'bx; end
        4'h6: begin x_ra0 <= 0;   x_ra1 <= 6;   end
        4'h7: begin x_ra0 <= 1;   x_ra1 <= 7;   end
        4'h8: begin x_ra0 <= 'bx; x_ra1 <= 'bx; end
        4'h9: begin x_ra0 <= 3;   x_ra1 <= 2;   end
        4'ha: begin x_ra0 <= 5;   x_ra1 <= 7;   end
        4'hb: begin x_ra0 <= 4;   x_ra1 <= 3;   end
        4'hc: begin x_ra0 <= 'bx; x_ra1 <= 4;   end
        4'hd: begin x_ra0 <= 6;   x_ra1 <= 5;   end
        4'he: begin x_ra0 <= 7;   x_ra1 <= 6;   end
        4'hf: begin x_ra0 <= 'bx; x_ra1 <= 0;   end
    endcase
end
// Read-address sequencer for the vertical-pass reorder memories t_ram0 / t_ram1.
// Both addresses are looked up from the low 4 bits of the vertical phase
// counter. t_ra1 walks the same address sequence as t_ra0, 9 phases later
// (modulo 16). Phases marked 'bx are don't-care slots.
// Legacy sequence note: X2-X7-X3-X4-X5-X6-X0-X1-*-X3-X5-X4-*-X1-X7-*
// NOTE(review): that note lists X1 at phase 0xd while the table below reads
// address 6 there - confirm against the input order expected by dtt_iv8_1d.
always @ (posedge clk) begin
    case (dctv_phin[3:0])
        //            unit 0           unit 1
        4'h0: begin t_ra0 <= 2;   t_ra1 <= 1;   end
        4'h1: begin t_ra0 <= 7;   t_ra1 <= 'bx; end
        4'h2: begin t_ra0 <= 3;   t_ra1 <= 3;   end
        4'h3: begin t_ra0 <= 4;   t_ra1 <= 5;   end
        4'h4: begin t_ra0 <= 5;   t_ra1 <= 4;   end
        4'h5: begin t_ra0 <= 6;   t_ra1 <= 'bx; end
        4'h6: begin t_ra0 <= 0;   t_ra1 <= 6;   end
        4'h7: begin t_ra0 <= 1;   t_ra1 <= 7;   end
        4'h8: begin t_ra0 <= 'bx; t_ra1 <= 'bx; end
        4'h9: begin t_ra0 <= 3;   t_ra1 <= 2;   end
        4'ha: begin t_ra0 <= 5;   t_ra1 <= 7;   end
        4'hb: begin t_ra0 <= 4;   t_ra1 <= 3;   end
        4'hc: begin t_ra0 <= 'bx; t_ra1 <= 4;   end
        4'hd: begin t_ra0 <= 6;   t_ra1 <= 5;   end
        4'he: begin t_ra0 <= 7;   t_ra1 <= 6;   end
        4'hf: begin t_ra0 <= 'bx; t_ra1 <= 0;   end
    endcase
end
// Horizontal (first) pass, unit 0: 1-D transform from INPUT_WIDTH samples to
// TRANSPOSE_WIDTH results, scaled by OUT_RSHIFT1. All connections are by name.
dtt_iv8_1d #(
    // data path widths and scaling
    .WIDTH          (INPUT_WIDTH),
    .OUT_WIDTH      (TRANSPOSE_WIDTH),
    .OUT_RSHIFT     (OUT_RSHIFT1),
    // DSP port widths
    .A_WIDTH        (DSP_A_WIDTH),
    .B_WIDTH        (DSP_B_WIDTH),
    .P_WIDTH        (DSP_P_WIDTH),
    // cosine table, passed through from this module's parameters
    .COSINE_SHIFT   (COSINE_SHIFT),
    .COS_01_32      (COS_01_32),
    .COS_03_32      (COS_03_32),
    .COS_04_32      (COS_04_32),
    .COS_05_32      (COS_05_32),
    .COS_07_32      (COS_07_32),
    .COS_08_32      (COS_08_32),
    .COS_09_32      (COS_09_32),
    .COS_11_32      (COS_11_32),
    .COS_12_32      (COS_12_32),
    .COS_13_32      (COS_13_32),
    .COS_15_32      (COS_15_32)
) dct_iv8_1d_pass1_0_i (
    // ---- inputs ----
    .clk            (clk),
    .rst            (rst),
    .en             (dcth_en0),
    .start          (dcth_start_0_r),
    .dst_in         (mode_h[1]),     // 0 - dct, 1 - dst. @ start/restart
    .d_in           (dcth_xin0),     // sample read from x_ram0
    // ---- outputs ----
    .dout           (dcth_dout0),
    .en_out         (dcth_en_out0),
    .dst_out        (pre2_dsth[0]),  // valid with en_out
    .y_index        (dcth_yindex0),
    .pre2_start_out ()               // unused (was pre2_start_outh[0])
);
// Horizontal (first) pass, unit 1: 1-D transform from INPUT_WIDTH samples to
// TRANSPOSE_WIDTH results, scaled by OUT_RSHIFT1. All connections are by name.
dtt_iv8_1d #(
    // data path widths and scaling
    .WIDTH          (INPUT_WIDTH),
    .OUT_WIDTH      (TRANSPOSE_WIDTH),
    .OUT_RSHIFT     (OUT_RSHIFT1),
    // DSP port widths
    .A_WIDTH        (DSP_A_WIDTH),
    .B_WIDTH        (DSP_B_WIDTH),
    .P_WIDTH        (DSP_P_WIDTH),
    // cosine table, passed through from this module's parameters
    .COSINE_SHIFT   (COSINE_SHIFT),
    .COS_01_32      (COS_01_32),
    .COS_03_32      (COS_03_32),
    .COS_04_32      (COS_04_32),
    .COS_05_32      (COS_05_32),
    .COS_07_32      (COS_07_32),
    .COS_08_32      (COS_08_32),
    .COS_09_32      (COS_09_32),
    .COS_11_32      (COS_11_32),
    .COS_12_32      (COS_12_32),
    .COS_13_32      (COS_13_32),
    .COS_15_32      (COS_15_32)
) dct_iv8_1d_pass1_1_i (
    // ---- inputs ----
    .clk            (clk),
    .rst            (rst),
    .en             (dcth_en1),
    .start          (dcth_start_1_r),
    .dst_in         (mode_h[1]),     // 0 - dct, 1 - dst. @ start/restart
    .d_in           (dcth_xin1),     // sample read from x_ram1
    // ---- outputs ----
    .dout           (dcth_dout1),
    .en_out         (dcth_en_out1),
    .dst_out        (pre2_dsth[1]),  // valid with en_out
    .y_index        (dcth_yindex1),
    .pre2_start_out ()               // unused (was pre2_start_outh[1])
);
//dcth_phin_run && (dcth_phin [6:0] ==9)
// Vertical (second) pass, unit 0: 1-D transform from TRANSPOSE_WIDTH samples to
// OUT_WIDTH results, scaled by OUT_RSHIFT2. All connections are by name.
dtt_iv8_1d #(
    // data path widths and scaling
    .WIDTH          (TRANSPOSE_WIDTH),
    .OUT_WIDTH      (OUT_WIDTH),
    .OUT_RSHIFT     (OUT_RSHIFT2),
    // DSP port widths
    .A_WIDTH        (DSP_A_WIDTH),
    .B_WIDTH        (DSP_B_WIDTH),
    .P_WIDTH        (DSP_P_WIDTH),
    // cosine table, passed through from this module's parameters
    .COSINE_SHIFT   (COSINE_SHIFT),
    .COS_01_32      (COS_01_32),
    .COS_03_32      (COS_03_32),
    .COS_04_32      (COS_04_32),
    .COS_05_32      (COS_05_32),
    .COS_07_32      (COS_07_32),
    .COS_08_32      (COS_08_32),
    .COS_09_32      (COS_09_32),
    .COS_11_32      (COS_11_32),
    .COS_12_32      (COS_12_32),
    .COS_13_32      (COS_13_32),
    .COS_15_32      (COS_15_32)
) dct_iv8_1d_pass2_0_i (
    // ---- inputs ----
    .clk            (clk),
    .rst            (rst),
    .en             (dctv_en0),
    .start          (dctv_start_0_r),
    .dst_in         (mode_v[0]),     // 0 - dct, 1 - dst. @ start/restart
    .d_in           (dctv_xin0),     // sample read from t_ram0
    // ---- outputs ----
    .dout           (dctv_dout0),
    .en_out         (dctv_en_out0),
    .dst_out        (pre2_dstv[0]),  // valid with en_out
    .y_index        (dctv_yindex0),
    .pre2_start_out ()               // unused (was pre2_start_outv[0])
);
// Vertical (second) pass, unit 1: 1-D transform from TRANSPOSE_WIDTH samples to
// OUT_WIDTH results, scaled by OUT_RSHIFT2. All connections are by name.
dtt_iv8_1d #(
    // data path widths and scaling
    .WIDTH          (TRANSPOSE_WIDTH),
    .OUT_WIDTH      (OUT_WIDTH),
    .OUT_RSHIFT     (OUT_RSHIFT2),
    // DSP port widths
    .A_WIDTH        (DSP_A_WIDTH),
    .B_WIDTH        (DSP_B_WIDTH),
    .P_WIDTH        (DSP_P_WIDTH),
    // cosine table, passed through from this module's parameters
    .COSINE_SHIFT   (COSINE_SHIFT),
    .COS_01_32      (COS_01_32),
    .COS_03_32      (COS_03_32),
    .COS_04_32      (COS_04_32),
    .COS_05_32      (COS_05_32),
    .COS_07_32      (COS_07_32),
    .COS_08_32      (COS_08_32),
    .COS_09_32      (COS_09_32),
    .COS_11_32      (COS_11_32),
    .COS_12_32      (COS_12_32),
    .COS_13_32      (COS_13_32),
    .COS_15_32      (COS_15_32)
) dct_iv8_1d_pass2_1_i (
    // ---- inputs ----
    .clk            (clk),
    .rst            (rst),
    .en             (dctv_en1),
    .start          (dctv_start_1_r),
    .dst_in         (mode_v[0]),     // 0 - dct, 1 - dst. @ start/restart
    .d_in           (dctv_xin1),     // sample read from t_ram1
    // ---- outputs ----
    .dout           (dctv_dout1),
    .en_out         (dctv_en_out1),
    .dst_out        (pre2_dstv[1]),  // valid with en_out
    .y_index        (dctv_yindex1),
    .pre2_start_out ()               // unused (was pre2_start_outv[1])
);
endmodule
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment