Commit cb389afb authored by Andrey Filippov

Compared both accumulator and DSP implementations

parent c4d0f3bc
...@@ -81,7 +81,7 @@ module mclt16x16_bayer#( ...@@ -81,7 +81,7 @@ module mclt16x16_bayer#(
); );
// When defined, use 2 DSP multipliers // When defined, use 2 DSP multipliers
`define DSP_ACCUM_FOLD 1 // `define DSP_ACCUM_FOLD 1
localparam DTT_OUT_DELAY = 128; // 191; // start output to sin/cos rotator, with checker - 2*64 +/=? localparam DTT_OUT_DELAY = 128; // 191; // start output to sin/cos rotator, with checker - 2*64 +/=?
localparam DTT_IN_DELAY = 62; // 69; // wa -ra min = 1 localparam DTT_IN_DELAY = 62; // 69; // wa -ra min = 1
......
...@@ -37,14 +37,14 @@ ...@@ -37,14 +37,14 @@
* with at least one of the Free Software programs. * with at least one of the Free Software programs.
*/ */
`timescale 1ns/1ps `timescale 1ns/1ps
`define DSP_ACCUM_FOLD 1
module mclt_baeyer_fold_accum # ( module mclt_baeyer_fold_accum # (
parameter PIXEL_WIDTH = 16, // input pixel width (unsigned) parameter PIXEL_WIDTH = 16, // input pixel width (unsigned)
parameter WND_WIDTH = 18, // input window width (signed) parameter WND_WIDTH = 18, // input window width (signed)
parameter DTT_IN_WIDTH = 25, // bits in DTT input parameter DTT_IN_WIDTH = 25, // bits in DTT input
parameter DSP_B_WIDTH = 18, // signed, output from sin/cos ROM parameter DSP_B_WIDTH = 18, // signed, output from sin/cos ROM // SuppressThisWarning VEditor - not always used
parameter DSP_A_WIDTH = 25, parameter DSP_A_WIDTH = 25, // SuppressThisWarning VEditor - not always used
parameter DSP_P_WIDTH = 48 parameter DSP_P_WIDTH = 48 // SuppressThisWarning VEditor - not always used
)( )(
input clk, input clk,
...@@ -56,17 +56,24 @@ module mclt_baeyer_fold_accum # ( ...@@ -56,17 +56,24 @@ module mclt_baeyer_fold_accum # (
input signed [WND_WIDTH-1:0] window, input signed [WND_WIDTH-1:0] window,
input var_pre2_first, input var_pre2_first,
output signed [DTT_IN_WIDTH-1:0] dtt_in, output signed [DTT_IN_WIDTH-1:0] dtt_in,
output reg dtt_in_dv output dtt_in_dv
); );
reg [PIXEL_WIDTH-1:0] pix_dr; // only for mpy to match dsp
reg var_pre_first; reg var_pre_first;
reg var_first; reg var_first;
reg var_last;
reg [6:0] phases;
`ifdef DSP_ACCUM_FOLD
reg dtt_in_dv_dsp_r;
reg signed [DTT_IN_WIDTH-1:0] dtt_in_dsp;
`else
wire [ 1:0] pix_sgn_d;
reg [PIXEL_WIDTH-1:0] pix_dr; // only for mpy to match dsp
reg signed [WND_WIDTH-1:0] window_r; reg signed [WND_WIDTH-1:0] window_r;
reg signed [PIXEL_WIDTH-1:0] pix_d_r; // registered pixel data (to be absorbed by MPY) reg signed [PIXEL_WIDTH-1:0] pix_d_r; // registered pixel data (to be absorbed by MPY)
wire [ 1:0] pix_sgn_d;
reg [ 1:0] pix_sgn_r; reg [ 1:0] pix_sgn_r;
reg var_last;
reg signed [PIXEL_WIDTH + WND_WIDTH - 1:0] pix_wnd_r; // MSB not used: positive[PIXEL_WIDTH]*positive[WND_WIDTH]->positive[PIXEL_WIDTH+WND_WIDTH-1] reg signed [PIXEL_WIDTH + WND_WIDTH - 1:0] pix_wnd_r; // MSB not used: positive[PIXEL_WIDTH]*positive[WND_WIDTH]->positive[PIXEL_WIDTH+WND_WIDTH-1]
reg signed [DTT_IN_WIDTH-1:0] pix_wnd_r2; // pixels (positive) multiplied by window(positive), two MSBs == 2'b0 to prevent overflow reg signed [DTT_IN_WIDTH-1:0] pix_wnd_r2; // pixels (positive) multiplied by window(positive), two MSBs == 2'b0 to prevent overflow
...@@ -78,70 +85,46 @@ module mclt_baeyer_fold_accum # ( ...@@ -78,70 +85,46 @@ module mclt_baeyer_fold_accum # (
+ pix_wnd_r[PIXEL_WIDTH + WND_WIDTH -DTT_IN_WIDTH -1] + pix_wnd_r[PIXEL_WIDTH + WND_WIDTH -DTT_IN_WIDTH -1]
`endif `endif
; ;
reg [6:0] phases;
reg signed [DTT_IN_WIDTH-1:0] data_cc_r; reg signed [DTT_IN_WIDTH-1:0] data_cc_r;
reg signed [DTT_IN_WIDTH-1:0] data_sc_r; reg signed [DTT_IN_WIDTH-1:0] data_sc_r;
reg signed [DTT_IN_WIDTH-1:0] data_sc_r2; // data_sc_r delayed by 1 cycle reg signed [DTT_IN_WIDTH-1:0] data_sc_r2; // data_sc_r delayed by 1 cycle
reg mode_mux;
reg dtt_in_dv_r;
reg signed [DTT_IN_WIDTH-1:0] data_dtt_in; // multiplexed DTT input data reg signed [DTT_IN_WIDTH-1:0] data_dtt_in; // multiplexed DTT input data
`endif
reg mode_mux;
assign dtt_in = data_dtt_in;
`ifdef DSP_ACCUM_FOLD
assign dtt_in = dtt_in_dsp;
assign dtt_in_dv = dtt_in_dv_dsp_r;
`else
assign dtt_in = data_dtt_in;
assign dtt_in_dv = dtt_in_dv_r;
`endif
always @ (posedge clk) begin always @ (posedge clk) begin
phases <= {phases[5:0], pre_phase}; phases <= {phases[5:0], pre_phase};
if (rst) dtt_in_dv <= 0;
else dtt_in_dv <= phases[6];
pix_dr <= pix_d;
if (phases[1]) begin
pix_d_r <= pix_dr;
window_r <= window;
end
if (phases[2]) pix_wnd_r <= pix_d_r * window_r; // 1 MSB is extra
if (phases[2]) begin if (phases[2]) begin
var_pre_first <= var_pre2_first; var_pre_first <= var_pre2_first;
end end
if (phases[3]) begin if (phases[3]) begin
pix_wnd_r2 <= {pix_wnd_r2_w[DTT_IN_WIDTH-2],pix_wnd_r2_w};
pix_sgn_r <= pix_sgn_d;
var_first <= var_pre_first; var_first <= var_pre_first;
end end
var_last <= var_first & phases[4]; var_last <= var_first & phases[4];
if (phases[4]) begin
data_cc_r <= (var_first ? {DTT_IN_WIDTH{1'b0}} : data_cc_r) + (pix_sgn_r[0]?(-pix_wnd_r2):pix_wnd_r2) ;
data_sc_r <= (var_first ? {DTT_IN_WIDTH{1'b0}} : data_sc_r) + (pix_sgn_r[1]?(-pix_wnd_r2):pix_wnd_r2) ;
data_sc_r2 <= data_sc_r;
end end
if (phases[5]) data_sc_r2 <= data_sc_r;
if (var_last) mode_mux <= 0;
else if (phases[6]) mode_mux <= mode_mux + 1;
if (phases[6]) case (mode_mux) `ifdef DSP_ACCUM_FOLD
1'b0: data_dtt_in <= data_cc_r; always @ (posedge clk) begin
1'b1: data_dtt_in <= data_sc_r2; if (rst) dtt_in_dv_dsp_r <= 0;
endcase else dtt_in_dv_dsp_r <= phases[5];
end end
dly_var #(
.WIDTH(2),
.DLY_WIDTH(4)
) dly_pix_sgn_i (
.clk (clk), // input
.rst (rst), // input
.dly (4'h1), // input[3:0]
.din (pix_sgn), // input[0:0]
.dout (pix_sgn_d) // output[0:0]
);
// wire sub_a1=0,sub_a2=0;
wire neg_m1, neg_m2; wire neg_m1, neg_m2;
// wire accum1= !var_pre_first;
// wire accum2= !var_first;
wire accum1= !var_pre2_first; wire accum1= !var_pre2_first;
wire accum2= !var_pre_first; wire accum2= !var_pre_first;
wire [DSP_P_WIDTH-1:0] pout1; wire [DSP_P_WIDTH-1:0] pout1;
...@@ -156,7 +139,6 @@ module mclt_baeyer_fold_accum # ( ...@@ -156,7 +139,6 @@ module mclt_baeyer_fold_accum # (
`endif `endif
; ;
reg signed [DTT_IN_WIDTH-1:0] dtt_in_dsp;
// wire signed [DTT_IN_WIDTH-2:0] pix_wnd_r2_w = pix_wnd_r[PIXEL_WIDTH + WND_WIDTH - 2 -: DTT_IN_WIDTH - 1] // wire signed [DTT_IN_WIDTH-2:0] pix_wnd_r2_w = pix_wnd_r[PIXEL_WIDTH + WND_WIDTH - 2 -: DTT_IN_WIDTH - 1]
always @ (posedge clk) begin always @ (posedge clk) begin
...@@ -225,7 +207,6 @@ module mclt_baeyer_fold_accum # ( ...@@ -225,7 +207,6 @@ module mclt_baeyer_fold_accum # (
) dly_neg_m1_i ( ) dly_neg_m1_i (
.clk (clk), // input .clk (clk), // input
.rst (rst), // input .rst (rst), // input
// .dly (4'h1), // input[3:0]
.dly (4'h0), // input[3:0] .dly (4'h0), // input[3:0]
.din (pix_sgn[0]), // input[0:0] .din (pix_sgn[0]), // input[0:0]
.dout (neg_m1) // output[0:0] .dout (neg_m1) // output[0:0]
...@@ -236,11 +217,58 @@ module mclt_baeyer_fold_accum # ( ...@@ -236,11 +217,58 @@ module mclt_baeyer_fold_accum # (
) dly_neg_m2_i ( ) dly_neg_m2_i (
.clk (clk), // input .clk (clk), // input
.rst (rst), // input .rst (rst), // input
// .dly (4'h2), // input[3:0]
.dly (4'h1), // input[3:0] .dly (4'h1), // input[3:0]
.din (pix_sgn[1]), // input[0:0] .din (pix_sgn[1]), // input[0:0]
.dout (neg_m2) // output[0:0] .dout (neg_m2) // output[0:0]
); );
`else
always @ (posedge clk) begin
if (rst) dtt_in_dv_r <= 0;
else dtt_in_dv_r <= phases[6];
pix_dr <= pix_d;
if (phases[1]) begin
pix_d_r <= pix_dr;
window_r <= window;
end
if (phases[2]) pix_wnd_r <= pix_d_r * window_r; // 1 MSB is extra
if (phases[3]) begin
pix_wnd_r2 <= {pix_wnd_r2_w[DTT_IN_WIDTH-2],pix_wnd_r2_w};
pix_sgn_r <= pix_sgn_d;
end
if (phases[4]) begin
data_cc_r <= (var_first ? {DTT_IN_WIDTH{1'b0}} : data_cc_r) + (pix_sgn_r[0]?(-pix_wnd_r2):pix_wnd_r2) ;
data_sc_r <= (var_first ? {DTT_IN_WIDTH{1'b0}} : data_sc_r) + (pix_sgn_r[1]?(-pix_wnd_r2):pix_wnd_r2) ;
data_sc_r2 <= data_sc_r;
end
if (phases[5]) data_sc_r2 <= data_sc_r;
if (var_last) mode_mux <= 0;
else if (phases[6]) mode_mux <= mode_mux + 1;
if (phases[6]) case (mode_mux)
1'b0: data_dtt_in <= data_cc_r;
1'b1: data_dtt_in <= data_sc_r2;
endcase
end
dly_var #(
.WIDTH(2),
.DLY_WIDTH(4)
) dly_pix_sgn_i (
.clk (clk), // input
.rst (rst), // input
.dly (4'h1), // input[3:0]
.din (pix_sgn), // input[0:0]
.dout (pix_sgn_d) // output[0:0]
);
`endif
endmodule endmodule
[*] [*]
[*] GTKWave Analyzer v3.3.78 (w)1999-2016 BSI [*] GTKWave Analyzer v3.3.78 (w)1999-2016 BSI
[*] Sun Dec 24 08:31:35 2017 [*] Sun Dec 24 17:48:46 2017
[*] [*]
[dumpfile] "/home/eyesis/nc393/elphel393/fpga-elphel/x393_branch_dct/simulation/mclt_test_02-20171224011207780.fst" [dumpfile] "/home/eyesis/nc393/elphel393/fpga-elphel/x393_branch_dct/simulation/mclt_test_02-20171224013140770.fst"
[dumpfile_mtime] "Sun Dec 24 08:12:14 2017" [dumpfile_mtime] "Sun Dec 24 08:31:46 2017"
[dumpfile_size] 1516519 [dumpfile_size] 1515496
[savefile] "/home/eyesis/nc393/elphel393/fpga-elphel/x393_branch_dct/mclt_test_02.sav" [savefile] "/home/eyesis/nc393/elphel393/fpga-elphel/x393_branch_dct/mclt_test_02.sav"
[timestart] 3589100 [timestart] 3552600
[size] 1814 1171 [size] 1201 767
[pos] 0 0 [pos] -1740 37
*-14.476400 3690400 355000 2885000 325000 7455000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 *-14.476400 3828100 355000 2885000 325000 7455000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
[treeopen] mclt_test_02. [treeopen] mclt_test_02.
[treeopen] mclt_test_02.mclt16x16_i.dtt_iv_8x8_ad_i.dct_iv8_1d_pass1_0_i. [treeopen] mclt_test_02.mclt16x16_i.dtt_iv_8x8_ad_i.dct_iv8_1d_pass1_0_i.
[treeopen] mclt_test_02.mclt16x16_i.dtt_iv_8x8_ad_i.dct_iv8_1d_pass2_0_i. [treeopen] mclt_test_02.mclt16x16_i.dtt_iv_8x8_ad_i.dct_iv8_1d_pass2_0_i.
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
[sst_width] 280 [sst_width] 280
[signals_width] 319 [signals_width] 319
[sst_expanded] 1 [sst_expanded] 1
[sst_vpaned_height] 343 [sst_vpaned_height] 203
@c00200 @c00200
-top -top
@28 @28
...@@ -1311,8 +1311,6 @@ mclt_test_02.mclt_bayer_fold_i.mclt_bayer_fold_i.wnd_a_w[7:0] ...@@ -1311,8 +1311,6 @@ mclt_test_02.mclt_bayer_fold_i.mclt_bayer_fold_i.wnd_a_w[7:0]
-fold -fold
@1000200 @1000200
-top -top
@29
mclt_test_02.mclt_bayer_fold_i.dtt_start_out
@200 @200
- -
@800200 @800200
...@@ -1644,8 +1642,6 @@ mclt_test_02.mclt_bayer_fold_i.dtt_dly_cntr[7:0] ...@@ -1644,8 +1642,6 @@ mclt_test_02.mclt_bayer_fold_i.dtt_dly_cntr[7:0]
mclt_test_02.mclt_bayer_fold_i.dtt_out_ram_wa[8:0] mclt_test_02.mclt_bayer_fold_i.dtt_out_ram_wa[8:0]
mclt_test_02.mclt_bayer_fold_i.dtt_out_ram_wah[4:0] mclt_test_02.mclt_bayer_fold_i.dtt_out_ram_wah[4:0]
mclt_test_02.mclt_bayer_fold_i.dtt_out_wa16[3:0] mclt_test_02.mclt_bayer_fold_i.dtt_out_wa16[3:0]
@28
mclt_test_02.mclt_bayer_fold_i.dtt_start_out
@c00022 @c00022
mclt_test_02.mclt_bayer_fold_i.dtt_rd_cntr_pre[8:0] mclt_test_02.mclt_bayer_fold_i.dtt_rd_cntr_pre[8:0]
@28 @28
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment