Commit 1db76b02 authored by Andrey Filippov

Comparing mclt Bayer with Java results, fixing bugs

parent 84e3107c
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
module mclt16x16_bayer#( module mclt16x16_bayer#(
parameter SHIFT_WIDTH = 7, // bits in shift (7 bits - fractional) parameter SHIFT_WIDTH = 7, // bits in shift (7 bits - fractional)
parameter PIX_ADDR_WIDTH = 9, // number of pixel address width parameter PIX_ADDR_WIDTH = 9, // number of pixel address width
// parameter EXT_PIX_LATENCY = 2, // external pixel buffer a->d latency parameter EXT_PIX_LATENCY = 2, // external pixel buffer a->d latency (may increase to 4 for gamma)
parameter COORD_WIDTH = 10, // bits in full coordinate 10 for 18K RAM parameter COORD_WIDTH = 10, // bits in full coordinate 10 for 18K RAM
parameter PIXEL_WIDTH = 16, // input pixel width (unsigned) parameter PIXEL_WIDTH = 16, // input pixel width (unsigned)
parameter WND_WIDTH = 18, // input pixel width (unsigned) parameter WND_WIDTH = 18, // input pixel width (unsigned)
...@@ -104,12 +104,12 @@ module mclt16x16_bayer#( ...@@ -104,12 +104,12 @@ module mclt16x16_bayer#(
reg inv_checker_r3; reg inv_checker_r3;
reg inv_checker_r4; reg inv_checker_r4;
wire [1:0] signs; //!< bit 0: sign to add to dtt-cc input, bit 1: sign to add to dtt-cs input wire [1:0] signs; //!< bit 0: sign to add to dtt-cc input, bit 1: sign to add to dtt-cs input
wire [6:0] phases; //!< other signals wire [6:0] phases; //!< other signals
wire signed [WND_WIDTH-1:0] window_w; wire signed [WND_WIDTH-1:0] window_w;
wire signed [DTT_IN_WIDTH-1:0] data_dtt_in; // multiplexed DTT input data wire signed [DTT_IN_WIDTH-1:0] data_dtt_in; // multiplexed DTT input data
reg [6:0] dtt_in_cntr; // reg [6:0] dtt_in_cntr; //
...@@ -175,7 +175,7 @@ module mclt16x16_bayer#( ...@@ -175,7 +175,7 @@ module mclt16x16_bayer#(
end end
// if (!phases[14]) dtt_in_cntr <= 0; // if (!phases[14]) dtt_in_cntr <= 0;
if (!dtt_we) dtt_in_cntr <= 0; if (!dtt_we) dtt_in_cntr <= 0;
else dtt_in_cntr <= dtt_in_cntr + 1; else dtt_in_cntr <= dtt_in_cntr + 1;
start_dtt <= dtt_in_cntr == DTT_IN_DELAY; start_dtt <= dtt_in_cntr == DTT_IN_DELAY;
...@@ -199,23 +199,20 @@ module mclt16x16_bayer#( ...@@ -199,23 +199,20 @@ module mclt16x16_bayer#(
end end
`ifdef DSP_ACCUM_FOLD
localparam ADDR_DLYL = 4 - EXT_PIX_LATENCY; // 4'h2; // 3 for mpy, 2 - for dsp
`else
localparam ADDR_DLYL = 5 - EXT_PIX_LATENCY; // 4'h3; // 3 for mpy, 2 - for dsp
`endif
mclt_bayer_fold #( mclt_bayer_fold #(
.SHIFT_WIDTH (SHIFT_WIDTH), .SHIFT_WIDTH (SHIFT_WIDTH),
.PIX_ADDR_WIDTH (PIX_ADDR_WIDTH), .PIX_ADDR_WIDTH (PIX_ADDR_WIDTH),
.ADDR_DLY (4'h1), // 2 for mpy, 1 - for dsp .ADDR_DLY (ADDR_DLYL), // 3 for mpy, 2 - for dsp
.COORD_WIDTH (COORD_WIDTH), .COORD_WIDTH (COORD_WIDTH),
// .PIXEL_WIDTH (PIXEL_WIDTH),
.WND_WIDTH (WND_WIDTH) .WND_WIDTH (WND_WIDTH)
// .OUT_WIDTH (OUT_WIDTH),
// .DTT_IN_WIDTH (DTT_IN_WIDTH),
// .TRANSPOSE_WIDTH (TRANSPOSE_WIDTH),
// .OUT_RSHIFT (OUT_RSHIFT),
// .OUT_RSHIFT2 (OUT_RSHIFT2),
// .DSP_B_WIDTH (DSP_B_WIDTH),
// .DSP_A_WIDTH (DSP_A_WIDTH),
// .DSP_P_WIDTH (DSP_P_WIDTH),
// .DEAD_CYCLES (DEAD_CYCLES)
) mclt_bayer_fold_i ( ) mclt_bayer_fold_i (
.clk (clk), // input .clk (clk), // input
.rst (rst), // input .rst (rst), // input
...@@ -233,7 +230,6 @@ module mclt16x16_bayer#( ...@@ -233,7 +230,6 @@ module mclt16x16_bayer#(
.signs (signs), // output[1:0] .signs (signs), // output[1:0]
.phases (phases), // output[7:0] .phases (phases), // output[7:0]
.var_pre2_first(var_pre2_first), // output .var_pre2_first(var_pre2_first), // output
// .var_first (), // var_first), // output reg
.pre_last_in (pre_last_in_w)// output reg .pre_last_in (pre_last_in_w)// output reg
); );
...@@ -268,9 +264,11 @@ module mclt16x16_bayer#( ...@@ -268,9 +264,11 @@ module mclt16x16_bayer#(
wire dtt_inc16; wire dtt_inc16;
reg [4:0] dtt_out_ram_cntr; reg [4:0] dtt_out_ram_cntr;
reg [4:0] dtt_out_ram_wah; reg [4:0] dtt_out_ram_wah;
reg [1:0] dtt_out_ram_wpage; // one of 4 pages (128 samples long) being written to reg [1:0] dtt_out_ram_wpage; // one of 4 pages (128 samples long) being written to
reg [1:0] dtt_out_ram_wpage2; // later by 1 DTT
wire dtt_start_fill; // some data available in DTT output buffer, OK to start consecutive readout wire dtt_start_fill; // some data available in DTT output buffer, OK to start consecutive readout
reg dtt_start_first_fill; reg dtt_start_first_fill;
reg dtt_start_second_fill;
reg [1:0] dtt_start_out; // start read out to sin/cos rotator reg [1:0] dtt_start_out; // start read out to sin/cos rotator
...@@ -305,7 +303,11 @@ module mclt16x16_bayer#( ...@@ -305,7 +303,11 @@ module mclt16x16_bayer#(
dtt_start_first_fill <= dtt_start_fill & dtt_first_quad_out; dtt_start_first_fill <= dtt_start_fill & dtt_first_quad_out;
if (dtt_start_first_fill) dtt_out_ram_wpage <= dtt_out_ram_wah[4:3]; dtt_start_second_fill<= dtt_start_fill & ~dtt_first_quad_out;
if (dtt_start_first_fill) dtt_out_ram_wpage <= dtt_out_ram_wah[4:3];
if (dtt_start_second_fill) dtt_out_ram_wpage2 <= dtt_out_ram_wpage;
if (rst) dtt_dly_cntr <= 0; if (rst) dtt_dly_cntr <= 0;
else if (dtt_start_first_fill) dtt_dly_cntr <= DTT_OUT_DELAY; else if (dtt_start_first_fill) dtt_dly_cntr <= DTT_OUT_DELAY;
...@@ -320,21 +322,10 @@ module mclt16x16_bayer#( ...@@ -320,21 +322,10 @@ module mclt16x16_bayer#(
if (rst) dtt_rd_regen_dv[3:1] <= 0; if (rst) dtt_rd_regen_dv[3:1] <= 0;
else dtt_rd_regen_dv[3:1] <= dtt_rd_regen_dv[2:0]; else dtt_rd_regen_dv[3:1] <= dtt_rd_regen_dv[2:0];
if (dtt_start_out[0]) dtt_rd_cntr_pre <= {dtt_out_ram_wpage, 7'b0}; //copy page number // if (dtt_start_out[0]) dtt_rd_cntr_pre <= {dtt_out_ram_wpage, 7'b0}; //copy page number
if (dtt_start_out[0]) dtt_rd_cntr_pre <= {dtt_out_ram_wpage2, 7'b0}; //copy page number
else if (dtt_rd_regen_dv[0]) dtt_rd_cntr_pre <= dtt_rd_cntr_pre + 1; else if (dtt_rd_regen_dv[0]) dtt_rd_cntr_pre <= dtt_rd_cntr_pre + 1;
/*
dtt_rd_ra0 <= {dtt_rd_cntr_pre[8:7],
dtt_rd_cntr_pre[6] ^ dtt_rd_cntr_pre[5],
dtt_rd_cntr_pre[5]? (~dtt_rd_cntr_pre[4:0]) : dtt_rd_cntr_pre[4:0],
dtt_rd_cntr_pre[5]};
dtt_rd_ra1 <= {dtt_rd_cntr_pre[8:7],
dtt_rd_cntr_pre[6] ^ dtt_rd_cntr_pre[5],
dtt_rd_cntr_pre[5]? (~dtt_rd_cntr_pre[4:0]) : dtt_rd_cntr_pre[4:0],
~dtt_rd_cntr_pre[5]};
*/
dtt_rd_ra0 <= {dtt_rd_cntr_pre[8:7], dtt_rd_ra0 <= {dtt_rd_cntr_pre[8:7],
dtt_rd_cntr_pre[0] ^ dtt_rd_cntr_pre[1], dtt_rd_cntr_pre[0] ^ dtt_rd_cntr_pre[1],
dtt_rd_cntr_pre[0]? (~dtt_rd_cntr_pre[6:2]) : dtt_rd_cntr_pre[6:2], dtt_rd_cntr_pre[0]? (~dtt_rd_cntr_pre[6:2]) : dtt_rd_cntr_pre[6:2],
...@@ -362,8 +353,8 @@ module mclt16x16_bayer#( ...@@ -362,8 +353,8 @@ module mclt16x16_bayer#(
.pix_sgn (signs), // input[1:0] .pix_sgn (signs), // input[1:0]
.window (window_w), // input[17:0] signed .window (window_w), // input[17:0] signed
.var_pre2_first (var_pre2_first), // input .var_pre2_first (var_pre2_first), // input
.dtt_in (data_dtt_in), // output[24:0] signed .dtt_in (data_dtt_in), // output[24:0] signed
.dtt_in_dv (dtt_we) // output reg .dtt_in_dv (dtt_we) // output reg
); );
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* with at least one of the Free Software programs. * with at least one of the Free Software programs.
*/ */
`timescale 1ns/1ps `timescale 1ns/1ps
`define DSP_ACCUM_FOLD 1 //`define DSP_ACCUM_FOLD 1
module mclt_baeyer_fold_accum # ( module mclt_baeyer_fold_accum # (
parameter PIXEL_WIDTH = 16, // input pixel width (unsigned) parameter PIXEL_WIDTH = 16, // input pixel width (unsigned)
parameter WND_WIDTH = 18, // input pixel width (unsigned) parameter WND_WIDTH = 18, // input pixel width (unsigned)
...@@ -49,15 +49,13 @@ module mclt_baeyer_fold_accum # ( ...@@ -49,15 +49,13 @@ module mclt_baeyer_fold_accum # (
)( )(
input clk, input clk,
input rst, input rst,
// input [5:0] phases,
input pre_phase, input pre_phase,
input signed [PIXEL_WIDTH-1:0] pix_d, //!< pixel data input signed [PIXEL_WIDTH-1:0] pix_d, //!< pixel data (should be 1 cycle later for `undef DSP_ACCUM_FOLD
input [1:0] pix_sgn, //!< bit 0: sign to add to dtt-cc input, bit 1: sign to add to dtt-cs input input [1:0] pix_sgn, //!< bit 0: sign to add to dtt-cc input, bit 1: sign to add to dtt-cs input
input signed [WND_WIDTH-1:0] window, input signed [WND_WIDTH-1:0] window,
input var_pre2_first, input var_pre2_first,
output signed [DTT_IN_WIDTH-1:0] dtt_in, output signed [DTT_IN_WIDTH-1:0] dtt_in,
output dtt_in_dv output dtt_in_dv
); );
reg var_pre_first; reg var_pre_first;
reg var_first; reg var_first;
...@@ -65,43 +63,6 @@ module mclt_baeyer_fold_accum # ( ...@@ -65,43 +63,6 @@ module mclt_baeyer_fold_accum # (
reg [6:0] phases; reg [6:0] phases;
`ifdef DSP_ACCUM_FOLD
reg dtt_in_dv_dsp_r;
reg signed [DTT_IN_WIDTH-1:0] dtt_in_dsp;
`else
wire [ 1:0] pix_sgn_d;
reg [PIXEL_WIDTH-1:0] pix_dr; // only for mpy to match dsp
reg signed [WND_WIDTH-1:0] window_r;
reg signed [PIXEL_WIDTH-1:0] pix_d_r; // registered pixel data (to be absorbed by MPY)
reg [ 1:0] pix_sgn_r;
reg signed [PIXEL_WIDTH + WND_WIDTH - 1:0] pix_wnd_r; // MSB not used: positive[PIXEL_WIDTH]*positive[WND_WIDTH]->positive[PIXEL_WIDTH+WND_WIDTH-1]
reg signed [DTT_IN_WIDTH-1:0] pix_wnd_r2; // pixels (positive) multiplied by window(positive), two MSBs == 2'b0 to prevent overflow
// rounding
// wire signed [DTT_IN_WIDTH-3:0] pix_wnd_r2_w = pix_wnd_r[PIXEL_WIDTH + WND_WIDTH - 2 -: DTT_IN_WIDTH - 2]
wire signed [DTT_IN_WIDTH-2:0] pix_wnd_r2_w = pix_wnd_r[PIXEL_WIDTH + WND_WIDTH - 2 -: DTT_IN_WIDTH - 1]
`ifdef ROUND
// + pix_wnd_r[PIXEL_WIDTH + WND_WIDTH -DTT_IN_WIDTH]
+ pix_wnd_r[PIXEL_WIDTH + WND_WIDTH -DTT_IN_WIDTH -1]
`endif
;
reg signed [DTT_IN_WIDTH-1:0] data_cc_r;
reg signed [DTT_IN_WIDTH-1:0] data_sc_r;
reg signed [DTT_IN_WIDTH-1:0] data_sc_r2; // data_sc_r delayed by 1 cycle
reg mode_mux;
reg dtt_in_dv_r;
reg signed [DTT_IN_WIDTH-1:0] data_dtt_in; // multiplexed DTT input data
`endif
`ifdef DSP_ACCUM_FOLD
assign dtt_in = dtt_in_dsp;
assign dtt_in_dv = dtt_in_dv_dsp_r;
`else
assign dtt_in = data_dtt_in;
assign dtt_in_dv = dtt_in_dv_r;
`endif
always @ (posedge clk) begin always @ (posedge clk) begin
phases <= {phases[5:0], pre_phase}; phases <= {phases[5:0], pre_phase};
...@@ -119,6 +80,12 @@ module mclt_baeyer_fold_accum # ( ...@@ -119,6 +80,12 @@ module mclt_baeyer_fold_accum # (
`ifdef DSP_ACCUM_FOLD `ifdef DSP_ACCUM_FOLD
reg dtt_in_dv_dsp_r;
reg signed [DTT_IN_WIDTH-1:0] dtt_in_dsp;
assign dtt_in = dtt_in_dsp;
assign dtt_in_dv = dtt_in_dv_dsp_r;
always @ (posedge clk) begin always @ (posedge clk) begin
if (rst) dtt_in_dv_dsp_r <= 0; if (rst) dtt_in_dv_dsp_r <= 0;
else dtt_in_dv_dsp_r <= phases[5]; else dtt_in_dv_dsp_r <= phases[5];
...@@ -223,13 +190,40 @@ module mclt_baeyer_fold_accum # ( ...@@ -223,13 +190,40 @@ module mclt_baeyer_fold_accum # (
); );
`else `else
wire [ 1:0] pix_sgn_d;
/// reg [PIXEL_WIDTH-1:0] pix_dr; // only for mpy to match dsp
reg signed [WND_WIDTH-1:0] window_r;
reg signed [PIXEL_WIDTH-1:0] pix_d_r; // registered pixel data (to be absorbed by MPY)
reg [ 1:0] pix_sgn_r;
reg signed [PIXEL_WIDTH + WND_WIDTH - 1:0] pix_wnd_r; // MSB not used: positive[PIXEL_WIDTH]*positive[WND_WIDTH]->positive[PIXEL_WIDTH+WND_WIDTH-1]
reg signed [DTT_IN_WIDTH-1:0] pix_wnd_r2; // pixels (positive) multiplied by window(positive), two MSBs == 2'b0 to prevent overflow
// rounding
// wire signed [DTT_IN_WIDTH-3:0] pix_wnd_r2_w = pix_wnd_r[PIXEL_WIDTH + WND_WIDTH - 2 -: DTT_IN_WIDTH - 2]
wire signed [DTT_IN_WIDTH-2:0] pix_wnd_r2_w = pix_wnd_r[PIXEL_WIDTH + WND_WIDTH - 2 -: DTT_IN_WIDTH - 1]
`ifdef ROUND
// + pix_wnd_r[PIXEL_WIDTH + WND_WIDTH -DTT_IN_WIDTH]
+ pix_wnd_r[PIXEL_WIDTH + WND_WIDTH -DTT_IN_WIDTH -1]
`endif
;
reg signed [DTT_IN_WIDTH-1:0] data_cc_r;
reg signed [DTT_IN_WIDTH-1:0] data_sc_r;
reg signed [DTT_IN_WIDTH-1:0] data_sc_r2; // data_sc_r delayed by 1 cycle
reg mode_mux;
reg dtt_in_dv_r;
reg signed [DTT_IN_WIDTH-1:0] data_dtt_in; // multiplexed DTT input data
assign dtt_in = data_dtt_in;
assign dtt_in_dv = dtt_in_dv_r;
always @ (posedge clk) begin always @ (posedge clk) begin
if (rst) dtt_in_dv_r <= 0; if (rst) dtt_in_dv_r <= 0;
else dtt_in_dv_r <= phases[6]; else dtt_in_dv_r <= phases[6];
pix_dr <= pix_d; /// pix_dr <= pix_d;
if (phases[1]) begin if (phases[1]) begin
pix_d_r <= pix_dr; /// pix_d_r <= pix_dr;
pix_d_r <= pix_d;
window_r <= window; window_r <= window;
end end
if (phases[2]) pix_wnd_r <= pix_d_r * window_r; // 1 MSB is extra if (phases[2]) pix_wnd_r <= pix_d_r * window_r; // 1 MSB is extra
......
...@@ -41,20 +41,9 @@ ...@@ -41,20 +41,9 @@
module mclt_bayer_fold#( module mclt_bayer_fold#(
parameter SHIFT_WIDTH = 7, // bits in shift (7 bits - fractional) parameter SHIFT_WIDTH = 7, // bits in shift (7 bits - fractional)
parameter PIX_ADDR_WIDTH = 9, // number of pixel address width parameter PIX_ADDR_WIDTH = 9, // number of pixel address width
// parameter EXT_PIX_LATENCY = 2, // external pixel buffer a->d latency parameter ADDR_DLY = 4'h3, // extra delay of pixel address to match window delay
parameter ADDR_DLY = 4'h2, // extra delay of pixel address to match window delay
parameter COORD_WIDTH = 10, // bits in full coordinate 10 for 18K RAM parameter COORD_WIDTH = 10, // bits in full coordinate 10 for 18K RAM
// parameter PIXEL_WIDTH = 16, // input pixel width (unsigned)
parameter WND_WIDTH = 18 // input pixel width (unsigned) parameter WND_WIDTH = 18 // input pixel width (unsigned)
// parameter OUT_WIDTH = 25, // bits in dtt output
// parameter DTT_IN_WIDTH = 25, // bits in DTT input
// parameter TRANSPOSE_WIDTH = 25, // width of the transpose memory (intermediate results)
// parameter OUT_RSHIFT = 2, // overall right shift of the result from input, aligned by MSB (>=3 will never cause saturation)
// parameter OUT_RSHIFT2 = 0, // overall right shift for the second (vertical) pass
// parameter DSP_B_WIDTH = 18, // signed, output from sin/cos ROM
// parameter DSP_A_WIDTH = 25,
// parameter DSP_P_WIDTH = 48,
// parameter DEAD_CYCLES = 14 // start next block immedaitely, or with longer pause
)( )(
input clk, //!< system clock, posedge input clk, //!< system clock, posedge
input rst, //!< sync reset input rst, //!< sync reset
...@@ -71,14 +60,12 @@ module mclt_bayer_fold#( ...@@ -71,14 +60,12 @@ module mclt_bayer_fold#(
output pix_page, //!< copy pixel page (should be externally combined with first color) output pix_page, //!< copy pixel page (should be externally combined with first color)
output signed [WND_WIDTH-1:0] window, //!< msb==0, always positive output signed [WND_WIDTH-1:0] window, //!< msb==0, always positive
output [1:0] signs, //!< bit 0: sign to add to dtt-cc input, bit 1: sign to add to dtt-cs input output [1:0] signs, //!< bit 0: sign to add to dtt-cc input, bit 1: sign to add to dtt-cs input
// output [14:0] phases, //!< other signals
output [6:0] phases, //!< other signals output [6:0] phases, //!< other signals
output var_pre2_first,//!< two ahead of first of 2 fold variants (4 for monochrome, 2 left for checker) output var_pre2_first,//!< two ahead of first of 2 fold variants (4 for monochrome, 2 left for checker)
// output reg var_first, //!< first of 2 fold variants (4 for monochrome, 2 left for checker)
output reg pre_last_in //!< pre last data in output reg pre_last_in //!< pre last data in
); );
reg [6:0] in_cntr; // input phase counter reg [6:0] in_cntr; // input phase counter
// reg [14:0] run_r; // run phase
reg [6:0] run_r; // run phase reg [6:0] run_r; // run phase
reg [1:0] tile_size_r; // 0: 16x16, 1 - 18x18, 2 - 20x20, 3 - 22x22 (max for 9-bit addr) reg [1:0] tile_size_r; // 0: 16x16, 1 - 18x18, 2 - 20x20, 3 - 22x22 (max for 9-bit addr)
...@@ -86,9 +73,11 @@ module mclt_bayer_fold#( ...@@ -86,9 +73,11 @@ module mclt_bayer_fold#(
reg [7:0] top_left_r0; // index of the 16x16 top left corner reg [7:0] top_left_r0; // index of the 16x16 top left corner
reg [7:0] top_left_r; // index of the 16x16 top left corner reg [7:0] top_left_r; // index of the 16x16 top left corner
reg [1:0] valid_rows_r0;// 3 for green, 1 or 2 for R/B - which of the even/odd checker rows contain pixels reg [1:0] valid_rows_r0;// 3 for green, 1 or 2 for R/B - which of the even/odd checker rows contain pixels
// reg [1:0] valid_rows_r ;// correct latency for window rom wire [ 9:0] fold_addr= {tile_size_r,inv_checker_r, (valid_rows_r0==3)?
/// wire [ 9:0] fold_addr= {tile_size_r,inv_checker_r, in_cntr[0],in_cntr[6:1]}; in_cntr[0]:
wire [ 9:0] fold_addr= {tile_size_r,inv_checker_r, (valid_rows_r0==3)?in_cntr[0]:valid_rows_r0[0],in_cntr[6:1]}; // valid_rows_r0[0],
~valid_rows_r0[0],
in_cntr[6:1]};
reg [SHIFT_WIDTH-1:0] x_shft_r0; // tile pixel X fractional shift (valid @ start) reg [SHIFT_WIDTH-1:0] x_shft_r0; // tile pixel X fractional shift (valid @ start)
reg [SHIFT_WIDTH-1:0] y_shft_r0; // tile pixel Y fractional shift (valid @ start) reg [SHIFT_WIDTH-1:0] y_shft_r0; // tile pixel Y fractional shift (valid @ start)
reg [SHIFT_WIDTH-1:0] x_shft_r; // matching delay reg [SHIFT_WIDTH-1:0] x_shft_r; // matching delay
...@@ -100,28 +89,16 @@ module mclt_bayer_fold#( ...@@ -100,28 +89,16 @@ module mclt_bayer_fold#(
wire [PIX_ADDR_WIDTH-1:0] pix_a_w = {~fold_rom_out[15] & fold_rom_out[7],fold_rom_out[15:8]}; wire [PIX_ADDR_WIDTH-1:0] pix_a_w = {~fold_rom_out[15] & fold_rom_out[7],fold_rom_out[15:8]};
reg [PIX_ADDR_WIDTH-1:0] pix_a_r; reg [PIX_ADDR_WIDTH-1:0] pix_a_r;
wire [ 1:0] sgn_w = fold_rom_out[16 +: 2]; wire [ 1:0] sgn_w = fold_rom_out[16 +: 2];
// reg blank_r; // blank window (latency 1 from fold_rom_out)
// wire blank_d; // delayed to matchwindow rom regrst
wire pre_page = in_cntr == 2; // valid 1 cycle before fold_rom_out wire pre_page = in_cntr == 2; // valid 1 cycle before fold_rom_out
wire var_first_d; // adding subtracting first variant of 2 folds wire var_first_d; // adding subtracting first variant of 2 folds
// reg var_pre_first;
assign phases = run_r; assign phases = run_r;
assign var_pre2_first = var_first_d; assign var_pre2_first = var_first_d;
// wire [ 3:0] bayer_1hot = { mpix_a_w[4] & mpix_a_w[0],
// mpix_a_w[4] & ~mpix_a_w[0],
// ~mpix_a_w[4] & mpix_a_w[0],
// ~mpix_a_w[4] & ~mpix_a_w[0]};
// wire mpix_use = |(bayer_d & bayer_1hot); //not disabled by bayer, valid with mpix_a_w
// wire mpix_use_d; // delayed
// reg mpix_use_r; // delayed
always @ (posedge clk) begin always @ (posedge clk) begin
if (rst) run_r <= 0; if (rst) run_r <= 0;
// else run_r <= {run_r[13:0], start | (run_r[0] & ~(&in_cntr[6:0]))};
else run_r <= {run_r[5:0], start | (run_r[0] & ~(&in_cntr[6:0]))}; else run_r <= {run_r[5:0], start | (run_r[0] & ~(&in_cntr[6:0]))};
if (!run_r[0]) in_cntr <= 0; if (!run_r[0]) in_cntr <= 0;
...@@ -145,18 +122,6 @@ module mclt_bayer_fold#( ...@@ -145,18 +122,6 @@ module mclt_bayer_fold#(
if (run_r[2]) pix_a_r <= pix_a_w + {1'b0, top_left_r}; if (run_r[2]) pix_a_r <= pix_a_w + {1'b0, top_left_r};
/// if (in_cntr == 2) valid_rows_r <= valid_rows_r0;
/// blank_r <= ~(wnd_a_w[0] ? valid_rows_r[1]: valid_rows_r[0]);
/// if (run_r[9]) var_pre_first <= var_first_d;
/// if (run_r[10]) begin
// var_first <= var_first_d;
/// var_first <= var_pre_first;
/// end
pre_last_in <= in_cntr[6:0] == 7'h7d; pre_last_in <= in_cntr[6:0] == 7'h7d;
...@@ -192,18 +157,25 @@ module mclt_bayer_fold#( ...@@ -192,18 +157,25 @@ module mclt_bayer_fold#(
); );
// Matching window latency with pixel data latency // Matching window latency with pixel data latency
wire [3:0] addr_dly = ADDR_DLY; generate
dly_var #( if (ADDR_DLY !=0) begin
.WIDTH(11), wire [3:0] addr_dly = ADDR_DLY - 1; // iverilog problem mitigation
.DLY_WIDTH(4) dly_var #(
) dly_pixel_addr_i ( .WIDTH(11),
.clk (clk), // input .DLY_WIDTH(4)
.rst (rst), // input ) dly_pixel_addr_i (
// .dly (4'h2), // input[3:0] Delay for external memory latency = 2, reduce for higher .clk (clk), // input
.dly (addr_dly), // input[3:0] Delay for external memory latency = 2, reduce for higher .rst (rst), // input
.din ({pre_page, run_r[3], pix_a_r}), // input[0:0] .dly (addr_dly), // input[3:0] Delay for external memory latency = 2, reduce for higher
.dout ({pix_page, pix_re, pix_addr}) // output[0:0] .din ({pre_page, run_r[3], pix_a_r}), // input[0:0]
); .dout ({pix_page, pix_re, pix_addr}) // output[0:0]
);
end else begin
assign pix_page = pre_page;
assign pix_re = run_r[3];
assign pix_addr = pix_a_r;
end
endgenerate
// Latency = 6 // Latency = 6
mclt_wnd_mul #( mclt_wnd_mul #(
...@@ -238,13 +210,10 @@ module mclt_bayer_fold#( ...@@ -238,13 +210,10 @@ module mclt_bayer_fold#(
) dly_var_first_i ( ) dly_var_first_i (
.clk (clk), // input .clk (clk), // input
.rst (rst), // input .rst (rst), // input
// .dly (4'h9), // input[3:0]
.dly (4'h8), // input[3:0] .dly (4'h8), // input[3:0]
.din (run_r[0] && (in_cntr[0] == 0)), // input[0:0] .din (run_r[0] && (in_cntr[0] == 0)), // input[0:0]
.dout (var_first_d) // output[0:0] .dout (var_first_d) // output[0:0]
); );
//
endmodule endmodule
...@@ -508,7 +508,9 @@ module mclt_test_03 (); ...@@ -508,7 +508,9 @@ module mclt_test_03 ();
); );
localparam PIX_ADDR_WIDTH = 9; localparam PIX_ADDR_WIDTH = 9;
localparam ADDR_DLY = 2; // localparam ADDR_DLY = 2;
localparam EXT_PIX_LATENCY = 2; // external pixel buffer a->d latency (may increase to 4 for gamma)
reg [1:0] TILE_SIZE = 3; // 22; reg [1:0] TILE_SIZE = 3; // 22;
reg INV_CHECKER = 0; reg INV_CHECKER = 0;
reg [7:0] TOP_LEFT = 69; // center reg [7:0] TOP_LEFT = 69; // center
...@@ -540,6 +542,7 @@ module mclt_test_03 (); ...@@ -540,6 +542,7 @@ module mclt_test_03 ();
mclt16x16_bayer #( mclt16x16_bayer #(
.SHIFT_WIDTH (SHIFT_WIDTH), .SHIFT_WIDTH (SHIFT_WIDTH),
.PIX_ADDR_WIDTH (PIX_ADDR_WIDTH), .PIX_ADDR_WIDTH (PIX_ADDR_WIDTH),
.EXT_PIX_LATENCY (EXT_PIX_LATENCY), // 2), // external pixel buffer a->d latency (may increase to 4 for gamma)
.COORD_WIDTH (COORD_WIDTH), .COORD_WIDTH (COORD_WIDTH),
.PIXEL_WIDTH (PIXEL_WIDTH), .PIXEL_WIDTH (PIXEL_WIDTH),
.WND_WIDTH (WND_WIDTH), .WND_WIDTH (WND_WIDTH),
...@@ -552,7 +555,7 @@ module mclt_test_03 (); ...@@ -552,7 +555,7 @@ module mclt_test_03 ();
.DSP_A_WIDTH (DSP_A_WIDTH), .DSP_A_WIDTH (DSP_A_WIDTH),
.DSP_P_WIDTH (DSP_P_WIDTH), .DSP_P_WIDTH (DSP_P_WIDTH),
.DEAD_CYCLES (DEAD_CYCLES) .DEAD_CYCLES (DEAD_CYCLES)
) mclt_bayer_fold_i ( ) mclt16x16_bayer_i (
.clk (CLK), // input .clk (CLK), // input
.rst (RST), // input .rst (RST), // input
.start (start), // input .start (start), // input
...@@ -565,7 +568,16 @@ module mclt_test_03 (); ...@@ -565,7 +568,16 @@ module mclt_test_03 ();
.pix_addr (PIX_ADDR9), // output[8:0] .pix_addr (PIX_ADDR9), // output[8:0]
.pix_re (PIX_RE), // output .pix_re (PIX_RE), // output
.pix_page (PIX_PAGE), // output .pix_page (PIX_PAGE), // output
.pix_d (PIX_D) // input[15:0] .pix_d (PIX_D), // input[15:0]
.pre_busy (), // output
.pre_last_in (), // output
.pre_first_out (), // output
.pre_last_out (), // output
.out_addr (), // output[7:0]
.dv (), // output
.dout0 (), // output[24:0] signed
.dout1 () // output[24:0] signed
); );
......
This diff is collapsed.
...@@ -43,6 +43,9 @@ ...@@ -43,6 +43,9 @@
`define DEBUG_HISTOGRAMS `define DEBUG_HISTOGRAMS
// TODO: Later compare instantiate/infer // TODO: Later compare instantiate/infer
`define INSTANTIATE_DSP48E1 // not yet tesetd/debugged otherwise `define INSTANTIATE_DSP48E1 // not yet tesetd/debugged otherwise
`define DSP_ACCUM_FOLD 1 // for MCLT
// https://forums.xilinx.com/t5/Embedded-Processor-System-Design/AXI4-Bursts-4KB-Address-Boundary-Limitation/td-p/216413 // https://forums.xilinx.com/t5/Embedded-Processor-System-Design/AXI4-Bursts-4KB-Address-Boundary-Limitation/td-p/216413
// Interconnect does not have 4K limit, and compressed data can only go to interconnect (memory), so it is OK to violate AXI specs here // Interconnect does not have 4K limit, and compressed data can only go to interconnect (memory), so it is OK to violate AXI specs here
`define AXI_4K_LIMIT_DISABLE // Current x393 code (only simulation modules) does not have it implemented, defining it causes mismatch synth/sim `define AXI_4K_LIMIT_DISABLE // Current x393 code (only simulation modules) does not have it implemented, defining it causes mismatch synth/sim
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment