Commit 6ed7aef1 authored by Andrey Filippov's avatar Andrey Filippov

bug fixed in DCT and other places

parent 30417be4
This diff is collapsed.
......@@ -383,7 +383,9 @@ module x393_dut#(
`endif
`ifdef HISPI
parameter HBLANK= 92; // 72; // 62; // 52; // 90; // 12; /// 52; //*********************
parameter HBLANK= 132; // all in time
/// parameter HBLANK= 122; // 72; // 62; // 52; // 90; // 12; /// 52; //********************* Still too fast
/// parameter HBLANK= 92; // 72; // 62; // 52; // 90; // 12; /// 52; //*********************
parameter BLANK_ROWS_BEFORE= 9; /// 3; // 9; // 3; //8; ///2+2 - a little faster than compressor
parameter BLANK_ROWS_AFTER= 8; /// 1; //8;
......
......@@ -94,21 +94,23 @@ module cmprs_frame_sync#(
*/
// wire vsync_late_mclk; // single mclk cycle, reclocked from vsync_late
// wire frame_started_mclk;
reg bonded_mode;
reg frame_start_dst_r;
reg frames_differ; // src and dest point to different frames (single-frame buffer mode), disregard line_unfinished_*
reg frames_numbers_differ; // src and dest point to different frames (multi-frame buffer mode), disregard line_unfinished_*
reg line_numbers_sync; // src unfinished line number is > this unfinished line number
reg bonded_mode;
reg [3:0] frame_start_dst_r;
reg frame_start_pend_r; // postpone frame_start_dst if previous frame was still being read during vsync_late
reg frames_differ; // src and dest point to different frames (single-frame buffer mode), disregard line_unfinished_*
reg frames_numbers_differ; // src and dest point to different frames (multi-frame buffer mode), disregard line_unfinished_*
reg line_numbers_sync; // src unfinished line number is > this unfinished line number
reg reading_frame_r; // compressor is reading frame data (make sure input is done before starting next frame, otherwise make it a broken frame
reg broken_frame;
reg aborted_frame;
reg stuffer_running_mclk_r;
reg reading_frame_r; // compressor is reading frame data (make sure input is done before starting next frame, otherwise make it a broken frame
// reg broken_frame;
reg aborted_frame;
reg stuffer_running_mclk_r;
reg [CMPRS_TIMEOUT_BITS-1:0] timeout;
reg cmprs_en_extend_r=0;
reg cmprs_en_d;
reg cmprs_en_extend_r=0;
reg cmprs_en_d;
reg suspend_end; // suspend at the end of the current frame until frame number changes
// reg cmprs_en_xclk;
assign frame_start_dst = frame_start_dst_r;
assign frame_start_dst = frame_start_dst_r[0];
assign cmprs_en_extend = cmprs_en_extend_r;
assign stuffer_running_mclk = stuffer_running_mclk_r;
......@@ -132,17 +134,32 @@ module cmprs_frame_sync#(
cmprs_en_d <= cmprs_en;
broken_frame <= cmprs_en && cmprs_run && vsync_late && reading_frame_r; // single xclk pulse
/// broken_frame <= cmprs_en && cmprs_run && vsync_late && reading_frame_r; // single xclk pulse
aborted_frame <= cmprs_en_d && !cmprs_en && stuffer_running_mclk_r;
if (!stuffer_running_mclk_r ||!cmprs_en_extend_r) force_flush_long <= 0;
else if (broken_frame || aborted_frame) force_flush_long <= 1;
/// else if (broken_frame || aborted_frame) force_flush_long <= 1;
else if (aborted_frame) force_flush_long <= 1;
if (!cmprs_en || frame_done || (cmprs_run && vsync_late)) reading_frame_r <= 0;
/// if (!cmprs_en || frame_done || (cmprs_run && vsync_late)) reading_frame_r <= 0;
if (!cmprs_en || frame_done ) reading_frame_r <= 0;
else if (frame_started_mclk) reading_frame_r <= 1;
frame_start_dst_r <= cmprs_en && (cmprs_run ? (vsync_late && !reading_frame_r) : cmprs_standalone);
if (!cmprs_en) frame_start_pend_r <= 0;
else if (cmprs_run && vsync_late && reading_frame_r) frame_start_pend_r <= 1;
else if (frame_start_dst_r[0]) frame_start_pend_r <= 0;
if (!cmprs_en) suspend_end <= 0;
else if (frame_done) suspend_end <= 1;
else if (frame_start_dst_r[3]) suspend_end <= 0;
frame_start_dst_r[0] <= cmprs_en && (cmprs_run ?
((vsync_late && !reading_frame_r) || (frame_start_pend_r && frame_done)):
cmprs_standalone);
if (!cmprs_en) frame_start_dst_r[3:1] <=0;
else frame_start_dst_r[3:1] <= frame_start_dst_r[2:0];
if (!cmprs_en) bonded_mode <= 0;
else if (cmprs_run) bonded_mode <= 1;
else if (cmprs_standalone) bonded_mode <= 0;
......@@ -150,12 +167,10 @@ module cmprs_frame_sync#(
if (!cmprs_en || !cmprs_run || vsync_late) frames_differ <= 0;
else if (frame_done_src) frames_differ <= 1'b1;
frames_numbers_differ <= frame_number_src != frame_number;
frames_numbers_differ <= !suspend_end && (frame_number_src != frame_number); // during end of frame, before frame number is incremented
line_numbers_sync <= (line_unfinished_src > line_unfinished);
// suspend <= !bonded_mode && ((sigle_frame_buf ? frames_differ : frames_numbers_differ) || line_numbers_sync);
// suspend <= bonded_mode && ((sigle_frame_buf ? frames_differ : frames_numbers_differ) || !line_numbers_sync);
suspend <= bonded_mode && !((sigle_frame_buf ? frames_differ : frames_numbers_differ) || line_numbers_sync);
......
......@@ -123,7 +123,9 @@ module cmprs_macroblock_buf_iface #(
// reg first_mb; // from mb_pre_start[2] to mb_pre_start[1]
wire starting;
reg frame_pre_run;
reg [1:0] frame_may_start;
// reg [1:0] frame_may_start;
reg frame_start_xclk_r; // next cycle after frame_start_xclk
reg [5:0] dct_pipeline_delay_cntr;
......@@ -143,19 +145,27 @@ module cmprs_macroblock_buf_iface #(
assign last_mb = mb_last_row && mb_last_in_row;
assign starting = |mb_pre_start;
// assign mb_pre_start_w = (mb_pre_end_in && (!last_mb || frame_en_w)) || (!frame_pre_run && frame_en_w && !frame_en_r && !starting);
// assign frame_pre_start_w = frame_en_w && ((mb_pre_end_in && last_mb) || (!frame_pre_run && !frame_en_r && !starting));
assign mb_pre_start_w = (mb_pre_end_in && (!last_mb || frame_may_start)) || ((frame_may_start==2'b1) && !frame_pre_run && !starting);
/*
assign mb_pre_start_w = (mb_pre_end_in && (!last_mb || frame_may_start)) ||
((frame_may_start==2'b1) && !frame_pre_run && !starting);
assign frame_pre_start_w = frame_may_start[0] && ((mb_pre_end_in && last_mb) || (!frame_pre_run && !frame_may_start[1] && !starting));
*/
assign mb_pre_start_w = mb_pre_end_in || (frame_start_xclk_r && !frame_pre_run && !starting);
assign frame_pre_start_w = (mb_pre_end_in && last_mb) || (frame_start_xclk_r && !frame_pre_run && !starting);
//frame_start_xclk_r
assign start_page = next_invalid[1:0]; // oldest page needed for this macroblock
always @ (posedge xclk) begin
if (!frame_en) frame_start_xclk_r <= 0;
else frame_start_xclk_r <= frame_start_xclk;
if (!frame_en) frame_en_r <= 0;
else frame_en_r <= frame_en_w;
if (!frame_en_w || starting) frame_may_start[0] <= 0;
else if (frame_start_xclk) frame_may_start[0] <= 1;
frame_may_start[1] <= frame_may_start[0];
/// if (!frame_en_w || starting) frame_may_start[0] <= 0;
/// else if (frame_start_xclk) frame_may_start[0] <= 1;
/// frame_may_start[1] <= frame_may_start[0];
frame_pre_start_r <= frame_pre_start_w; // same time as mb_pre_start
......
......@@ -133,6 +133,18 @@ module cmprs_pixel_buf_iface #(
// assign data_out = do_r;
assign pre_first_out = pre_first_out_r[0];
assign pre2_first_out = pre_first_out_r[1];
`ifdef DEBUG_COMPRESSOR_SCRAMBLE
wire [31:0] DBG_SCRAMBLED;
scrambler #(
.DATA_BYTE_WIDTH(4)
) scrambler_i (
.clk (xclk), // input wire
.rst (mb_pre_start), // input wire
.val_in (buf_re[CMPRS_BUF_EXTRA_LATENCY+2]), // input wire
.data_in ({24'b0,buf_di}), // input[31:0] wire
.data_out (DBG_SCRAMBLED) // output[31:0] wire
);
`endif
always @(posedge xclk) begin
// mb_h <= mb_h_m1+1; // macroblock height
......
......@@ -153,7 +153,7 @@ module jp_channel#(
output next_page_chn, // single mclk (posedge): Done with the page in the buffer, memory controller may read more data
// Master(sensor)/slave(compressor) synchronization signals
output frame_start_dst, // @mclk - trigger receive (tiledc) memory channel (it will take care of single/repetitive
// these output either follows vsync_late (reclocks it) or generated in non-bonded mode
// this output either follows vsync_late (reclocks it) or generated in non-bonded mode
// (compress from memory)
input [FRAME_HEIGHT_BITS-1:0] line_unfinished_src,// number of the current (unfinished ) line, in the source (sensor) channel (RELATIVE TO FRAME, NOT WINDOW?)
input [LAST_FRAME_BITS-1:0] frame_number_src, // current frame number (for multi-frame ranges) in the source (sensor) channel
......@@ -723,7 +723,7 @@ module jp_channel#(
.coring_we (set_coring_w), // input - write color saturation values
.di (cmd_data), // input[31:0] - 32-bit data to write to control register (24LSB are used)
.frame_start (frame_start_dst), // input @mclk
.frame_start_xclk (frame_start_xclk), // re-clocked, parameters are copied during this pulse
.frame_start_xclk (frame_start_xclk), // output re-clocked, parameters are copied during this pulse
.cmprs_en_mclk (cmprs_en_mclk), // output
.cmprs_en_extend (cmprs_en_extend), // input
.cmprs_run_mclk (cmprs_run_mclk), // output reg
......@@ -970,7 +970,20 @@ module jp_channel#(
// Each pass (1d) uses 5 DSP48E1 modules (2 - multipliers and 3 SIMD (2x24) adders/subtracters)
// Needs a small (<48, but did not calculate yet) pause between blocks if they did not come
// immediately after each other. This pause is needed to restart pipeline
`ifdef DEBUG_COMPRESSOR_SCRAMBLE
wire DBG_DCT_DV;
wire [31:0] DBG_DCT_SCRAMBLED;
scrambler #(
.DATA_BYTE_WIDTH(4)
) scrambler_i (
.clk (xclk), // input wire
.rst (first_block_dct && !DBG_DCT_DV && !dct_pre_first_out), // input wire
.val_in (DBG_DCT_DV), // input wire
.data_in ({19'b0, dct_out}), // input[31:0] wire
.data_out (DBG_DCT_SCRAMBLED) // output[31:0] wire
);
`endif
dct2d8x8_chen #(
.INPUT_WIDTH (10),
.OUTPUT_WIDTH (13),
......@@ -990,8 +1003,12 @@ module jp_channel#(
.xin (yc_nodc), // input[9:0] signed
.last_in (dct_last_in), // output reg
.pre_first_out (dct_pre_first_out), // output
`ifdef DEBUG_COMPRESSOR_SCRAMBLE
.dv (DBG_DCT_DV), // output
`else
.dv (), // output
.d_out (dct_out) // output[12:0] signed
`endif
.d_out (dct_out) // output[12:0] signed
);
wire quant_start;
......
......@@ -312,10 +312,13 @@ module dct1d_chen#(
if (rst) per_type <= 0;
else if (start) per_type <= 3'h1;
else if (phase[7]) begin
if (!per_type[2] && !en) per_type <= 3'h6;
/// if (!per_type[2] && !en) per_type <= 3'h6;
if (!per_type[2] && !en) per_type <= 3'h7;
else if ((per_type != 0) && (per_type != 3)) per_type <= per_type + 1;
end
phase <= {phase[6:0], start | (phase[7] & (|per_type) )};
/// phase <= {phase[6:0], start | (phase[7] & (|per_type) & ~(&per_type))}; // both per_type 7 and 0 disables
phase <= {phase[6:0], start | (phase[7] & (|per_type))};
//TODO:
if (rst || start || phase[7]) phase_cnt <= 0;
else if (|phase[6:0]) phase_cnt <= phase_cnt + 1;
......@@ -343,8 +346,10 @@ module dct1d_chen#(
if (rst || !(en || (|phase))) pre_en_out <= 0;
else if (phase[3]) begin
if (per_type == 2) pre_en_out <= 1;
else if (per_type[2]) pre_en_out <= 0;
if (per_type == 2) pre_en_out <= 1;
/// else if (per_type[2]) pre_en_out <= 0;
/// else if (&per_type[2:0]) pre_en_out <= 0;
else if (per_type[2:0]==0) pre_en_out <= 0;
end
en_out <= pre_en_out;
......
......@@ -75,14 +75,15 @@ module dct1d_chen_reorder_out#(
else if ((per_type != 0) && (per_type != 3)) per_type <= per_type + 1;
end
if (rst) pre_we_r <= 0;
else if (pre2_start) pre_we_r <= 1;
else if ((per_type == 0) || ((cntr_in==3) && per_type[2])) pre_we_r <= 0;
if (rst) pre_we_r <= 0;
else if (pre2_start) pre_we_r <= 1;
/// else if ((per_type == 0) || ((cntr_in==3) && per_type[2])) pre_we_r <= 0;
else if ((per_type == 0) || ((cntr_in[2:0]==3) && per_type[2])) pre_we_r <= 0;
we_r <= pre_we_r;
if (rst) cntr_in <= 0;
else if (pre2_start) cntr_in <= {~cntr_in[3],3'b0};
else if (pre_we_r) cntr_in <= cntr_in + 1;
else if (pre_we_r) cntr_in <= cntr_in + 1;
case (cntr_in[2:0])
3'h0: ina_rom <= {1'b0,3'h4};
3'h1: ina_rom <= {1'b1,3'h1};
......@@ -96,16 +97,19 @@ module dct1d_chen_reorder_out#(
if (we_r) reord_buf_ram[waddr] <= din;
if ((per_type == 2) && (cntr_in == 1)) raddr <= {~cntr_in[3], 3'b0};
/// if ((per_type == 2) && (cntr_in == 1)) raddr <= {~cntr_in[3], 3'b0};
if ((per_type == 2) && (cntr_in[2:0] == 1)) raddr <= {~cntr_in[3], 3'b0};
else if ((raddr[2:0] != 0) || (per_type !=0)) raddr <= raddr + 1;
if (en_out_r) dout_r <= reord_buf_ram[raddr];
start_out_r <= (per_type == 2) && (cntr_in == 1);
/// start_out_r <= (per_type == 2) && (cntr_in == 1);
start_out_r <= (per_type == 2) && (cntr_in[2:0] == 1);
if (rst ||(per_type == 0) ) en_out_r <= 0;
// else if (cntr_in == 1) en_out_r <= (per_type == 2) || !per_type[2];
else if ((cntr_in == 1) && (per_type == 2)) en_out_r <= 1;
/// else if ((cntr_in == 1) && (per_type == 2)) en_out_r <= 1;
else if ((cntr_in[2:0] == 1) && (per_type == 2)) en_out_r <= 1;
else if (stop_out && !en) en_out_r <= 0;
//stop_out
......
......@@ -768,7 +768,27 @@ class X393Jpeg(object):
"""
@return absolute path of the directory one above current script one
"""
return os.path.abspath(os.path.join(os.path.dirname(__file__), '../'))
return os.path.abspath(os.path.join(os.path.dirname(__file__), '../'))
def jpeg_sim_multi(self,
num_rpt=1,
irq_mask = 0xf0,
irq_after=100,
irq_timeout = 100000,
file_path = "img@.jpeg"):
"""
Wait for ready, acquire and save next image; the whole sequence is repeated num_rpt times
@param num_rpt - number of times to acquire next ready image
@param irq_mask - IRQ mask, 0xf0 - all 4 channels
@param irq_after - nanoseconds to wait after IRQ before reading pointers
@param irq_timeout - time (in nanoseconds) to wait for interrupts
@param file_path - camera file system path (starts with "/") or relative to web server root,
@ is replaced with timestamp, -<chn> added before "."
"""
for _ in range (num_rpt):
# Wait (up to irq_timeout ns) for an interrupt selected by irq_mask
self.x393_mem.wait_irq(irq_mask= irq_mask, wait_ns = irq_timeout)
# Zero mask: presumably a plain irq_after ns delay that no interrupt can cut short - TODO confirm in wait_irq
self.x393_mem.wait_irq(irq_mask= 0, wait_ns = irq_after)
# "next" is passed as the second positional argument of jpeg_write (channel)
self.jpeg_write(file_path, "next")
def jpeg_write(self,
file_path = "img.jpeg",
channel = 0,
......@@ -1105,11 +1125,15 @@ compressor_control all None None None None None 2
compressor_interrupt_control all clr
compressor_interrupt_control all en
compressor_control all 3
jpeg_sim_multi 8
jpeg_sim_multi 8
wait_irq 0xf0 100000
wait_irq 0x0 100
jpeg_write "img@.jpeg" next
jpeg_write "/home/eyesis/git/x393-neon/www/img.jpeg" next
x393 (localhost:7777) +107.289s--> compressor_control all None None None None None 2
......
......@@ -148,7 +148,7 @@ class X393Mem(object):
X393_CLIENT.flush()
def wait_irq(self,irq_mask= 0, wait_ns = 1000):
"""
Wait silation certain time, interruptible
Wait simulation certain time (in ns), interruptible
@param irq_mask - mask to be AND-ed with interrupt vector
@param wait_ns - timeout in nanoseconds
"""
......
......@@ -111,15 +111,6 @@ module simul_axi_hp_rd #(
wire [3:0] rd_qos_in;
wire [3:0] rd_qos_out;
/*
wire aw_nempty;
wire w_nempty;
wire enough_data; // enough data to start a new burst
wire [11:3] next_wr_address; // bits that are incrtemented in 64-bit mode (higher are kept according to AXI 4KB inc. limit)
reg [31:0] write_address;
wire fifo_wd_rd; // read data fifo
wire last_confirmed_write;
*/
wire [5:0] arid_out; // verify it matches wid_out when outputting data
wire [1:0] arburst_out;
......@@ -141,7 +132,12 @@ module simul_axi_hp_rd #(
reg [3:0] read_left;
reg [1:0] rburst;
reg [3:0] rlen;
`ifndef AXI_4K_LIMIT_DISABLE
wire [11:3] next_rd_address; // bits that are incremented in 64-bit mode (higher are kept according to AXI 4KB inc. limit)
`else
wire [31:3] next_rd_address;
`endif
reg [31:0] read_address;
wire last_confirmed_read;
......@@ -205,11 +201,20 @@ module simul_axi_hp_rd #(
assign last_read = (read_left==0);
assign last_confirmed_read = (read_left==0) && sim_rd_valid && sim_rd_ready;
// AXI: Bursts should not cross 4KB boundaries (... and to limit size of the address incrementer)
// in 64 bit mode - low 3 bits are preserved, next 9 are incremented
// in 64 bit mode - low 3 bits are preserved, next 9 are incremented
`ifndef AXI_4K_LIMIT_DISABLE
assign next_rd_address[11:3] =
rburst[1]?
(rburst[0]? {9'bx}:((read_address[11:3] + 1) & {5'h1f, ~rlen[3:0]})):
(rburst[0]? (read_address[11:3]+1):(read_address[11:3]));
`else
assign next_rd_address[31:3] =
rburst[1]?
(rburst[0]? {29'bx}:((read_address[31:3] + 1) & {25'h1f, ~rlen[3:0]})):
(rburst[0]? (read_address[31:3]+1):(read_address[31:3]));
`endif
assign sim_rd_address = read_address;
assign sim_rid = arid_out;
// Current model policy is not to initiate a new burst (read from simulation port) if it may overflow FIFO
......@@ -272,8 +277,11 @@ module simul_axi_hp_rd #(
if (rst) read_address <= 32'bx;
else if (start_read_burst_w) read_address <= araddr_out; // precedence over inc
`ifndef AXI_4K_LIMIT_DISABLE
else if (sim_rd_valid && sim_rd_ready) read_address <= {read_address[31:12],next_rd_address[11:3],read_address[2:0]};
`else
else if (sim_rd_valid && sim_rd_ready) read_address <= { next_rd_address[31:3],read_address[2:0]};
`endif
end
......
......@@ -130,7 +130,11 @@ UPDATE: Xilinx docs say that (AR/AW)CACHE is ignored
wire aw_nempty;
wire w_nempty;
wire enough_data; // enough data to start a new burst
`ifndef AXI_4K_LIMIT_DISABLE
wire [11:3] next_wr_address; // bits that are incremented in 64-bit mode (higher are kept according to AXI 4KB inc. limit)
`else
wire [31:3] next_wr_address;
`endif
reg [31:0] write_address;
reg [5:0] awid_r; // awid registered with write_address
wire fifo_wd_rd; // read data fifo
......@@ -248,11 +252,19 @@ UPDATE: Xilinx docs say that (AR/AW)CACHE is ignored
(aw_nempty && enough_data) || (write_in_progress && !last_confirmed_write);
// AXI: Bursts should not cross 4KB boundaries (... and to limit size of the address incrementer)
// in 64 bit mode - low 3 bits are preserved, next 9 are incremented
// in 64 bit mode - low 3 bits are preserved, next 9 are incremented
// Seems that Zynq is OK to cross 4K boundary
`ifndef AXI_4K_LIMIT_DISABLE
assign next_wr_address[11:3] =
wburst[1]?
(wburst[0]? {9'bx}:((write_address[11:3] + 1) & {5'h1f, ~wlen[3:0]})):
(wburst[0]? (write_address[11:3]+1):(write_address[11:3]));
`else
assign next_wr_address[31:3] =
wburst[1]?
(wburst[0]? {29'bx}:((write_address[31:3] + 1) & {25'h1f, ~wlen[3:0]})):
(wburst[0]? (write_address[31:3]+1):(write_address[31:3]));
`endif
assign sim_wr_data= wdata_out;
assign sim_wid= wid_out;
assign sim_wr_stb=wstrb_out;
......@@ -305,8 +317,11 @@ UPDATE: Xilinx docs say that (AR/AW)CACHE is ignored
if (rst) write_address <= 32'bx;
else if (start_write_burst_w) write_address <= awaddr_out; // precedence over inc
`ifndef AXI_4K_LIMIT_DISABLE
else if (fifo_wd_rd) write_address <= {write_address[31:12],next_wr_address[11:3],write_address[2:0]};
`else
else if (fifo_wd_rd) write_address <= { next_wr_address[31:3],write_address[2:0]};
`endif
if (rst) awid_r <= 6'bx;
else if (start_write_burst_w) awid_r <= awid_out; // precedence over inc
......
......@@ -41,7 +41,12 @@
`ifndef SYSTEM_DEFINES
`define SYSTEM_DEFINES
// TODO: Later compare instantiate/infer
`define INSTANTIATE_DSP48E1
`define INSTANTIATE_DSP48E1 // not yet tested/debugged otherwise
// https://forums.xilinx.com/t5/Embedded-Processor-System-Design/AXI4-Bursts-4KB-Address-Boundary-Limitation/td-p/216413
// Interconnect does not have 4K limit, and compressed data can only go to interconnect (memory), so it is OK to violate AXI specs here
`define AXI_4K_LIMIT_DISABLE // Current x393 code (only simulation modules) does not have it implemented, defining it causes mismatch synth/sim
`define DEBUG_COMPRESSOR_SCRAMBLE
`define DEBUG_DCT1D // undefine after debugging is over
// `define USE_OLD_DCT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment