Commit f99260f2 authored by Andrey Filippov's avatar Andrey Filippov

fixed histograms_saxi, using 100MHz clock for both saxigp<0,1>, v 03931016

parent dad1d6c5
......@@ -40,7 +40,11 @@
// Number of histograms per sensor is now statically defined by NUM_FRAME_BITS
// It may be modified to either reduce this number (by masking) or increase it (by
// keeping the pointer locally)
// move to system_defines; it slows SAXI_AW and reduces requirements on the ready inputs
`define MOD_SAXI 1
///`define FAKE_WDATA 1
///`define FAKE1_WDATA 1
module histogram_saxi#(
parameter HIST_SAXI_ADDR = 'h380, // 16 locations to write 20 bits of a 4KB page for the histogram
parameter HIST_SAXI_ADDR_MASK = 'h7f0,
......@@ -60,7 +64,7 @@ module histogram_saxi#(
// input rst,
input mclk, // for command/status
input aclk, // global clock to run s_axi (@150MHz?)
input aclk, // global clock to run s_axi (@150MHz?) saxi0_aclk
input mrst, // @posedge mclk, sync reset
input arst, // @posedge aclk, sync reset
......@@ -206,7 +210,6 @@ module histogram_saxi#(
reg [31:10] hist_start_addr; // higher bits of the system memory address of the histogram (1024 bytes) start
reg [31: 6] start_addr_r; // higher bits of the system memory address of the saxi burst start address
wire saxi_start_burst_w;
reg first_burst;
wire [31:0] inter_buf_data; // data between bram buffer and a small FIFO
reg [3:0] wburst_cntr; // count words in output data burst (using max==16)
......@@ -215,10 +218,58 @@ module histogram_saxi#(
wire fifo_nempty;
wire fifo_half_full;
reg [2:0] buf_re; // {fifo_we, buf_regen, buf_re}
wire buf_re_last_w; // reading last word from the buffer
wire buf_re_w;
wire fifo_re;
reg saxi_bvalid_r;
reg page_read_run; // reading buffer page until page_ra reads 'hff
reg [31:0] wdata_fake;
reg [31:0] wdata_fake_r;
reg wdata_fake_inc;
wire [31:0] wdata;
`ifdef MOD_SAXI
reg saxi_start_burst;
reg saxi_awvalid_r;
wire start_burst_w;
assign start_burst_w = saxi_awvalid && saxi_awready;
assign saxi_awvalid = saxi_awvalid_r; // ((|start_addr_r[9:6]) || first_burst) && !saxi_start_burst && !arst ; // TODO: make it a register
always @(posedge aclk) begin
saxi_start_burst <= start_burst_w;
if (arst || start_burst_w) saxi_awvalid_r <= 0;
else saxi_awvalid_r <= first_burst ||
(saxi_start_burst ? (start_addr_r[9:6] != 'hf) : (|start_addr_r[9:6]));
wire saxi_start_burst;
assign saxi_awvalid = ((|start_addr_r[9:6]) || first_burst) && !arst;
assign saxi_start_burst = saxi_awvalid && saxi_awready;
assign saxi_wdata = wdata_fake_r;
always @ (posedge aclk) begin
// wdata_fake_inc <= en_aclk && saxi_awvalid && saxi_awready; // v 100b
wdata_fake_inc <= en_aclk && saxi_wvalid && saxi_wready; // v 100c
if (!en_aclk) wdata_fake[15:0] <= 0;
else if (wdata_fake_inc) wdata_fake[15:0] <= wdata_fake[15:0] + 1;
if (!en_aclk) wdata_fake[31:16] <= 0;
else wdata_fake[31:16] <= wdata_fake[31:16] + 1;
`ifdef FAKE1_WDATA
wdata_fake_r[31:16] <= wdata_fake[31:16];
wdata_fake_r[15:0] <= start_addr_r[21:6];
wdata_fake_r <= wdata_fake;
assign saxi_wdata = wdata;
// reg [9:0] buf_raddr; // buffer read address {page[1:0], addr [7:0]}
......@@ -312,11 +363,11 @@ module histogram_saxi#(
assign attrib_frame = attrib_r[2+:NUM_FRAME_BITS];
assign attrib_color = attrib_r[1:0];
assign saxi_start_burst_w = saxi_awvalid && saxi_awready;
// assign saxi_start_burst = saxi_awvalid && saxi_awready;
assign saxi_awaddr = {start_addr_r[31:6],6'b0};
assign saxi_awvalid = ((|start_addr_r[9:6]) || first_burst) && !arst;
// assign saxi_awvalid = ((|start_addr_r[9:6]) || first_burst) && !arst;
//{enc_rq[1:0], sub_chn_r, frame_r, burst[1:0]}
// assign block_end= ???;
......@@ -340,7 +391,10 @@ module histogram_saxi#(
// TODO: Maybe reduce pause between 16-burst pages? Allow some overlap?
assign buf_re_w = en_aclk && (|pages_in_buf_rd) && !fifo_half_full && !(&page_ra) && page_read_run; // will stay off until next page
assign buf_re_last_w = &page_ra && buf_re[0];
// assign buf_re_w = en_aclk && (|pages_in_buf_rd) && !fifo_half_full && !(&page_ra) && page_read_run; // will stay off until next page
assign buf_re_w = en_aclk && (|pages_in_buf_rd) && !fifo_half_full && !buf_re_last_w && page_read_run; // will stay off until next page
assign fifo_re= saxi_wvalid && saxi_wready;
// currently waiting for SAXI to get confirmation of all data in the current page before proceeding to the next
......@@ -417,7 +471,9 @@ module histogram_saxi#(
else if (buf_re[0]) page_ra <= page_ra + 1;
if (!en_aclk) page_read_run <= 0;
else page_read_run <= block_start_r[1] || (page_read_run && !(&page_ra)); // until page_ra is 8'hff
// else page_read_run <= block_start_r[1] || (page_read_run && !(&page_ra)); // until page_ra is 8'hff
else page_read_run <= block_start_r[1] || (page_read_run && !buf_re_last_w); // until page_ra is 8'hff
if (!en_aclk) pages_in_buf_rd <= 0;
else if ( page_written_aclk && !page_sent_aclk) pages_in_buf_rd <= pages_in_buf_rd + 1;
......@@ -437,11 +493,11 @@ module histogram_saxi#(
if (block_start_r[2]) hist_start_addr[11:10] <= attrib_color;
if (arst || block_start_r[3]) start_addr_r[31:6] <= {hist_start_addr[31:10], 4'b0};
else if (saxi_start_burst_w) start_addr_r[31:6] <= start_addr_r[31:6] + 1;
else if (saxi_start_burst) start_addr_r[31:6] <= start_addr_r[31:6] + 1;
if (!nreset_aclk || arst) first_burst <= 0;
else if (block_start_r[3]) first_burst <= 1; // block_start_r[3] - same as start_addr_r set
else if (saxi_start_burst_w) first_burst <= 0;
else if (saxi_start_burst) first_burst <= 0;
if (block_start_r[0]) awcache_mode <= mode[HIST_SAXI_AWCACHE+:4];
if (block_start_r[0]) confirm_write <= mode[HIST_CONFIRM_WRITE];
......@@ -531,8 +587,8 @@ module histogram_saxi#(
.we (buf_re[2]), // input
.re (fifo_re), // input
.data_in (inter_buf_data), // input[31:0]
.data_out (saxi_wdata), // output[31:0]
.nempty (fifo_nempty), // output
.data_out (wdata), // output[31:0]
.nempty (fifo_nempty), // output (fast register output)
.half_full (fifo_half_full) // output reg
This diff is collapsed.
......@@ -35,8 +35,25 @@
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
parameter FPGA_VERSION = 32'h03931005; // parallel, converting from 32'h0393401a
// parameter FPGA_VERSION = 32'h0393401a; // adding strobe output for IMX-5 on ext-5
parameter FPGA_VERSION = 32'h03931016; // debugging histograms - MOD_SAXI on, restore actual histogram data
// parameter FPGA_VERSION = 32'h03931015; // debugging histograms - MOD_SAXI on, fixed some bugs related to inactive cycle with page_ra == 8'hff - works?
// parameter FPGA_VERSION = 32'h03931014; // debugging histograms - MOD_SAXI on, updates sim_saxi to match hardware (wlast disables wready, smaller fifo)
// parameter FPGA_VERSION = 32'h03931013; // debugging histograms - trying to re-enable FAKE_WDATA1 top word - timer, lower - page address - OK
// parameter FPGA_VERSION = 32'h03931012; // debugging histograms - trying to re-enable FAKE_WDATA1 - failed
// parameter FPGA_VERSION = 32'h03931011; // debugging histograms - disabled FAKE_WDATA1 - works
// parameter FPGA_VERSION = 32'h03931010; // debugging histograms - define FAKE_WDATA1, wdata_fake_r <= {start_addr_r[21:6], wdata_fake[15:0]};
// parameter FPGA_VERSION = 32'h0393100f; // debugging histograms - high 16 bits - always count with MOD_SAXI, getting stuck
// parameter FPGA_VERSION = 32'h0393100e; // debugging histograms - high 16 bits - always count
// parameter FPGA_VERSION = 32'h0393100d; // debugging histograms - counts aw saxi_wvalid && saxi_wready - extra buffer
// parameter FPGA_VERSION = 32'h0393100c; // debugging histograms - counts aw saxi_wvalid && saxi_wready
// parameter FPGA_VERSION = 32'h0393100b; // debugging histograms - counts aw saxi_awvalid && saxi_awready
// parameter FPGA_VERSION = 32'h0393100a; // reducing saxi speed from 150 to 100 MHz (both histograms and the logger)
// parameter FPGA_VERSION = 32'h03931009; // trying to boost SAXI (clock 190 instead of 150) with set_clock_uncertainty - got stuck (maybe my fault)
// parameter FPGA_VERSION = 32'h03931008; // trying to boost SAXI (clock 200 instead of 150) seems OK
// parameter FPGA_VERSION = 32'h03931007; // changed histogram_saxi - still fails
// parameter FPGA_VERSION = 32'h03931006; // parallel 03931006A - good, 03931006 - bad. will investigate
// parameter FPGA_VERSION = 32'h03931005; // parallel, converting from 32'h0393401a
// parameter FPGA_VERSION = 32'h0393401a; // adding strobe output for IMX-5 on ext-5
// parameter FPGA_VERSION = 32'h03934019; // Boson640, logger debug disabled
// parameter FPGA_VERSION = 32'h03934018; // Boson640, debugging logger 02
// parameter FPGA_VERSION = 32'h03934017; // Boson640, debugging logger 01
......@@ -596,7 +596,7 @@ module sensors393 #(
// S_AXI interface write only (histograms out)
// write address
input aclk, // global clock for S_AXI0 (150 MHz)
input aclk, // global clock for S_AXI0 (150 MHz) saxi0_aclk
output [31:0] saxi_awaddr, // AXI PS Slave GP0 AWADDR[31:0], input
output saxi_awvalid, // AXI PS Slave GP0 AWVALID, input
input saxi_awready, // AXI PS Slave GP0 AWREADY, output
......@@ -79,10 +79,14 @@ module simul_saxi_gp_wr(
input [ 3:0] sim_bresp_latency, // latency in writing data outside of the module
output [ 3:0] sim_wr_qos
localparam AW_FIFO_DEPTH = 3; // FIFO number of address bits to fit AW_FIFO_NUM (number is one bit wider)
localparam W_FIFO_DEPTH = 3; // FIFO number of address bits to fit W_FIFO_NUM
localparam [AW_FIFO_DEPTH:0] AW_FIFO_NUM = 8; // Maximal number of words in AW FIFO 8-words
localparam [W_FIFO_DEPTH:0] W_FIFO_NUM = 8; // Maximal number of words in AW 8-words
// TODO change these localparam to parameters
localparam AW_FIFO_DEPTH = 2; // 7; //3; // FIFO number of address bits to fit AW_FIFO_NUM (number is one bit wider)
localparam W_FIFO_DEPTH = 3; // 2; // 7; //3; // FIFO number of address bits to fit W_FIFO_NUM
localparam WREADY_DELAY_AFTER_LAST = 3; // negate wready for this number of clocks after wlast (0..7)
localparam [AW_FIFO_DEPTH:0] AW_FIFO_NUM = 1 << AW_FIFO_DEPTH; // 128; // 8; // Maximal number of words in AW FIFO 8-words
localparam [W_FIFO_DEPTH:0] W_FIFO_NUM = 1 << W_FIFO_DEPTH; // 8; // Maximal number of words in W FIFO 8-words
localparam VALID_AWLOCK = 2'b0; // TODO
......@@ -98,7 +102,8 @@ To make it work, I set the (AR/AW)CACHE=0x11 and (AR/AW)PROT=0x00. In the CDMA d
The default values set by VHLS were 0x00 and 0x10 respectively, which is also the case in the last post.
reg [WREADY_DELAY_AFTER_LAST : 0] wlast_d = 0; // [3:0] extra bit, but should work with WREADY_DELAY_AFTER_LAST == 0
wire wlast_nready;
wire aw_nempty;
wire w_nempty;
reg [11:0] next_wr_address_w; // bits that are incremented in 32-bit mode (higher are kept according to AXI 4KB inc. limit)
......@@ -140,10 +145,18 @@ Alex
// priority transactions are backed up behind it." What about demotion? Assuming it is not demoted
assign aresetn= ~rst; // probably not needed at all - docs say "do not use"
assign wlast_nready = (((1 << WREADY_DELAY_AFTER_LAST) -1) & wlast_d) != 0;
// generate ready signals for address and data
// assign wready= !wcount[7] && (!(&wcount[6:0]) || !fifo_data_we_d);
assign wready = (wcount < W_FIFO_NUM) && ((wcount < (W_FIFO_NUM-1)) || !fifo_data_we_d);
assign wready = ((wcount < W_FIFO_NUM) && ((wcount < (W_FIFO_NUM-1)) || !fifo_data_we_d)) && !wlast_nready;
always @ (posedge rst or posedge aclk) begin
if (rst) wlast_d<=0;
else wlast_d <= (wlast_d << 1) | {{WREADY_DELAY_AFTER_LAST{1'b0}}, (wlast & wready & wvalid)};
always @ (posedge rst or posedge aclk) begin
if (rst) fifo_data_we_d<=0;
else fifo_data_we_d <= wready && wvalid;
......@@ -65,7 +65,9 @@
// if specific sensor is not defined, parallel sensor interface is used for all channels
/*************** CHANGE here and x393_hispi | x393_parallel | x393_lwir | x393_boson in bitstream (and few other) tool settings ****************/
// `define BOSON 1
// `define BO-SON 1
// `define BO-SON_REVA 1 /* 103993 REVA board*/ // need to comment both - TCL recognizes as "BO-SON"
// `define LWIR
// `define HISPI
// also change in bitstream, utilization and timing summary tools (, ...)
......@@ -82,7 +84,6 @@
`ifdef BOSON
`define PCLK_MASTER /* pclk is generated by the sensors, no global pclk and prst!*/
`define BOSON_REVA 1 /* 103993 REVA board*/
`define MON_HISPI // Measure HISPI timing
// `define USE_OLD_XDCT393
......@@ -1712,7 +1712,7 @@ assign axi_grst = axi_rst_pre;
// SAXIGP0 signals (read unused) (for the histograms)
wire saxi0_aclk = hclk; // 150KHz
wire saxi0_aclk = camsync_clk; // hclk; // 150 MHz -> 100 MHz
wire [31:0] saxi0_awaddr;
wire saxi0_awvalid;
wire saxi0_awready;
......@@ -1736,7 +1736,7 @@ assign axi_grst = axi_rst_pre;
wire [ 1:0] saxi0_bresp;
// SAXIGP1 signals (read unused) (for the event logger - has 3 spare channels for write)
wire saxi1_aclk = hclk; // 150KHz
wire saxi1_aclk = camsync_clk; // hclk; // 150 MHz -> 100 MHz
wire [31:0] saxi1_awaddr;
wire saxi1_awvalid;
wire saxi1_awready;
......@@ -2633,7 +2633,7 @@ assign axi_grst = axi_rst_pre;
) event_logger_i (
// .rst (axi_rst), // input
.mclk (mclk), // input
.xclk (logger_clk), // input
.xclk (logger_clk), // input // 100 MHz
.mrst (mrst), // input
.xrst (lrst), // input
.cmd_ad (cmd_logger_ad), // input[7:0]
......@@ -2673,8 +2673,8 @@ assign axi_grst = axi_rst_pre;
.mclk (mclk), // input
.en (logger_saxi_en), // input
.iclk (mclk), // input
.data_in (logger_out), // input[15:0]
.valid (logger_stb), // input
.data_in (logger_out), // input[15:0] @posedge iclk input data
.valid (logger_stb), // input @posedge iclk input data valid
.has_burst (logger_has_burst), // output reg
.read_burst (logger_read_burst), // input
.data_out (logger_data32), // output[31:0]
No preview for this file type
This diff is collapsed.
Copyright 1986-2017 Xilinx, Inc. All Rights Reserved.
| Tool Version : Vivado v.2017.4 (lin64) Build 2086221 Fri Dec 15 20:54:30 MST 2017
| Date : Thu Mar 9 11:38:40 2023
| Date : Wed Mar 15 12:53:56 2023
| Host : elphel-desktop running 64-bit Ubuntu 14.04.5 LTS
| Command : report_utilization -file vivado_build/
| Design : x393
......@@ -31,13 +31,13 @@ Table of Contents
| Site Type | Used | Fixed | Available | Util% |
| Slice LUTs | 41694 | 0 | 78600 | 53.05 |
| LUT as Logic | 38299 | 0 | 78600 | 48.73 |
| LUT as Memory | 3395 | 0 | 26600 | 12.76 |
| Slice LUTs | 41831 | 0 | 78600 | 53.22 |
| LUT as Logic | 38434 | 0 | 78600 | 48.90 |
| LUT as Memory | 3397 | 0 | 26600 | 12.77 |
| LUT as Distributed RAM | 2858 | 0 | | |
| LUT as Shift Register | 537 | 0 | | |
| Slice Registers | 54275 | 0 | 157200 | 34.53 |
| Register as Flip Flop | 54275 | 0 | 157200 | 34.53 |
| LUT as Shift Register | 539 | 0 | | |
| Slice Registers | 54280 | 0 | 157200 | 34.53 |
| Register as Flip Flop | 54280 | 0 | 157200 | 34.53 |
| Register as Latch | 0 | 0 | 157200 | 0.00 |
| F7 Muxes | 54 | 0 | 39300 | 0.14 |
| F8 Muxes | 0 | 0 | 19650 | 0.00 |
......@@ -58,8 +58,8 @@ Table of Contents
| 0 | Yes | - | - |
| 16 | Yes | - | Set |
| 693 | Yes | - | Reset |
| 965 | Yes | Set | - |
| 52601 | Yes | Reset | - |
| 964 | Yes | Set | - |
| 52607 | Yes | Reset | - |
......@@ -69,27 +69,27 @@ Table of Contents
| Site Type | Used | Fixed | Available | Util% |
| Slice | 16481 | 0 | 19650 | 83.87 |
| SLICEL | 10863 | 0 | | |
| SLICEM | 5618 | 0 | | |
| LUT as Logic | 38299 | 0 | 78600 | 48.73 |
| using O5 output only | 5 | | | |
| using O6 output only | 29710 | | | |
| using O5 and O6 | 8584 | | | |
| LUT as Memory | 3395 | 0 | 26600 | 12.76 |
| Slice | 16512 | 0 | 19650 | 84.03 |
| SLICEL | 10885 | 0 | | |
| SLICEM | 5627 | 0 | | |
| LUT as Logic | 38434 | 0 | 78600 | 48.90 |
| using O5 output only | 3 | | | |
| using O6 output only | 29881 | | | |
| using O5 and O6 | 8550 | | | |
| LUT as Memory | 3397 | 0 | 26600 | 12.77 |
| LUT as Distributed RAM | 2858 | 0 | | |
| using O5 output only | 2 | | | |
| using O6 output only | 108 | | | |
| using O5 and O6 | 2748 | | | |
| LUT as Shift Register | 537 | 0 | | |
| using O5 output only | 249 | | | |
| using O6 output only | 236 | | | |
| using O5 and O6 | 52 | | | |
| LUT Flip Flop Pairs | 24440 | 0 | 78600 | 31.09 |
| fully used LUT-FF pairs | 4583 | | | |
| LUT-FF pairs with one unused LUT output | 17721 | | | |
| LUT-FF pairs with one unused Flip Flop | 17494 | | | |
| Unique Control Sets | 4658 | | | |
| LUT as Shift Register | 539 | 0 | | |
| using O5 output only | 256 | | | |
| using O6 output only | 233 | | | |
| using O5 and O6 | 50 | | | |
| LUT Flip Flop Pairs | 24431 | 0 | 78600 | 31.08 |
| fully used LUT-FF pairs | 4596 | | | |
| LUT-FF pairs with one unused LUT output | 17731 | | | |
| LUT-FF pairs with one unused Flip Flop | 17547 | | | |
| Unique Control Sets | 4592 | | | |
* Note: Review the Control Sets Report for more information regarding control sets.
......@@ -197,17 +197,17 @@ Table of Contents
| Ref Name | Used | Functional Category |
| FDRE | 52601 | Flop & Latch |
| LUT3 | 11385 | LUT |
| LUT6 | 10143 | LUT |
| LUT2 | 8260 | LUT |
| LUT4 | 7858 | LUT |
| LUT5 | 7614 | LUT |
| FDRE | 52607 | Flop & Latch |
| LUT3 | 11410 | LUT |
| LUT6 | 10331 | LUT |
| LUT2 | 8166 | LUT |
| LUT4 | 7799 | LUT |
| LUT5 | 7655 | LUT |
| RAMD32 | 4174 | Distributed Memory |
| CARRY4 | 2809 | CarryLogic |
| LUT1 | 1623 | LUT |
| RAMS32 | 1408 | Distributed Memory |
| FDSE | 965 | Flop & Latch |
| FDSE | 964 | Flop & Latch |
| FDCE | 693 | Flop & Latch |
| SRL16E | 485 | Distributed Memory |
| OBUFT | 121 | IO |
......@@ -83,10 +83,19 @@ create_generated_clock -name ddr3_clk [get_nets -hierarchical clk_pre ]
create_generated_clock -name ddr3_clk_div [get_nets -hierarchical clk_div_pre ]
create_generated_clock -name ddr3_mclk [get_nets -hierarchical mclk_pre]
create_generated_clock -name ddr3_clk_ref [get_nets clocks393_i/dly_ref_clk_pre ]
create_generated_clock -name axihp_clk [get_nets clocks393_i/hclk_pre ]
# trying to boost SAXI performance by forcing higher axihp_clk (5 ns instead of 6.667)
# create_clock -name axihp_clk -period 5 [get_nets clocks393_i/hclk_pre ]
# Will add/subtract to the 6.667 ns period: 150 MHz -> 190 MHz
# set_clock_uncertainty 1.4 [get_generated_clocks {axihp_clk}]
# Switched both SAXI0 and SAXI1 to 100 MHz from 150 MHz
create_generated_clock -name xclk [get_nets clocks393_i/xclk_pre ]
#clock for inter - camera synchronization and event logger
#clock for inter - camera synchronization and event logger (now for SAXI0/1 also, was 150)
create_generated_clock -name sclk [get_nets clocks393_i/sync_clk_pre ]
create_clock -name ffclk0 -period 41.667 [get_ports {ffclk0p}]
#Generated clocks are assumed to be tied to clkin1 (not 2), so until external ffclk0 is constrained, derivative clocks are not generated
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment