Commit db53f38f authored by Andrey Filippov's avatar Andrey Filippov

testing and simulating, improving timing

parent 4ca48f7b
......@@ -10,6 +10,8 @@ x393.prj
*DEBUG_VDT*
*.kate-swp
*.old
*.new
*.bad
*.pyc
*.pickle
py393/dbg*
......
......@@ -62,42 +62,42 @@
<link>
<name>vivado_logs/VivadoBitstream.log</name>
<type>1</type>
<location>/home/andrey/git/x393/vivado_logs/VivadoBitstream-20151031232832558.log</location>
<location>/home/andrey/git/x393/vivado_logs/VivadoBitstream-20151103114104932.log</location>
</link>
<link>
<name>vivado_logs/VivadoOpt.log</name>
<type>1</type>
<location>/home/andrey/git/x393/vivado_logs/VivadoOpt-20151031232832558.log</location>
<location>/home/andrey/git/x393/vivado_logs/VivadoOpt-20151103114104932.log</location>
</link>
<link>
<name>vivado_logs/VivadoOptPhys.log</name>
<type>1</type>
<location>/home/andrey/git/x393/vivado_logs/VivadoOptPhys-20151031232832558.log</location>
<location>/home/andrey/git/x393/vivado_logs/VivadoOptPhys-20151103114104932.log</location>
</link>
<link>
<name>vivado_logs/VivadoOptPower.log</name>
<type>1</type>
<location>/home/andrey/git/x393/vivado_logs/VivadoOptPower-20151031232832558.log</location>
<location>/home/andrey/git/x393/vivado_logs/VivadoOptPower-20151103114104932.log</location>
</link>
<link>
<name>vivado_logs/VivadoPlace.log</name>
<type>1</type>
<location>/home/andrey/git/x393/vivado_logs/VivadoPlace-20151031232832558.log</location>
<location>/home/andrey/git/x393/vivado_logs/VivadoPlace-20151103114104932.log</location>
</link>
<link>
<name>vivado_logs/VivadoRoute.log</name>
<type>1</type>
<location>/home/andrey/git/x393/vivado_logs/VivadoRoute-20151031232832558.log</location>
<location>/home/andrey/git/x393/vivado_logs/VivadoRoute-20151103114104932.log</location>
</link>
<link>
<name>vivado_logs/VivadoSynthesis.log</name>
<type>1</type>
<location>/home/andrey/git/x393/vivado_logs/VivadoSynthesis-20151031232335810.log</location>
<location>/home/andrey/git/x393/vivado_logs/VivadoSynthesis-20151103114104932.log</location>
</link>
<link>
<name>vivado_logs/VivadoTimimgSummaryReportImplemented.log</name>
<type>1</type>
<location>/home/andrey/git/x393/vivado_logs/VivadoTimimgSummaryReportImplemented-20151031232832558.log</location>
<location>/home/andrey/git/x393/vivado_logs/VivadoTimimgSummaryReportImplemented-20151103114104932.log</location>
</link>
<link>
<name>vivado_logs/VivadoTimimgSummaryReportSynthesis.log</name>
......@@ -107,7 +107,7 @@
<link>
<name>vivado_logs/VivadoTimingReportImplemented.log</name>
<type>1</type>
<location>/home/andrey/git/x393/vivado_logs/VivadoTimingReportImplemented-20150904164653967.log</location>
<location>/home/andrey/git/x393/vivado_logs/VivadoTimingReportImplemented-20151101221627109.log</location>
</link>
<link>
<name>vivado_logs/VivadoTimingReportSynthesis.log</name>
......
VivadoRoute_122_SkipSnapshotRoute=true
VivadoRoute_123_SkipSnapshotRoute=true
com.elphel.store.context.VivadoRoute=VivadoRoute_122_SkipSnapshotRoute<-@\#\#@->VivadoRoute_123_SkipSnapshotRoute<-@\#\#@->
VivadoRoute_125_directive_route=MoreGlobalIterations
com.elphel.store.context.VivadoRoute=VivadoRoute_122_SkipSnapshotRoute<-@\#\#@->VivadoRoute_123_SkipSnapshotRoute<-@\#\#@->VivadoRoute_125_directive_route<-@\#\#@->
eclipse.preferences.version=1
......@@ -249,7 +249,7 @@ module csconvert18a(
wire ystrt,nxtline;
reg [7:0] yaddr_r; // address for the external buffer memory to write 16x16x8bit Y data
reg ywe_r; // wrire enable of Y data
reg ywe_r; // write enable of Y data
reg [6:0] caddr_r; // address for the external buffer memory 2x8x8x8bit Cb+Cr data (MSB=0 - Cb, 1 - Cr)
reg cwe_r; // write enable for CbCr data
reg odd_pix; // odd pixel (assumes even number of pixels in a line
......@@ -267,10 +267,10 @@ module csconvert18a(
assign n000 = n000_r;
assign n255 = n255_r;
assign signed_y = signed_y_r; // - now signed char, -128(black) to +127 (white)
assign yaddr = yaddr_r;
assign ywe = ywe_r;
assign caddr = caddr_r;
assign cwe = cwe_r;
assign yaddr = yaddr_r2;
assign ywe = ywe_r2;
assign caddr = caddr_r2;
assign cwe = cwe_r2;
dly_16 #(.WIDTH(1)) i_strt_dly0 (.clk(CLK),.rst(1'b0), .dly(4'd15), .din(pre_first_in), .dout(strt_dly[0]));
dly_16 #(.WIDTH(1)) i_strt_dly1 (.clk(CLK),.rst(1'b0), .dly(4'd15), .din(strt_dly[0]), .dout(strt_dly[1]));
......@@ -532,20 +532,28 @@ Y[1,1]=(0x96*P[1,1]+ 0x1d*((P[1,0]+P[1,2])/2 + 0x4d*((P[0,1] +
reg [7:0] y;
// reg [7:0] y0; // bypass in monochrome mode
wire [7:0] y0 = pdc;
// wire [7:0] y0 = pdc;
reg [7:0] y0_r;
// wire [7:0] y0; // bypass in monochrome mode
reg [15:0] y1,y2,y3;
wire [15:0] y_sum =y1+y2+y3;
// TODO: insert register to ease mm1..3 -> y (OK to delay all outputs). Or is it not using DSP at all?
// wire [15:0] y_sum =y1+y2+y3;
reg [15:0] y_sum_r;
// always @ (posedge CLK) y0 <= pd1_dly; // m1; // equivalent
always @ (posedge CLK) y1 <= mm1;
always @ (posedge CLK) y2 <= mm2;
always @ (posedge CLK) y3 <= mm3;
// wire [7:0] pre_y= mono ? y0 : (y_sum[15:8]+y_sum[7]);
wire [7:0] pre_y= mono ? y0_r : (y_sum_r[15:8]+y_sum_r[7]);
// making y output signed -128..+127
wire [7:0] pre_y= mono ? y0 : (y_sum[15:8]+y_sum[7]);
always @ (posedge CLK) y[7:0] <= pre_y[7:0];
always @ (posedge CLK) signed_y_r[7:0] <= {~pre_y[7], pre_y[6:0]};
always @ (posedge CLK) begin
y1 <= mm1;
y2 <= mm2;
y3 <= mm3;
y0_r <= pdc;
y_sum_r <= y1+y2+y3;
y[7:0] <= pre_y[7:0];
signed_y_r[7:0] <= {~pre_y[7], pre_y[6:0]};
end
// Try easier and hope better algorithm of color extractions that should perform better on gradients.
......@@ -575,12 +583,21 @@ reg sub_y; // output accumulator/subtractor. 0 - load new data,
wire cwe0; // preliminary cwe_r (to be modulated by odd/even pixels)
reg cstrt; //ystrt dealyed by 1
reg cnxt; // nxtline delayed by 1
reg pre_sel_cbcrmult1;
// delaying, for now using "old" ywe,cwe, yaddr,caddr - registering them on the output
always @ (posedge CLK) begin
if (~(ywe_r || ystrt || nxtline)) sel_cbcrmult1 <= ~(bayer_phase[1] ^ bayer_phase[0] ^ odd_line);
else sel_cbcrmult1 <= ~sel_cbcrmult1;
// if (~(ywe_r || ystrt || nxtline)) sel_cbcrmult1 <= ~(bayer_phase[1] ^ bayer_phase[0] ^ odd_line);
// else sel_cbcrmult1 <= ~sel_cbcrmult1;
if (~(ywe_r || ystrt || nxtline)) pre_sel_cbcrmult1 <= ~(bayer_phase[1] ^ bayer_phase[0] ^ odd_line);
else pre_sel_cbcrmult1 <= ~pre_sel_cbcrmult1;
sel_cbcrmult1 <=pre_sel_cbcrmult1;
sub_y <= ~sel_cbcrmult1;
cbcrmult1 <= sel_cbcrmult1?y[7:0]:pdc[7:0];
cbcrmult1 <= sel_cbcrmult1?y[7:0]:pdc[7:0];
// cbcrmult1 <= sel_cbcrmult1?y[7:0]:pdc[7:0];
cbcrmult1 <= sel_cbcrmult1?y[7:0]:y0_r[7:0]; // delayed by 1 clock
if (~ywe_r) use_cr <= ~(bayer_phase[1] ^ odd_line);
end
assign cbcrmult2 = use_cr?m_cr:m_cb; // maybe will need a register? (use_cr will still be good as it is valid early)
......@@ -601,23 +618,38 @@ end
dly_16 #(.WIDTH(1)) i_cwe0 (.clk(CLK),.rst(1'b0), .dly(4'd1), .din(ywe_r), .dout(cwe0));
//SRL16 i_cwe0 (.D(ywe_r ), .Q(cwe0), .A0(1'b1), .A1(1'b0), .A2(1'b0), .A3(1'b0), .CLK(CLK)); // dly=2=1+1
always @ (posedge CLK) begin
always @ (posedge CLK) begin
cstrt <= ystrt;
cnxt <= nxtline;
cwe_r <= cwe0 && sub_y;
// cwe_r <= cwe0 && sub_y;
cwe_r <= cwe0 && !sel_cbcrmult1;
caddr_r[2:0]<= cwe0?(caddr_r[2:0]+cwe_r):3'b0;
if (cstrt) caddr_r[6] <= ~bayer_phase[1];
else if (cnxt) caddr_r[6] <= ~caddr_r[6];
if (cstrt) caddr_r[5:3] <=3'b0;
else if (cnxt) caddr_r[5:3] <=(bayer_phase[1]^caddr_r[6])? caddr_r[5:3]:(caddr_r[5:3]+1);
end
end
// extra signals delayed by 1 clock
// Each *_r2 register below is a copy of the corresponding *_r register taken one
// CLK cycle later: write enables (ywe_r, cwe_r) and buffer addresses (yaddr_r, caddr_r).
// NOTE(review): presumably this re-aligns the buffer write strobes/addresses with the
// Y/CbCr data after an extra pipeline register was added to the data path - confirm
// against the module's yaddr/ywe/caddr/cwe output assignments.
reg ywe_r2, cwe_r2;
reg [6:0] caddr_r2;
reg [7:0] yaddr_r2;
// No reset: all four registers simply follow their sources with one CLK cycle of latency
always @ (posedge CLK) begin
ywe_r2 <= ywe_r;
cwe_r2 <= cwe_r;
yaddr_r2 <= yaddr_r;
caddr_r2 <= caddr_r;
end
always @ (posedge CLK) begin
y_eq_0 <= (y0[7:0] == 8'h0);
y_eq_255 <= (y0[7:0] == 8'hff);
y_eq_0 <= (y0_r[7:0] == 8'h0);
y_eq_255 <= (y0_r[7:0] == 8'hff);
if (strt) n000_r[7:0] <= 8'h0;
else if ((n000_r[7:0]!=8'hff) && y_eq_0 && ywe_r) n000_r[7:0] <= n000_r[7:0]+1;
else if ((n000_r[7:0]!=8'hff) && y_eq_0 && ywe_r2) n000_r[7:0] <= n000_r[7:0]+1;
if (strt) n255_r[7:0] <= 8'h0;
else if ((n255_r[7:0]!=8'hff) && y_eq_255 && ywe_r) n255_r[7:0] <= n255_r[7:0]+1;
else if ((n255_r[7:0]!=8'hff) && y_eq_255 && ywe_r2) n255_r[7:0] <= n255_r[7:0]+1;
end
......
......@@ -85,7 +85,7 @@
* 1.69 SPH 03/19/13 Update tZQCS, tZQinit, tZQoper timing parameters
* 1.70 SPH 04/08/14 Update tRFC to PRECARGE check
*****************************************************************************************/
`define den4096Mb 1
`include "system_defines.vh"
// DO NOT CHANGE THE TIMESCALE
// MAKE SURE YOUR SIMULATOR USES "PS" RESOLUTION
`timescale 1ps / 1ps
......@@ -121,6 +121,19 @@ module ddr3 (
// to select the correct component density before continuing
ERROR: You must specify component density with +define+den____Mb.
`endif
// Simulation-only debug output, executed once at time 0:
// prints TCK_MIN and reports which speed-grade macro (sg15E / sg093) is defined.
initial begin
$display ("TCK_MIN = %d", TCK_MIN);
// NOTE(review): a macro reference placed inside a string literal is normally not
// expanded by the Verilog preprocessor, so the next $display most likely prints the
// text between the quotes literally - confirm with the simulator in use.
`ifdef sg15E
$display ("sg15E = `sg15E");
`endif
// Report the selected speed grade by name; nothing is printed here when neither
// sg093 nor sg15E is defined.
`ifdef sg093 // sg093 is equivalent to the JEDEC DDR3-2133 (14-14-14) speed bin
$display ("sg093");
`elsif sg15E
$display ("sg15E");
`endif
end
parameter check_strict_mrbits = 1;
parameter check_strict_timing = 1;
parameter feature_pasr = 1;
......
parameter FPGA_VERSION = 32'h0393005c; // 250MHz ???
parameter FPGA_VERSION = 32'h03930065; // (same rev) all met, using "old" (non-inverted) phase - OK (full phase range)
// parameter FPGA_VERSION = 32'h03930065; // switch phy_top.v (all met) - OK with inverted phase control (reduced phase range)
// parameter FPGA_VERSION = 32'h03930064; // switch mcontr_sequencer.v (xclk not met) - wrong!
// parameter FPGA_VERSION = 32'h03930063; // switch mcntrl_linear_rw.v (met) good, worse mem valid phases
// parameter FPGA_VERSION = 32'h03930062; // (met)debugging - what was broken (using older versions of some files) - mostly OK (some glitches)
// parameter FPGA_VERSION = 32'h03930061; // restored bufr instead of bufio for memory high speed clock
// parameter FPGA_VERSION = 32'h03930060; // moving CLK1..3 in memory controller MMCM, keeping CLK0 and FB. Stuck at memory calib
// parameter FPGA_VERSION = 32'h0393005f; // restored mclk back to 200MHz, registers added to csconvert18a
// parameter FPGA_VERSION = 32'h0393005e; // trying mclk = 225 MHz (was 200MHz) define MCLK_VCO_MULT 18
// parameter FPGA_VERSION = 32'h0393005d; // trying mclk = 250 MHz (was 200MHz) define MCLK_VCO_MULT 20
// parameter FPGA_VERSION = 32'h0393005c; // 250MHz OK, no timing violations
// parameter FPGA_VERSION = 32'h0393005b; // 250MHz Not tested, timing violation in bit_stuffer_escape: xclk -0.808 -142.047 515
// parameter FPGA_VERSION = 32'h0393005a; // Trying xclk = 250MHz - timing violations in xdct393, but particular hardware works
// parameter FPGA_VERSION = 32'h03930059; // 'new' (no pclk2x, no xclk2x clocks) sensor/converter w/o debug - OK
......
......@@ -138,7 +138,11 @@
parameter real REFCLK_FREQUENCY = 200.0, // 300.0,
parameter HIGH_PERFORMANCE_MODE = "FALSE",
parameter CLKIN_PERIOD = 20, // 10, //ns >1.25, 600<Fvco<1200 // Hardware 150MHz , change to | 6.667
`ifdef MCLK_VCO_MULT
parameter CLKFBOUT_MULT = `MCLK_VCO_MULT ,
`else
parameter CLKFBOUT_MULT = 16, // 8, // Fvco=Fclkin*CLKFBOUT_MULT_F/DIVCLK_DIVIDE, Fout=Fvco/CLKOUT#_DIVIDE | 16
`endif
parameter CLKFBOUT_MULT_REF = 16, // 18, // 9, // Fvco=Fclkin*CLKFBOUT_MULT_F/DIVCLK_DIVIDE, Fout=Fvco/CLKOUT#_DIVIDE | 6
parameter CLKFBOUT_DIV_REF = 4, // 200Mhz 3, // To get 300MHz for the reference clock
`else
......@@ -150,11 +154,12 @@
parameter CLKFBOUT_DIV_REF = 3, // To get 300MHz for the reference clock
`endif
parameter DIVCLK_DIVIDE= 1,
parameter CLKFBOUT_USE_FINE_PS= 0, //1, // 0 - old, 1 - new
parameter CLKFBOUT_PHASE = 0.000,
parameter SDCLK_PHASE = 0.000,
parameter CLK_PHASE = 0.000,
parameter CLK_PHASE = 0.000, //11.25, /// 0.000,
parameter CLK_DIV_PHASE = 0.000,
parameter MCLK_PHASE = 90.000,
parameter MCLK_PHASE = 90.000, //78.75, // 90.000,
parameter REF_JITTER1 = 0.010,
parameter SS_EN = "FALSE",
parameter SS_MODE = "CENTER_HIGH",
......
......@@ -193,12 +193,20 @@
// Program the clock phase shifter through the control register interface.
//   phase - requested phase value, PHASE_WIDTH bits wide.
// When CLKFBOUT_USE_FINE_PS is non-zero, the negated phase (truncated to PHASE_WIDTH
// bits) is written to LD_DLY_PHASE; otherwise the phase is written unchanged.
// In both cases the value zero-extended to 32 bits is what goes to LD_DLY_PHASE,
// target_phase records the value actually written, and exactly one DLY_SET write
// follows the phase load.
task axi_set_phase;
    input [PHASE_WIDTH-1:0] phase;
    reg   [PHASE_WIDTH-1:0] inverted_phase; // -phase, wrapped to PHASE_WIDTH bits
    begin
        if (CLKFBOUT_USE_FINE_PS) begin
            inverted_phase = -phase;
            $display("SET CLOCK INVERTED PHASE (0x%x) to 0x%x @ %t",phase,inverted_phase,$time);
            write_contol_register(LD_DLY_PHASE, {{(32-PHASE_WIDTH){1'b0}},inverted_phase}); // control register address
            target_phase <= inverted_phase;
        end else begin
            $display("SET CLOCK PHASE to 0x%x @ %t",phase,$time);
            write_contol_register(LD_DLY_PHASE, {{(32-PHASE_WIDTH){1'b0}},phase}); // control register address
            target_phase <= phase;
        end
        // Issued once for either branch, after the new phase value has been loaded
        write_contol_register(DLY_SET,0);
    end
endtask
task axi_set_wbuf_delay;
......
......@@ -153,6 +153,7 @@ module mcntrl393 #(
parameter CLKFBOUT_DIV_REF = 3, // To get 300MHz for the reference clock
`endif
parameter DIVCLK_DIVIDE= 1,
parameter CLKFBOUT_USE_FINE_PS= 1, // 0 - old, 1 - new
parameter CLKFBOUT_PHASE = 0.000,
parameter SDCLK_PHASE = 0.000,
parameter CLK_PHASE = 0.000,
......@@ -1816,6 +1817,7 @@ module mcntrl393 #(
.CLKFBOUT_MULT_REF (CLKFBOUT_MULT_REF),
.CLKFBOUT_DIV_REF (CLKFBOUT_DIV_REF),
.DIVCLK_DIVIDE (DIVCLK_DIVIDE),
.CLKFBOUT_USE_FINE_PS (CLKFBOUT_USE_FINE_PS),
.CLKFBOUT_PHASE (CLKFBOUT_PHASE),
.SDCLK_PHASE (SDCLK_PHASE),
.CLK_PHASE (CLK_PHASE),
......
......@@ -177,6 +177,7 @@ module mcntrl_linear_rw #(
reg [FRAME_HEIGHT_BITS-1:0] line_unfinished_r;
wire pre_want;
reg pre_want_r1;
wire [1:0] status_data;
wire [3:0] cmd_a;
wire [31:0] cmd_data;
......@@ -324,8 +325,9 @@ module mcntrl_linear_rw #(
assign frame_done= frame_done_r;
assign frame_finished= frame_finished_r;
// assign pre_want= chn_en && busy_r && !want_r && !xfer_start_r[0] && calc_valid && !last_block && !suspend && !frame_start_r[0];
assign pre_want= chn_en && busy_r && !want_r && !xfer_start_r[0] && calc_valid && !last_block && !suspend && !(|frame_start_r);
// assign pre_want= chn_en && busy_r && !want_r && !xfer_start_r[0] && calc_valid && !last_block && !suspend && !(|frame_start_r);
// accelerating pre_want:
assign pre_want= pre_want_r1 && !want_r && !xfer_start_r[0] && !suspend ;
assign last_in_row_w=(row_left=={{(FRAME_WIDTH_BITS-NUM_XFER_BITS){1'b0}},xfer_num128_r});
assign last_row_w= next_y==window_height;
......@@ -409,7 +411,7 @@ module mcntrl_linear_rw #(
if (mrst) want_r <= 0;
else if (chn_rst || xfer_grant || start_skip_r) want_r <= 0;
else if (pre_want && (page_cntr>{1'b0,cmd_extra_pages})) want_r <= 1;
else if (pre_want && (page_cntr > {1'b0,cmd_extra_pages})) want_r <= 1;
end
......@@ -469,6 +471,9 @@ wire start_not_partial= xfer_start_r[0] && !xfer_limited_by_mem_page_r;
// now have row start address, bank and row_left ;
// calculate number to read (min of row_left, maximal xfer and what is left in the DDR3 page
always @(posedge mclk) begin
// accelerating pre_want
pre_want_r1 <= chn_en && !frame_done_r && busy_r && par_mod_r[PAR_MOD_LATENCY-2] && !(|frame_start_r[4:1]) && !last_block;
if (mrst) par_mod_r<=0;
else if (pgm_param_w ||
xfer_start_r[0] ||
......
......@@ -126,6 +126,7 @@ module memctrl16 #(
parameter CLKFBOUT_DIV_REF = 3, // To get 300MHz for the reference clock
`endif
parameter DIVCLK_DIVIDE= 1,
parameter CLKFBOUT_USE_FINE_PS= 1, // 0 - old, 1 - new
parameter CLKFBOUT_PHASE = 0.000,
parameter SDCLK_PHASE = 0.000,
parameter CLK_PHASE = 0.000,
......@@ -905,6 +906,7 @@ end
.CLKFBOUT_MULT_REF (CLKFBOUT_MULT_REF),
.CLKFBOUT_DIV_REF (CLKFBOUT_DIV_REF),
.DIVCLK_DIVIDE (DIVCLK_DIVIDE),
.CLKFBOUT_USE_FINE_PS (CLKFBOUT_USE_FINE_PS),
.CLKFBOUT_PHASE (CLKFBOUT_PHASE),
.SDCLK_PHASE (SDCLK_PHASE),
.CLK_PHASE (CLK_PHASE),
......
......@@ -78,7 +78,27 @@ reg set_r=0;
reg dci_disable_dqs_r, dci_disable_dq_r;
reg [7:0] ld_odly=8'b0, ld_idly=8'b0;
reg ld_odly_dqs,ld_idly_dqs,ld_odly_dm;
BUFR iclk_i (.O(iclk),.I(dqs_read), .CLR(1'b0),.CE(1'b1)); // OK, works with constraint? Seems now work w/o
/*
wire iclk_int;
//BUFR iclk_int_i (.O(iclk_int), .I(dqs_read), .CLR(1'b0),.CE(1'b1));
assign iclk_int = dqs_read && !rst;
BUFIO iclk_i (.O(iclk), .I(iclk_int));
CRITICAL WARNING: [Vivado 12-1411] Cannot set LOC property of ports, Could not legally place instance
mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane0_i/dqs_i/iobufs_dqs_i/IBUFDS/IBUFDS_M at N7 (IOB_X1Y120
since it belongs to a shape containing instance mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane0_i/iclk_i.
The shape requires relative placement between
mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane0_i/dqs_i/iobufs_dqs_i/IBUFDS/IBUFDS_M and
mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane0_i/iclk_i that cannnot be honored because it would result in
an invalid location for mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane0_i/iclk_i. [x393.xdc:193]
----------
ERROR: [DRC 23-20] Rule violation (RTSTAT-1) Unrouted net - 2 net(s) are unrouted. The problem bus(es) and/or net(s) are
mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane1_i/iclk_int,
mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane0_i/iclk_int.
*/
wire [9:0] decode_sel={
(dly_addr[3:0]==9)?1'b1:1'b0,
(dly_addr[3:0]==8)?1'b1:1'b0,
......
......@@ -80,6 +80,7 @@ module mcontr_sequencer #(
parameter CLKFBOUT_MULT_REF = 9, // Fvco=Fclkin*CLKFBOUT_MULT_F/DIVCLK_DIVIDE, Fout=Fvco/CLKOUT#_DIVIDE
parameter CLKFBOUT_DIV_REF = 3, // To get 300MHz for the reference clock
parameter DIVCLK_DIVIDE= 1,
parameter CLKFBOUT_USE_FINE_PS= 1, // 0 - old, 1 - new
parameter CLKFBOUT_PHASE = 0.000,
parameter SDCLK_PHASE = 0.000,
parameter CLK_PHASE = 0.000,
......@@ -551,9 +552,9 @@ module mcontr_sequencer #(
.CLKFBOUT_MULT_REF (CLKFBOUT_MULT_REF),
.CLKFBOUT_DIV_REF (CLKFBOUT_DIV_REF),
.DIVCLK_DIVIDE (DIVCLK_DIVIDE),
.CLKFBOUT_USE_FINE_PS (CLKFBOUT_USE_FINE_PS),
.CLKFBOUT_PHASE (CLKFBOUT_PHASE),
.SDCLK_PHASE (SDCLK_PHASE), /// debugging
.CLK_PHASE (CLK_PHASE),
.CLK_DIV_PHASE (CLK_DIV_PHASE),
.MCLK_PHASE (MCLK_PHASE),
......
......@@ -36,6 +36,7 @@ module phy_cmd#(
parameter CLKFBOUT_MULT_REF = 9, // Fvco=Fclkin*CLKFBOUT_MULT_F/DIVCLK_DIVIDE, Fout=Fvco/CLKOUT#_DIVIDE
parameter CLKFBOUT_DIV_REF = 3, // To get 300MHz for the reference clock
parameter DIVCLK_DIVIDE= 1,
parameter CLKFBOUT_USE_FINE_PS= 1, // 0 - old, 1 - new
parameter CLKFBOUT_PHASE = 0.000,
parameter SDCLK_PHASE = 0.000,
parameter CLK_PHASE = 0.000,
......@@ -379,6 +380,7 @@ module phy_cmd#(
.CLKFBOUT_MULT_REF(CLKFBOUT_MULT_REF),
.CLKFBOUT_DIV_REF (CLKFBOUT_DIV_REF),
.DIVCLK_DIVIDE (DIVCLK_DIVIDE),
.CLKFBOUT_USE_FINE_PS (CLKFBOUT_USE_FINE_PS),
.CLKFBOUT_PHASE (CLKFBOUT_PHASE),
.SDCLK_PHASE (SDCLK_PHASE),
.CLK_PHASE (CLK_PHASE),
......
......@@ -43,6 +43,7 @@ module phy_top #(
parameter CLKFBOUT_MULT_REF = 9, // Fvco=Fclkin*CLKFBOUT_MULT_F/DIVCLK_DIVIDE, Fout=Fvco/CLKOUT#_DIVIDE
parameter CLKFBOUT_DIV_REF = 3, // To get 300MHz for the reference clock
parameter DIVCLK_DIVIDE= 1,
parameter CLKFBOUT_USE_FINE_PS =1, // if 1 move CLKFBOUT_PHASE and SDCLK_PHASE, if 0 - other outputs (moved phases should be 0/same)
parameter CLKFBOUT_PHASE = 0.000,
parameter SDCLK_PHASE = 0.000,
parameter CLK_PHASE = 0.000,
......@@ -312,6 +313,7 @@ wire sdclk; // BUFIO
// So shifting phase dynamically by plus/- 113 moves SDCLK by a full period (2.5ns) forward and backward (113= 0x71)
wire clk_pre, clk_div_pre, sdclk_pre, mclk_pre, clk_fb;
BUFR clk_bufr_i (.O(clk), .CE(), .CLR(), .I(clk_pre));
//BUFIO clk_buf_i (.O(clk), .I(clk_pre));
BUFR clk_div_bufr_i (.O(clk_div), .CE(), .CLR(), .I(clk_div_pre));
BUFIO iclk_bufio_i (.O(sdclk), .I(sdclk_pre) );
//BUFIO clk_ref_i (.O(ref_clk), .I(clk_ref_pre));
......@@ -325,19 +327,21 @@ BUFG mclk_i (.O(mclk),.I(mclk_pre) );
.BANDWIDTH (BANDWIDTH),
.CLKFBOUT_MULT_F (CLKFBOUT_MULT),
.DIVCLK_DIVIDE (DIVCLK_DIVIDE),
.CLKFBOUT_PHASE (CLKFBOUT_PHASE),
.CLKOUT0_PHASE (SDCLK_PHASE),
.CLKOUT1_PHASE (CLK_PHASE),
.CLKOUT2_PHASE (CLK_DIV_PHASE),
.CLKOUT3_PHASE (MCLK_PHASE),
.CLKFBOUT_PHASE (CLKFBOUT_USE_FINE_PS? 0.0 : CLKFBOUT_PHASE),
.CLKOUT0_PHASE (CLKFBOUT_USE_FINE_PS? 0.0 : SDCLK_PHASE),
.CLKOUT1_PHASE (CLKFBOUT_USE_FINE_PS? CLK_PHASE : 0.0),
.CLKOUT2_PHASE (CLKFBOUT_USE_FINE_PS? CLK_DIV_PHASE : 0.0),
.CLKOUT3_PHASE (CLKFBOUT_USE_FINE_PS? MCLK_PHASE : 90.000), // (78.75), // (MCLK_PHASE), // should be multiple of 11.25 (90.000/8)
//ERROR: [DRC 23-20] Rule violation (AVAL-139) Phase shift check - The MMCME2_ADV cell mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/mmcm_phase_cntr_i/MMCME2_ADV_i has a fractional CLKOUT3_PHASE value (75.000) with CLKOUT3_USE_FINE_PS set to FALSE. It should be a multiple of [45 / CLKOUT3_DIVIDE] = [45 / 4] = 11.250.
// .CLKOUT4_PHASE (0.000),
// .CLKOUT5_PHASE (0.000),
// .CLKOUT6_PHASE (0.000),
.CLKFBOUT_USE_FINE_PS ("FALSE"),
.CLKOUT0_USE_FINE_PS ("FALSE"),
.CLKOUT1_USE_FINE_PS ("TRUE"),
.CLKOUT2_USE_FINE_PS ("TRUE"),
.CLKOUT3_USE_FINE_PS ("TRUE"),
.CLKFBOUT_USE_FINE_PS (CLKFBOUT_USE_FINE_PS? "TRUE" : "FALSE"),
.CLKOUT0_USE_FINE_PS (CLKFBOUT_USE_FINE_PS? "TRUE" : "FALSE"),
.CLKOUT1_USE_FINE_PS (CLKFBOUT_USE_FINE_PS? "FALSE" : "TRUE"),
.CLKOUT2_USE_FINE_PS (CLKFBOUT_USE_FINE_PS? "FALSE" : "TRUE"),
.CLKOUT3_USE_FINE_PS (CLKFBOUT_USE_FINE_PS? "FALSE" : "TRUE"),
// .CLKOUT4_USE_FINE_PS("FALSE"),
// .CLKOUT5_USE_FINE_PS("FALSE"),
// .CLKOUT6_USE_FINE_PS("FALSE"),
......
This diff is collapsed.
......@@ -393,6 +393,11 @@ class ImportVerilogParameters(object):
# Try binary operation
# repeat until end of line or ')'
while True:
try:
opStart=skipWS(operand1[2])
except:
print("line=\n",line)
print("defines=\n",self.defines)
opStart=skipWS(operand1[2])
if (opStart == len(line)) : # or (line[opStart] == ')'): # just primary
return operand1
......@@ -485,7 +490,7 @@ class ImportVerilogParameters(object):
# Macro substitution excluding the very first character
if "`" in line [1:]:
for define in self.defines:
line.replace("`"+define,self.defines[define])
line = line.replace("`"+define,self.defines[define])
if line[0]== "`":
tokens=line[1:].replace("\t"," ").split(" ",1) #second tokens
for i in (1,2):
......
This diff is collapsed.
......@@ -1361,6 +1361,8 @@ class X393McntrlAdjust(object):
y=a*phase+b
y0=y
#find the lowest approximate solution to consider
if (quiet <2):
print("phase= %d, y=%f, variantStep=%f"%(phase,y,variantStep))
if y0 > (-max_dly_err):
while (y0 >= (variantStep-max_dly_err)):
y0 -= variantStep
......@@ -1369,6 +1371,8 @@ class X393McntrlAdjust(object):
while (y0<(-max_dly_err)):
y0 += variantStep
periods += 1
if (quiet <2):
print("y0=%f"%(y0))
dly_min= max(0,int(y0-4.5))
dly_max= min(max_lin_dly,int(y0+5.5))
dly_to_try=[]
......
......@@ -88,6 +88,12 @@ class X393McntrlTiming(object):
if phase is None:
phase= vrlg.get_default("DLY_PHASE")
vrlg.DLY_PHASE=phase & ((1<<vrlg.PHASE_WIDTH)-1)
if vrlg.CLKFBOUT_USE_FINE_PS:
phase_value = (-vrlg.DLY_PHASE) & ((1<<vrlg.PHASE_WIDTH)-1)
if quiet<2:
print("SET INVERTED CLOCK PHASE=0x%x (actual value is 0x%x)"%(vrlg.DLY_PHASE, phase_value))
self.x393_axi_tasks.write_control_register(vrlg.LD_DLY_PHASE, phase_value) # {{(32-PHASE_WIDTH){1'b0}},phase}); // control register address
else:
if quiet<2:
print("SET CLOCK PHASE=0x%x"%(vrlg.DLY_PHASE))
self.x393_axi_tasks.write_control_register(vrlg.LD_DLY_PHASE,vrlg.DLY_PHASE) # {{(32-PHASE_WIDTH){1'b0}},phase}); // control register address
......@@ -106,6 +112,9 @@ class X393McntrlTiming(object):
<wait_seq> read and re-send status request to make sure status reflects new data (just for testing, too fast for Python)
Returns 1 if success, 0 if timeout
"""
if vrlg.CLKFBOUT_USE_FINE_PS:
patt = 0x3000000 | ((-vrlg.DLY_PHASE) & 0xff)
else:
patt = 0x3000000 | vrlg.DLY_PHASE
mask = 0x3000100
if check_phase_value:
......@@ -452,6 +461,9 @@ class X393McntrlTiming(object):
Wait until clock phase shifter is ready
"""
data=self.x393_axi_tasks.read_status(vrlg.MCONTR_PHY_STATUS_REG_ADDR)
# Phase value the status word is compared against (low 8 bits only).
# With CLKFBOUT_USE_FINE_PS set, the compared value is the negated DLY_PHASE
# wrapped to 8 bits, so the comparison must use the negated value as well.
expected_phase = vrlg.DLY_PHASE
if vrlg.CLKFBOUT_USE_FINE_PS:
    expected_phase = (-expected_phase) & 0xff
# Poll until the phase shifter reports ready AND the low status byte matches the
# expected phase. Comparing against vrlg.DLY_PHASE here (instead of expected_phase)
# would never match in inverted mode for a non-zero phase and would spin forever.
while (((data & vrlg.STATUS_PSHIFTER_RDY_MASK) == 0) or (((data ^ expected_phase) & 0xff) != 0)):
    data=self.x393_axi_tasks.read_status(vrlg.MCONTR_PHY_STATUS_REG_ADDR)
    if self.DRY_MODE: break # in dry mode, stop after a single re-read instead of polling
......
......@@ -9,6 +9,16 @@
// `define USE_PCLK2X
// `define USE_XCLK2X
// `define DEBUG_RING 1
// `define MCLK_VCO_MULT 16
// DDR3 memory speed grade and density
`define sg25 1
// `define sg15E 1
// `define sg187E 1
`define den4096Mb 1
`define MCLK_VCO_MULT 16
// `define MCLK_VCO_MULT 18
// `define MCLK_VCO_MULT 20
`define MEMBRIDGE_DEBUG_WRITE 1
// Environment-dependent options
......
......@@ -30,7 +30,7 @@ module fifo_1cycle
(
input rst, // reset, active high
input clk, // clock - positive edge
input srst, // sync reset
input sync_rst, // sync reset
input we, // write enable
input re, // read enable
input [DATA_WIDTH-1:0] data_in, // input data
......@@ -61,20 +61,20 @@ module fifo_1cycle
always @ (posedge clk or posedge rst) begin
if (rst) fill <= 0;
else if (srst) fill <= 0;
else if (sync_rst) fill <= 0;
else fill <= next_fill;
if (rst) wa <= 0;
else if (srst) wa <= 0;
else if (sync_rst) wa <= 0;
else if (we) wa <= wa+1;
if (rst) ra <= 0;
else if (srst) ra <= 0;
else if (sync_rst) ra <= 0;
else if (re) ra <= ra+1;
else if (fill==0) ra <= wa; // Just recover from bit errors
if (rst) nempty <= 0;
else if (srst) nempty <= 0;
else if (sync_rst) nempty <= 0;
else nempty <= (next_fill!=0);
......
......@@ -55,7 +55,7 @@ module fifo_same_clock
reg [DATA_WIDTH-1:0] outreg;
reg [DATA_DEPTH-1:0] ra;
reg [DATA_DEPTH-1:0] wa;
wire [DATA_DEPTH-1:0] next_fill;
// wire [DATA_DEPTH-1:0] next_fill;
reg wem;
wire rem;
reg out_full=0; //output register full
......@@ -63,7 +63,7 @@ module fifo_same_clock
reg ram_nempty;
assign next_fill = fill[DATA_DEPTH-1:0]+((wem && ~rem)?1:((~wem && rem && ram_nempty)?-1:0));
// assign next_fill = fill[DATA_DEPTH-1:0]+((wem && ~rem)?1:((~wem && rem && ram_nempty)?-1:0));
assign rem= ram_nempty && (re || !out_full);
assign data_out=outreg;
assign nempty=out_full;
......@@ -75,7 +75,9 @@ module fifo_same_clock
always @ (posedge clk or posedge rst) begin
if (rst) fill <= 0;
else if (sync_rst) fill <= 0;
else fill <= next_fill;
// else fill <= next_fill;
else if ( wem && ~rem) fill <= fill + 1;
else if (~wem && rem) fill <= fill - 1;
if (rst) wem <= 0;
else if (sync_rst) wem <= 0;
......@@ -83,7 +85,9 @@ module fifo_same_clock
if (rst) ram_nempty <= 0;
else if (sync_rst) ram_nempty <= 0;
else ram_nempty <= (next_fill != 0);
// else ram_nempty <= (next_fill != 0);
// else ram_nempty <= wem || (|fill[DATA_DEPTH-1:1]) || (fill[0] && !rem);
else ram_nempty <= (|fill[DATA_DEPTH-1:1]) || (fill[0] && wem) || ((fill[0] || wem) && !rem) ;
if (rst) wa <= 0;
else if (sync_rst) wa <= 0;
......
......@@ -84,7 +84,9 @@ module status_router16(
wire [1:0] rq_int;
wire [1:0] start_int; // only for the first cycle, combinatorial
status_router2 status_router2_top_i (
status_router2 #(
.FIFO_TYPE ("TWO_CYCLE") //= "ONE_CYCLE" // higher latency, but easier timing - use on some levels (others - default "ONE_CYCLE")
) status_router2_top_i (
.rst (rst), // input
.clk (clk), // input
.srst (srst), // input
......
......@@ -22,7 +22,9 @@
`timescale 1ns/1ps
`include "system_defines.vh"
//`define DEBUG_FIFO 1
module status_router2 (
module status_router2 #(
parameter FIFO_TYPE = "ONE_CYCLE" // "TWO_CYCLE"
)(
input rst,
input clk,
input srst, // sync reset
......@@ -99,26 +101,28 @@ module status_router2 (
end
/* fifo_same_clock has currently latency of 2 cycles, use smth. faster here? - fifo_1cycle (but it has unregistered data output) */
generate
if (FIFO_TYPE == "ONE_CYCLE") begin
fifo_1cycle #(
.DATA_WIDTH(9),
.DATA_DEPTH(4) // 16
) fifo_in0_i (
.rst (1'b0), // rst), // input
.clk (clk), // input
.srst (srst), // input
.sync_rst (srst), // input
.we (start_rcv[0] || rcv_rest_r[0]), // input
.re (fifo_re[0]), // input
.data_in ({rcv_rest_r[0] & ~rq_in[0], db_in0}), // input[8:0] MSB marks last byte
.data_out ({fifo_last_byte[0],fifo0_out}), // output[8:0]
.nempty (fifo_nempty_pre[0]), // output
.nempty (fifo_nempty_pre[0]), // output reg
.half_full (fifo_half_full[0]) // output reg
`ifdef DEBUG_FIFO
`ifdef DEBUG_FIFO
,.under(), // output reg
.over(), // output reg
.wcount(), // output[3:0] reg
.rcount(), // output[3:0] reg
.num_in_fifo() // output[3:0]
`endif
`endif
);
fifo_1cycle #(
......@@ -127,21 +131,67 @@ module status_router2 (
) fifo_in1_i (
.rst (1'b0), // rst), // input
.clk (clk), // input
.srst (srst), // input
.sync_rst (srst), // input
.we (start_rcv[1] || rcv_rest_r[1]), // input
.re (fifo_re[1]), // input
.data_in ({rcv_rest_r[1] & ~rq_in[1], db_in1}), // input[8:0] MSB marks last byte
.data_out ({fifo_last_byte[1],fifo1_out}), // output[8:0]
.nempty (fifo_nempty_pre[1]), // output
.nempty (fifo_nempty_pre[1]), // output reg
.half_full (fifo_half_full[1]) // output reg
`ifdef DEBUG_FIFO
`ifdef DEBUG_FIFO
,.under(), // output reg
.over(), // output reg
.wcount(), // output[3:0] reg
.rcount(), // output[3:0] reg
.num_in_fifo() // output[3:0]
`endif
`endif
);
end else begin
fifo_same_clock #(
.DATA_WIDTH(9),
.DATA_DEPTH(4) // 16
) fifo_in0_i (
.rst (1'b0), // rst), // input
.clk (clk), // input
.sync_rst (srst), // input
.we (start_rcv[0] || rcv_rest_r[0]), // input
.re (fifo_re[0]), // input
.data_in ({rcv_rest_r[0] & ~rq_in[0], db_in0}), // input[8:0] MSB marks last byte
.data_out ({fifo_last_byte[0],fifo0_out}), // output[8:0]
.nempty (fifo_nempty_pre[0]), // output reg
.half_full (fifo_half_full[0]) // output reg
`ifdef DEBUG_FIFO
,.under(), // output reg
.over(), // output reg
.wcount(), // output[3:0] reg
.rcount(), // output[3:0] reg
.num_in_fifo() // output[3:0]
`endif
);
fifo_same_clock #(
.DATA_WIDTH(9),
.DATA_DEPTH(4) // 16
) fifo_in1_i (
.rst (1'b0), // rst), // input
.clk (clk), // input
.sync_rst (srst), // input
.we (start_rcv[1] || rcv_rest_r[1]), // input
.re (fifo_re[1]), // input
.data_in ({rcv_rest_r[1] & ~rq_in[1], db_in1}), // input[8:0] MSB marks last byte
.data_out ({fifo_last_byte[1],fifo1_out}), // output[8:0]
.nempty (fifo_nempty_pre[1]), // output reg
.half_full (fifo_half_full[1]) // output reg
`ifdef DEBUG_FIFO
,.under(), // output reg
.over(), // output reg
.wcount(), // output[3:0] reg
.rcount(), // output[3:0] reg
.num_in_fifo() // output[3:0]
`endif
);
end
endgenerate
// one car per green (round robin priority)
// start sending out with one cycle latency - now 2 cycles because of the FIFO
......
......@@ -47,7 +47,9 @@ module status_router4(
wire [1:0] rq_int;
wire [1:0] start_int; // only for the first cycle, combinatorial
status_router2 status_router2_top_i (
status_router2 #(
.FIFO_TYPE ("TWO_CYCLE") //= "ONE_CYCLE" // higher latency, but easier timing - use on some levels (others - default "ONE_CYCLE")
) status_router2_top_i (
.rst (rst), // input
.clk (clk), // input
.srst (srst), // input
......
......@@ -59,7 +59,9 @@ module status_router8(
wire [1:0] rq_int;
wire [1:0] start_int; // only for the first cycle, combinatorial
status_router2 status_router2_top_i (
status_router2 #(
.FIFO_TYPE ("ONE_CYCLE") //= "ONE_CYCLE" // higher latency, but easier timing - use on some levels (others - default "ONE_CYCLE")
) status_router2_top_i (
.rst (rst), // input
.clk (clk), // input
.srst (srst), // input
......
......@@ -32,7 +32,8 @@ module sync_resets#(
);
reg en_locked=0; // mostly for simulation, locked[0] is 1'bx until the first clock[0] pulse
wire [WIDTH-1:0] rst_w; // resets matching input clocks
wire rst_early_master;
wire rst_early_master_w;
reg rst_early_master;
assign rst = rst_w;
reg mrst = 1;
always @ (posedge arst or posedge clk[0]) begin
......@@ -43,13 +44,16 @@ module sync_resets#(
if (arst) mrst <= 1;
else mrst <= ~(locked[0] && en_locked);
end
always @(posedge clk[0]) begin
rst_early_master <= rst_early_master_w;
end
level_cross_clocks #(
.WIDTH (1),
.REGISTER (REGISTER)
) level_cross_clocks_mrst_i (
.clk (clk[0]), // input
.d_in (mrst), // input[0:0]
.d_out (rst_early_master) // output[0:0]
.d_out (rst_early_master_w) // output[0:0]
);
generate
......@@ -57,7 +61,7 @@ module sync_resets#(
for (i = 1; i < WIDTH; i = i + 1) begin: rst_block
level_cross_clocks #(
.WIDTH (1),
.REGISTER (REGISTER)
.REGISTER ((i==5) ? 1: REGISTER) // disable for aclk
) level_cross_clocks_rst_i (
.clk (clk[i]), // input
.d_in (mrst || rst_early_master || ~locked[i] ), // input[0:0]
......
......@@ -276,7 +276,8 @@ module ddr3_wrap#(
assign #(DQS_IN_DELAY_H) DQSU = en_dqs_in[1]? DQSU_DH3: 1'bz;
assign #(DQS_IN_DELAY_H) NDQSU = en_dqs_in[1]? NDQSU_DH3: 1'bz;
ddr3 #(
ddr3
/*#(
.TCK_MIN (2500),
.TJIT_PER (100),
.TJIT_CC (200),
......@@ -444,7 +445,8 @@ module ddr3_wrap#(
.DIFF_GROUP (2'd2),
.SIMUL_500US (5),
.SIMUL_200US (2)
) ddr3_i (
) */
ddr3_i (
.rst_n (SDRST_D), // input
.ck (SDCLK_D), // input
.ck_n (SDNCLK_D), // input
......
......@@ -1026,6 +1026,7 @@ assign axi_grst = axi_rst_pre;
.CLKFBOUT_MULT_REF (CLKFBOUT_MULT_REF),
.CLKFBOUT_DIV_REF (CLKFBOUT_DIV_REF),
.DIVCLK_DIVIDE (DIVCLK_DIVIDE),
.CLKFBOUT_USE_FINE_PS (CLKFBOUT_USE_FINE_PS),
.CLKFBOUT_PHASE (CLKFBOUT_PHASE),
.SDCLK_PHASE (SDCLK_PHASE),
.CLK_PHASE (CLK_PHASE),
......
This diff is collapsed.
......@@ -1296,6 +1296,7 @@ assign bresp= x393_i.ps7_i.MAXIGP0BRESP;
.CLKFBOUT_DIV_REF (CLKFBOUT_DIV_REF),
.DIVCLK_DIVIDE (DIVCLK_DIVIDE),
.CLKFBOUT_PHASE (CLKFBOUT_PHASE),
.CLKFBOUT_USE_FINE_PS (CLKFBOUT_USE_FINE_PS),
.SDCLK_PHASE (SDCLK_PHASE),
.CLK_PHASE (CLK_PHASE),
.CLK_DIV_PHASE (CLK_DIV_PHASE),
......
......@@ -1423,6 +1423,7 @@ assign bresp= x393_i.ps7_i.MAXIGP0BRESP;
.CLKFBOUT_MULT_REF (CLKFBOUT_MULT_REF),
.CLKFBOUT_DIV_REF (CLKFBOUT_DIV_REF),
.DIVCLK_DIVIDE (DIVCLK_DIVIDE),
.CLKFBOUT_USE_FINE_PS (CLKFBOUT_USE_FINE_PS),
.CLKFBOUT_PHASE (CLKFBOUT_PHASE),
.SDCLK_PHASE (SDCLK_PHASE),
.CLK_PHASE (CLK_PHASE),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment