/*!
* Module:byte_lane
* @file byte_lane.v
* @date 2014-04-26
* @author Andrey Filippov
*
* @brief DDR3 byte lane, including DQS I/O, 8xDQ I/O and DM output
*
* @copyright Copyright (c) 2014 Elphel, Inc.
*
* License:
*
* byte_lane.v is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* byte_lane.v is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see http://www.gnu.org/licenses/ .
*
* Additional permission under GNU GPL version 3 section 7:
* If you modify this Program, or any covered work, by linking or combining it
* with independent modules provided by the FPGA vendor only (this permission
* does not extend to any 3-rd party modules, "soft cores" or macros) under
* different license terms solely for the purpose of generating binary "bitstream"
* files and/or simulating the code, the copyright holders of this Program give
* you the right to distribute the covered work without those independent modules
* as long as the source code for them is available from the FPGA vendor free of
* charge, and there is no dependence on any encrypted modules for simulating of
* the combined code. This permission applies to you if the distributed code
* contains all the components and scripts required to completely simulate it
* with at least one of the Free Software programs.
*/
`timescale 1ns/1ps
`include "system_defines.vh"
// minimizing total DQS in delay to match DQ (finedelay stage adds some?)
//`define NOFINEDELAY_DQS 1
/*
 * byte_lane - one DDR3 byte lane: 8 x DQ I/O (dq_single), DM output (dm_single)
 * and the DQS/~DQS pair (dqs_single, or dqs_single_nofine when NOFINEDELAY_DQS
 * is defined).
 *
 * All data, tristate, DCI-disable and delay-control inputs are registered once
 * on clk_div inside this module before they are passed to the I/O sub-modules.
 * rst acts as a synchronous (clk_div) reset for these registers and is also
 * passed through to the sub-modules.
 *
 * The read clock iclk is produced by a BUFR driven by the dqs_received_dly
 * output of the DQS sub-module and is distributed to all eight DQ sub-modules.
 *
 * Delay address map (a one-clk_div-cycle load strobe is generated when
 * ld_delay is high):
 *   dly_addr[4] == 0 : output delays, dly_addr[3:0] = 0..7 - DQ[0]..DQ[7],
 *                      8 - DQS, 9 - DM
 *   dly_addr[4] == 1 : input delays,  dly_addr[3:0] = 0..7 - DQ[0]..DQ[7],
 *                      8 - DQS
 *   Any other dly_addr[3:0] value generates no strobe.
 * The "set" input is registered and forwarded to every delay element.
 */
module byte_lane #(
    parameter IODELAY_GRP           = "IODELAY_MEMORY",
    parameter IBUF_LOW_PWR          = "TRUE",
    parameter IOSTANDARD_DQ         = "SSTL15_T_DCI",
    parameter IOSTANDARD_DM         = "SSTL15",
    parameter IOSTANDARD_DQS        = "DIFF_SSTL15_T_DCI",
    parameter SLEW_DQ               = "SLOW",   // also used for the DM output
    parameter SLEW_DQS              = "SLOW",
    parameter real REFCLK_FREQUENCY = 300.0,
    parameter HIGH_PERFORMANCE_MODE = "FALSE"
)(
    inout   [7:0] dq,              // DQ I/O pads
//  inout         dm,              // DM I/O pad (actually only output)
    output        dm,              // DM I/O pad (actually only output)
    inout         dqs,             // DQS I/O pad
    inout         ndqs,            // ~DQS I/O pad
    input         clk,             // free-running system clock, same frequency as iclk (shared for R/W)
    input         clk_div,         // free-running half clk frequency, front aligned to clk (shared for R/W)
    input         inv_clk_div,     // invert clk_div for R channels (clk_div is shared between R and W)
    input         rst,
    input         dci_disable_dqs, // disable DCI termination during writes and idle for dqs
    input         dci_disable_dq,  // disable DCI termination during writes and idle for dq and dm signals
    input  [31:0] din,             // parallel data to be sent out (4 bits per DQ I/O)
    input   [3:0] din_dm,          // parallel data to be sent out over DM
    input   [3:0] tin_dq,          // tristate for data out (sent out earlier than data!) and dm
    input   [3:0] din_dqs,         // parallel data to be sent out over DQS
    input   [3:0] tin_dqs,         // tristate for DQS out (sent out earlier than data!)
    output [31:0] dout,            // parallel data received from DDR3 memory, 4 bits per DQ I/O
    input   [7:0] dly_data,        // delay value (3 LSB - fine delay)
    input   [4:0] dly_addr,        // select which delay to program
    input         ld_delay,        // load delay data to selected iodelay (clk_div synchronous)
    input         set              // clk_div synchronous set all delays from previously loaded values
);
    wire        dqs_read;          // dqs_received_dly output of the DQS sub-module, drives the BUFR below
    wire        iclk;              // source-synchronous clock (BUFR from DQS)

    // Input registers (clk_div domain). Initial values are the same as the
    // values assigned under rst, so the registers are defined before the first
    // clk_div edge.
    reg  [31:0] din_r=0;
// Preventing register removal of equivalent registers
`ifndef IGNORE_ATTR
    (* keep = "true" *)
`endif
    reg   [3:0] din_dm_r=0, din_dqs_r=0, tin_dq_r=4'hf, tin_dqs_r=4'hf;
`ifndef IGNORE_ATTR
    (* keep = "true" *)
`endif
    reg   [7:0] dly_data_r=0;
`ifndef IGNORE_ATTR
    (* keep = "true" *)
`endif
    reg         set_r=0;
`ifndef IGNORE_ATTR
    (* keep = "true" *)
`endif
    // Initial value 1 matches the value assigned under rst
    reg         dci_disable_dqs_r=1'b1, dci_disable_dq_r=1'b1;

    // Per-delay-element load strobes, decoded from dly_addr and ld_delay
    reg   [7:0] ld_odly=8'b0, ld_idly=8'b0;
    reg         ld_odly_dqs=1'b0, ld_idly_dqs=1'b0, ld_odly_dm=1'b0;

    BUFR iclk_i (.O(iclk),.I(dqs_read), .CLR(1'b0),.CE(1'b1)); // OK, works with constraint? Seems now work w/o
/*
 Alternative iclk generation that was tried, kept together with the resulting tool messages:

wire iclk_int;
//BUFR iclk_int_i (.O(iclk_int), .I(dqs_read), .CLR(1'b0),.CE(1'b1));
assign iclk_int = dqs_read && !rst;
BUFIO iclk_i (.O(iclk), .I(iclk_int));
CRITICAL WARNING: [Vivado 12-1411] Cannot set LOC property of ports, Could not legally place instance
mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane0_i/dqs_i/iobufs_dqs_i/IBUFDS/IBUFDS_M at N7 (IOB_X1Y120
since it belongs to a shape containing instance mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane0_i/iclk_i.
The shape requires relative placement between
mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane0_i/dqs_i/iobufs_dqs_i/IBUFDS/IBUFDS_M and
mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane0_i/iclk_i that cannnot be honored because it would result in
an invalid location for mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane0_i/iclk_i. [x393.xdc:193]
----------
ERROR: [DRC 23-20] Rule violation (RTSTAT-1) Unrouted net - 2 net(s) are unrouted. The problem bus(es) and/or net(s) are
mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane1_i/iclk_int,
mcntrl393_i/memctrl16_i/mcontr_sequencer_i/phy_cmd_i/phy_top_i/byte_lane0_i/iclk_int.
*/

    // One-hot decode of dly_addr[3:0]: bit N is set when dly_addr[3:0] == N (N = 0..9),
    // all bits are zero for values 10..15
    wire [9:0] decode_sel={
               (dly_addr[3:0]==9)?1'b1:1'b0,
               (dly_addr[3:0]==8)?1'b1:1'b0,
               (dly_addr[3:0]==7)?1'b1:1'b0,
               (dly_addr[3:0]==6)?1'b1:1'b0,
               (dly_addr[3:0]==5)?1'b1:1'b0,
               (dly_addr[3:0]==4)?1'b1:1'b0,
               (dly_addr[3:0]==3)?1'b1:1'b0,
               (dly_addr[3:0]==2)?1'b1:1'b0,
               (dly_addr[3:0]==1)?1'b1:1'b0,
               (dly_addr[3:0]==0)?1'b1:1'b0};

    // Register all inputs and generate the delay load strobes (synchronous reset)
//always @ (posedge clk_div or posedge rst) begin
    always @ (posedge clk_div) begin
        if (rst) begin
            din_r <= 32'b0; din_dm_r<=0; din_dqs_r<=0; tin_dq_r<=4'hf; tin_dqs_r<=4'hf;
            dly_data_r<=8'b0;set_r<=1'b0;
            dci_disable_dqs_r <= 1'b1; dci_disable_dq_r <=1'b1;
            ld_odly<=8'b0; ld_idly<=8'b0; ld_odly_dqs<=1'b0; ld_idly_dqs<=1'b0; ld_odly_dm<=1'b0;
        end else begin
            din_r<=din[31:0]; din_dm_r<=din_dm; din_dqs_r<=din_dqs; tin_dq_r<=tin_dq; tin_dqs_r<=tin_dqs;
            dly_data_r<=dly_data; set_r<=set;
            dci_disable_dqs_r <= dci_disable_dqs; dci_disable_dq_r <= dci_disable_dq;
            // dly_addr[4]==0 selects output delays: {DM, DQS, DQ[7:0]}
            {ld_odly_dm,ld_odly_dqs,ld_odly[7:0]} <= {10{(~dly_addr[4]) & ld_delay}} & decode_sel;
            // dly_addr[4]==1 selects input delays: {DQS, DQ[7:0]} (there is no DM input delay)
            {           ld_idly_dqs,ld_idly[7:0]} <= {9 {( dly_addr[4]) & ld_delay}} & decode_sel[8:0];
        end
    end

    // Eight DQ I/Os. DQ bit i uses bits {i+24, i+16, i+8, i} of din_r/dout.
    generate
        genvar i;
        for (i=0; i < 8; i=i+1) begin: dq_block
            dq_single #(
                .IODELAY_GRP(IODELAY_GRP),
                .IBUF_LOW_PWR(IBUF_LOW_PWR),
                .IOSTANDARD(IOSTANDARD_DQ),
                .SLEW(SLEW_DQ),
                .REFCLK_FREQUENCY(REFCLK_FREQUENCY),
                .HIGH_PERFORMANCE_MODE(HIGH_PERFORMANCE_MODE)
            ) dq_i(
                .dq(dq[i]),                     // I/O pad
                .iclk(iclk),                    // source-synchronous clock (BUFR from DQS)
                .clk(clk),                      // free-running system clock, same frequency as iclk (shared for R/W)
                .clk_div(clk_div),              // free-running half clk frequency, front aligned to clk (shared for R/W)
                .inv_clk_div(inv_clk_div),      // invert clk_div for R channel (clk_div is shared between R and W)
                .rst(rst),
                .dci_disable(dci_disable_dq_r), // disable DCI termination during writes and idle
                .dly_data(dly_data_r),          // delay value (3 LSB - fine delay)
                .din({din_r[i+24],din_r[i+16],din_r[i+8],din_r[i]}) , // parallel data to be sent out
                .tin(tin_dq_r),                 // tristate for data out (sent out earlier than data!)
                .dout({dout[i+24],dout[i+16],dout[i+8],dout[i]}), // parallel data received from DDR3 memory
                .set_odelay(set_r),             // clk_div synchronous set odelay value from previously loaded
                .ld_odelay(ld_odly[i]),         // clk_div synchronous load odelay value from dly_data
                .set_idelay(set_r),             // clk_div synchronous set idelay value from previously loaded
                .ld_idelay(ld_idly[i])          // clk_div synchronous load idelay value from dly_data
            );
        end
    endgenerate

    // DM output. Shares the tristate (tin_dq_r), DCI-disable (dci_disable_dq_r)
    // and slew (SLEW_DQ) settings with the DQ I/Os.
    dm_single #(
        .IODELAY_GRP(IODELAY_GRP),
        .IBUF_LOW_PWR(IBUF_LOW_PWR),
        .IOSTANDARD(IOSTANDARD_DM),
        .SLEW(SLEW_DQ),
        .REFCLK_FREQUENCY(REFCLK_FREQUENCY),
        .HIGH_PERFORMANCE_MODE(HIGH_PERFORMANCE_MODE)
    ) dm_i(
        .dm(dm),                        // DM output pad
        .clk(clk),                      // free-running system clock, same frequency as iclk (shared for R/W)
        .clk_div(clk_div),              // free-running half clk frequency, front aligned to clk (shared for R/W)
        .rst(rst),
        .dci_disable(dci_disable_dq_r), // disable DCI termination during writes and idle
        .dly_data(dly_data_r),          // delay value (3 LSB - fine delay)
        .din(din_dm_r[3:0]) ,           // parallel data to be sent out
        .tin(tin_dq_r),                 // tristate for data out (sent out earlier than data!)
        .set_odelay(set_r),             // clk_div synchronous set odelay value from previously loaded
        .ld_odelay(ld_odly_dm)          // clk_div synchronous load odelay value from dly_data
    );

    // DQS/~DQS pair. Its dqs_received_dly output (dqs_read) is the source of iclk.
    // The sub-module variant is selected at compile time by NOFINEDELAY_DQS.
`ifdef NOFINEDELAY_DQS
    dqs_single_nofine #(
`else
    dqs_single #(
`endif
        .IODELAY_GRP(IODELAY_GRP),
        .IBUF_LOW_PWR(IBUF_LOW_PWR),
        .IOSTANDARD(IOSTANDARD_DQS),
        .SLEW(SLEW_DQS),
        .REFCLK_FREQUENCY(REFCLK_FREQUENCY),
        .HIGH_PERFORMANCE_MODE(HIGH_PERFORMANCE_MODE)
    ) dqs_i (
        .dqs(dqs),
        .ndqs(ndqs),
        .clk(clk),
        .clk_div(clk_div),
        .rst(rst),
        .dqs_received_dly(dqs_read),
        .dci_disable(dci_disable_dqs_r), // disable DCI termination during writes and idle
        .dly_data(dly_data_r[7:0]),
        .din(din_dqs_r[3:0]),
        .tin(tin_dqs_r[3:0]),
        .set_odelay(set_r),              // clk_div synchronous set odelay value from previously loaded
        .ld_odelay(ld_odly_dqs),         // clk_div synchronous load odelay value from dly_data
        .set_idelay(set_r),              // clk_div synchronous set idelay value from previously loaded
        .ld_idelay(ld_idly_dqs)          // clk_div synchronous load idelay value from dly_data
    );
endmodule