phy_top.v 17.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
/*******************************************************************************
 * Module: phy_top
 * Date:2014-04-30  
 * Author: Andrey Filippov
 * Description: Top module of the DDR3 phy
 *
 * Copyright (c) 2014 Elphel, Inc.
 * phy_top.v is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 *  phy_top.v is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/> .
 *******************************************************************************/
`timescale 1ns/1ps

module  phy_top #(
    parameter IOSTANDARD_DQ =   "SSTL15_T_DCI",
    parameter IOSTANDARD_DQS =  "DIFF_SSTL15_T_DCI",
    parameter IOSTANDARD_CMDA = "SSTL15",
Andrey Filippov's avatar
Andrey Filippov committed
27
    parameter IOSTANDARD_CLK = "DIFF_SSTL15",
28 29 30
    parameter SLEW_DQ =         "SLOW",
    parameter SLEW_DQS =        "SLOW",
    parameter SLEW_CMDA =       "SLOW",
Andrey Filippov's avatar
Andrey Filippov committed
31
    parameter SLEW_CLK =       "SLOW",
32 33 34 35
    parameter IBUF_LOW_PWR =    "TRUE",
    parameter IODELAY_GRP =     "IODELAY_MEMORY",
    parameter real REFCLK_FREQUENCY = 300.0,
    parameter HIGH_PERFORMANCE_MODE = "FALSE",
Andrey Filippov's avatar
Andrey Filippov committed
36 37 38 39 40 41 42 43 44 45
    parameter integer ADDRESS_NUMBER= 15,
    parameter PHASE_WIDTH           = 8,
    parameter BANDWIDTH =     "OPTIMIZED",
    // Assuming 100MHz input clock, 800MHz Fvco, 400MHz clk, 200MHz clk_div, 200MHz mclk 
    parameter CLKIN_PERIOD          = 10, //ns >1.25, 600<Fvco<1200
    parameter CLKFBOUT_MULT =       8, // Fvco=Fclkin*CLKFBOUT_MULT_F/DIVCLK_DIVIDE, Fout=Fvco/CLKOUT#_DIVIDE
    parameter CLKFBOUT_MULT_REF =   9, // Fvco=Fclkin*CLKFBOUT_MULT_F/DIVCLK_DIVIDE, Fout=Fvco/CLKOUT#_DIVIDE
    parameter CLKFBOUT_DIV_REF =    3, // To get 300MHz for the reference clock
    parameter DIVCLK_DIVIDE=        1,
    parameter CLKFBOUT_PHASE =      0.000,
Andrey Filippov's avatar
Andrey Filippov committed
46
    parameter SDCLK_PHASE =          0.000,
Andrey Filippov's avatar
Andrey Filippov committed
47 48
    parameter CLK_PHASE =           0.000,
    parameter CLK_DIV_PHASE =       0.000,
Andrey Filippov's avatar
Andrey Filippov committed
49
    parameter MCLK_PHASE =          90.000,  
Andrey Filippov's avatar
Andrey Filippov committed
50 51 52 53
    parameter REF_JITTER1 =         0.010,
    parameter SS_EN =              "FALSE",
    parameter SS_MODE =      "CENTER_HIGH",
    parameter SS_MOD_PERIOD =       10000
54
)(
Andrey Filippov's avatar
Andrey Filippov committed
55 56
    output                       ddr3_clk, // DDR3 clock differential output, positive
    output                       ddr3_nclk,// DDR3 clock differential output, negative
57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
    output  [ADDRESS_NUMBER-1:0] ddr3_a,   // output address ports (14:0) for 4Gb device
    output                 [2:0]ddr3_ba,  // output bank address ports
    output                       ddr3_we,  // output WE port
    output                       ddr3_ras, // output RAS port
    output                       ddr3_cas, // output CAS port
    output                       ddr3_cke, // output Clock Enable port
    output                       ddr3_odt, // output ODT port

    inout                 [15:0] dq,       // DQ  I/O pads
    inout                        dml,      // LDM  I/O pad (actually only output)
    inout                        dqsl,     // LDQS I/O pad
    inout                        ndqsl,    // ~LDQS I/O pad
    inout                        dmu,      // UDM  I/O pad (actually only output)
    inout                        dqsu,     // UDQS I/O pad
    inout                        ndqsu,    // ~UDQS I/O pad
    
Andrey Filippov's avatar
Andrey Filippov committed
73 74 75 76
    input                        clk_in,   // master input clock, initially assuming 100MHz
    output                       clk,      // free-running system clock, same frequency as iclk (shared for R/W),     BUFR output
    output                       clk_div,  // free-running half clk frequency, front aligned to clk (shared for R/W), BUFR output
    output                       mclk,     // same as clk_div, through separate BUFG and static phase adjust
77 78 79 80 81 82 83 84 85
    input                        rst_in,      // reset delays/serdes
    
    input [2*ADDRESS_NUMBER-1:0] in_a,     // input address, 2 bits per signal (first, second) (29:0) for 4Gb device
    input                  [5:0] in_ba,    // input bank address, 2 bits per signal (first, second)
    input                  [1:0] in_we,    // input WE, 2 bits (first, second)
    input                  [1:0] in_ras,   // input RAS, 2 bits (first, second)
    input                  [1:0] in_cas,   // input CAS, 2 bits (first, second)
    input                  [1:0] in_cke,   // input CKE, 2 bits (first, second)
    input                  [1:0] in_odt,   // input ODT, 2 bits (first, second)
Andrey Filippov's avatar
Andrey Filippov committed
86 87
//    input                  [1:0] in_tri,   // tristate command/address outputs - same timing, but no odelay
    input                        in_tri,   // tristate command/address outputs - same timing, but no odelay
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
    
    input                 [63:0] din,             // parallel data to be sent out (4 bits per DG I/))
    input                  [7:0] din_dm,          // parallel data to be sent out over DM
    input                  [7:0] tin_dq,          // tristate for data out (sent out earlier than data!) and dm 
    input                  [7:0] din_dqs,         // parallel data to be sent out over DQS
    input                  [7:0] tin_dqs,         // tristate for DQS out (sent out earlier than data!) 
    output                [63:0] dout,            // parallel data received from DDR3 memory, 4 bits per DQ I/O
    
    
    input                        inv_clk_div,     // invert clk_div for R channels (clk_div is shared between R and W)
    input                        dci_disable_dqs, // disable DCI termination during writes and idle for dqs
    input                        dci_disable_dq,  // disable DCI termination during writes and idle for dq and dm signals
    
    input                  [7:0] dly_data, // delay value (3 LSB - fine delay)
    input                  [6:0] dly_addr, // select which delay to program
Andrey Filippov's avatar
Andrey Filippov committed
103
    input                        ld_delay, // load delay data to selected iodelayl (clk_div synchronous)
Andrey Filippov's avatar
Andrey Filippov committed
104 105 106 107
    input                        set,       // clk_div synchronous set all delays from previously loaded values
    output                       locked,
    output                       ps_rdy,
    output     [PHASE_WIDTH-1:0] ps_out 
108 109 110 111 112 113 114 115 116
);
  reg rst=1'b0;
  always @(posedge clk or posedge rst_in) begin
    if (rst_in) rst <= 1'b1;
    else        rst <= 1'b0;
  end
  wire  ld_data_l = (dly_addr[6:5] == 2'h0) && ld_delay ;             
  wire  ld_data_h = (dly_addr[6:5] == 2'h1) && ld_delay ;             
  wire  ld_cmda =   (dly_addr[6:5] == 2'h2) && ld_delay ;             
Andrey Filippov's avatar
Andrey Filippov committed
117 118 119 120 121
  wire  ld_mmcm=    (dly_addr[6:0] == 7'h60) && ld_delay ;
  wire  clkfb_ref, clk_ref_pre; 
  wire  clk_ref; // 200MHz/300Mhz to calibrate I/O delays            
  wire locked_mmcm,locked_pll, dly_ready;
  assign locked=locked_mmcm && locked_pll && dly_ready; // both PLL ready, I/O delay calibrated
122 123 124 125 126 127 128 129 130
  cmd_addr #(
    .IODELAY_GRP(IODELAY_GRP),
    .IOSTANDARD(IOSTANDARD_CMDA),
    .SLEW(SLEW_CMDA),
    .REFCLK_FREQUENCY(REFCLK_FREQUENCY),
    .HIGH_PERFORMANCE_MODE(HIGH_PERFORMANCE_MODE),
    .ADDRESS_NUMBER(ADDRESS_NUMBER)
  ) cmd_addr_i(
    .ddr3_a   (ddr3_a[ADDRESS_NUMBER-1:0]), // output address ports (14:0) for 4Gb device
Andrey Filippov's avatar
Andrey Filippov committed
131
    .ddr3_ba  (ddr3_ba[2:0]),             // output bank address ports
132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
    .ddr3_we  (ddr3_we),                 // output WE port
    .ddr3_ras (ddr3_ras),                // output RAS port
    .ddr3_cas (ddr3_cas),                // output CAS port
    .ddr3_cke (ddr3_cke),                // output Clock Enable port
    .ddr3_odt (ddr3_odt),                // output ODT port
    .clk      (clk),                     // free-running system clock, same frequency as iclk (shared for R/W)
    .clk_div  (clk_div),                 // free-running half clk frequency, front aligned to clk (shared for R/W)
    .rst      (rst),                     // reset delays/serdes
    .in_a     (in_a[2*ADDRESS_NUMBER-1:0]), // input address, 2 bits per signal (first, second) (29:0) for 4Gb device
    .in_ba    (in_ba[5:0]),              // input bank address, 2 bits per signal (first, second)
    .in_we    (in_we[1:0]),              // input WE, 2 bits (first, second)
    .in_ras   (in_ras[1:0]),             // input RAS, 2 bits (first, second)
    .in_cas   (in_cas[1:0]),             // input CAS, 2 bits (first, second)
    .in_cke   (in_cke[1:0]),             // input CKE, 2 bits (first, second)
    .in_odt   (in_odt[1:0]),             // input ODT, 2 bits (first, second)
Andrey Filippov's avatar
Andrey Filippov committed
147 148
//    .in_tri   (in_tri[1:0]),             // tristate command/address outputs - same timing, but no odelay
    .in_tri   (in_tri),             // tristate command/address outputs - same timing, but no odelay
149 150
    .dly_data (dly_data[7:0]),           // delay value (3 LSB - fine delay)
    .dly_addr (dly_addr[4:0]),           // select which delay to program
Andrey Filippov's avatar
Andrey Filippov committed
151
    .ld_delay (ld_cmda),               // load delay data to selected iodelayl (clk_div synchronous)
152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
    .set      (set)                      // clk_div synchronous set all delays from previously loaded values
);

  byte_lane #(
    .IODELAY_GRP     (IODELAY_GRP),
    .IBUF_LOW_PWR    (IBUF_LOW_PWR),
    .IOSTANDARD_DQ   (IOSTANDARD_DQ),
    .IOSTANDARD_DQS  (IOSTANDARD_DQS),
    .SLEW_DQ         (SLEW_DQ),
    .SLEW_DQS        (SLEW_DQS),
    .REFCLK_FREQUENCY(REFCLK_FREQUENCY),
    .HIGH_PERFORMANCE_MODE(HIGH_PERFORMANCE_MODE)
  ) byte_lane0_i (
    .dq              (dq[7:0]),          // DQ  I/O pads
    .dm              (dml),              // DM  I/O pad (actually only output)
    .dqs             (dqsl),             //  DQS I/O pad
    .ndqs            (ndqsl),            // ~DQS I/O pad
    .clk             (clk),              // free-running system clock, same frequency as iclk (shared for R/W)
    .clk_div         (clk_div),          // free-running half clk frequency, front aligned to clk (shared for R/W)
    .inv_clk_div     (inv_clk_div),      // invert clk_div for R channels (clk_div is shared between R and W)
    .rst             (rst),
    .dci_disable_dqs (dci_disable_dqs),  // disable DCI termination during writes and idle for dqs
    .dci_disable_dq  (dci_disable_dq),   // disable DCI termination during writes and idle for dq and dm signals
Andrey Filippov's avatar
Andrey Filippov committed
175
    .din             (din[31:0]),        // parallel data to be sent out (4 bits per DQ I/O))
176 177 178 179 180 181 182
    .din_dm          (din_dm[3:0]),      // parallel data to be sent out over DM
    .tin_dq          (tin_dq[3:0]),      // tristate for data out (sent out earlier than data!) and dm 
    .din_dqs         (din_dqs[3:0]),     // parallel data to be sent out over DQS
    .tin_dqs         (tin_dqs[3:0]),     // tristate for DQS out (sent out earlier than data!) 
    .dout            (dout[31:0]),       // parallel data received from DDR3 memory, 4 bits per DQ I/O
    .dly_data        (dly_data[7:0]),    // delay value (3 LSB - fine delay)
    .dly_addr        (dly_addr[4:0]),    // select which delay to program
Andrey Filippov's avatar
Andrey Filippov committed
183
    .ld_delay        (ld_data_l),        // load delay data to selected iodelayl (clk_div synchronous)
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
    .set             (set)               // clk_div synchronous set all delays from previously loaded values
);

  byte_lane #(
    .IODELAY_GRP     (IODELAY_GRP),
    .IBUF_LOW_PWR    (IBUF_LOW_PWR),
    .IOSTANDARD_DQ   (IOSTANDARD_DQ),
    .IOSTANDARD_DQS  (IOSTANDARD_DQS),
    .SLEW_DQ         (SLEW_DQ),
    .SLEW_DQS        (SLEW_DQS),
    .REFCLK_FREQUENCY(REFCLK_FREQUENCY),
    .HIGH_PERFORMANCE_MODE(HIGH_PERFORMANCE_MODE)
  ) byte_lane1_i (
    .dq              (dq[15:8]),         // DQ  I/O pads
    .dm              (dmu),              // DM  I/O pad (actually only output)
    .dqs             (dqsu),             //  DQS I/O pad
    .ndqs            (ndqsu),            // ~DQS I/O pad
    .clk             (clk),              // free-running system clock, same frequency as iclk (shared for R/W)
    .clk_div         (clk_div),          // free-running half clk frequency, front aligned to clk (shared for R/W)
    .inv_clk_div     (inv_clk_div),      // invert clk_div for R channels (clk_div is shared between R and W)
    .rst             (rst),
    .dci_disable_dqs (dci_disable_dqs),  // disable DCI termination during writes and idle for dqs
    .dci_disable_dq  (dci_disable_dq),   // disable DCI termination during writes and idle for dq and dm signals
Andrey Filippov's avatar
Andrey Filippov committed
207
    .din             (din[63:32]),       // parallel data to be sent out (4 bits per DQ I/O))
208 209 210 211 212 213 214
    .din_dm          (din_dm[7:4]),      // parallel data to be sent out over DM
    .tin_dq          (tin_dq[7:4]),      // tristate for data out (sent out earlier than data!) and dm 
    .din_dqs         (din_dqs[7:4]),     // parallel data to be sent out over DQS
    .tin_dqs         (tin_dqs[7:4]),     // tristate for DQS out (sent out earlier than data!) 
    .dout            (dout[63:32]),      // parallel data received from DDR3 memory, 4 bits per DQ I/O
    .dly_data        (dly_data[7:0]),    // delay value (3 LSB - fine delay)
    .dly_addr        (dly_addr[4:0]),    // select which delay to program
Andrey Filippov's avatar
Andrey Filippov committed
215
    .ld_delay        (ld_data_h),        // load delay data to selected iodelayl (clk_div synchronous)
216 217
    .set             (set)               // clk_div synchronous set all delays from previously loaded values
);
Andrey Filippov's avatar
Andrey Filippov committed
218
//ddr3_clk
Andrey Filippov's avatar
Andrey Filippov committed
219
wire sdclk; // BUFIO
Andrey Filippov's avatar
Andrey Filippov committed
220 221 222 223
    oddr_ds #(
        .IOSTANDARD(IOSTANDARD_CLK),
        .SLEW(SLEW_CLK)
    ) oddr_ds_i (
Andrey Filippov's avatar
Andrey Filippov committed
224
        .clk(sdclk), // input
Andrey Filippov's avatar
Andrey Filippov committed
225 226 227 228 229 230 231 232 233 234 235 236 237
        .ce(1'b1), // input
        .rst(1'b0), // input
        .set(1'b0), // input
        .din(2'b01), // input[1:0] 
        .dq(ddr3_clk), // output
        .ndq(ddr3_nclk) // output
    );
// Clocks: MMCM is used to generate ddr3 differential clock (no dynamic phase shift),
// clk - write bit clock, phase dynamically adjusted, BUFR (initially 400MHz)
// clk_div  half bit frequency clock, phase dynamically adjusted, BUFR. Used also for delay/phase control (200MHz)
// mclk - same frequency as clk_div (same dynamic phase adjust), but with BUFG to be used in other regions. Phase to be
// statically adjusted for clock boundary crossing
// Phase control included, allowing setting phase in +/- 127 steps, each 1/56 of 1/Fvco (~22ps for Fvco=800MHz)
Andrey Filippov's avatar
Andrey Filippov committed
238
wire clk_pre, clk_div_pre, sdclk_pre, mclk_pre, clk_fb;
Andrey Filippov's avatar
Andrey Filippov committed
239 240
BUFR clk_bufr_i (.O(clk), .CE(), .CLR(), .I(clk_pre));
BUFR clk_div_bufr_i (.O(clk_div), .CE(), .CLR(), .I(clk_div_pre));
Andrey Filippov's avatar
Andrey Filippov committed
241
BUFIO iclk_bufio_i (.O(sdclk), .I(sdclk_pre) );
Andrey Filippov's avatar
Andrey Filippov committed
242 243 244 245
//BUFIO clk_ref_i (.O(clk_ref), .I(clk_ref_pre));
//assign clk_ref=clk_ref_pre;
//BUFH clk_ref_i (.O(clk_ref), .I(clk_ref_pre));
BUFG clk_ref_i (.O(clk_ref), .I(clk_ref_pre));
Andrey Filippov's avatar
Andrey Filippov committed
246 247 248 249 250 251 252 253 254
BUFG mclk_i (.O(mclk),.I(mclk_pre) );
    /* Instance template for module mmcm_phase_cntr */
    mmcm_phase_cntr #(
        .PHASE_WIDTH         (PHASE_WIDTH),
        .CLKIN_PERIOD        (CLKIN_PERIOD),
        .BANDWIDTH           (BANDWIDTH),
        .CLKFBOUT_MULT_F     (CLKFBOUT_MULT),
        .DIVCLK_DIVIDE       (DIVCLK_DIVIDE),
        .CLKFBOUT_PHASE      (CLKFBOUT_PHASE),
Andrey Filippov's avatar
Andrey Filippov committed
255
        .CLKOUT0_PHASE       (SDCLK_PHASE),
Andrey Filippov's avatar
Andrey Filippov committed
256 257 258
        .CLKOUT1_PHASE       (CLK_PHASE),
        .CLKOUT2_PHASE       (CLK_DIV_PHASE),
        .CLKOUT3_PHASE       (MCLK_PHASE),
Andrey Filippov's avatar
Andrey Filippov committed
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
//        .CLKOUT4_PHASE          (0.000),
//        .CLKOUT5_PHASE          (0.000),
//        .CLKOUT6_PHASE          (0.000),
        .CLKFBOUT_USE_FINE_PS     ("FALSE"),
        .CLKOUT0_USE_FINE_PS      ("FALSE"),
        .CLKOUT1_USE_FINE_PS ("TRUE"),
        .CLKOUT2_USE_FINE_PS ("TRUE"),
        .CLKOUT3_USE_FINE_PS ("TRUE"),
//        .CLKOUT4_USE_FINE_PS("FALSE"),
//        .CLKOUT5_USE_FINE_PS("FALSE"),
//        .CLKOUT6_USE_FINE_PS("FALSE"),
        .CLKOUT0_DIVIDE_F    (2.000),
        .CLKOUT1_DIVIDE      (2),
        .CLKOUT2_DIVIDE      (4),
        .CLKOUT3_DIVIDE      (4),
//        .CLKOUT4_DIVIDE(1),
//        .CLKOUT5_DIVIDE(1),
//        .CLKOUT6_DIVIDE(1),
        .COMPENSATION             ("ZHOLD"),
        .REF_JITTER1         (REF_JITTER1),
//        .REF_JITTER2(0.010),
        .SS_EN               (SS_EN),
        .SS_MODE             (SS_MODE),
        .SS_MOD_PERIOD       (SS_MOD_PERIOD),
        .STARTUP_WAIT             ("FALSE")
    ) mmcm_phase_cntr_i (
        .clkin               (clk_in), // input
        .clkfbin             (clk_fb), // input
287 288
//        .rst                 (rst), // input
        .rst                 (rst_in), // input
Andrey Filippov's avatar
Andrey Filippov committed
289 290 291 292 293 294
        .pwrdwn                 (1'b0), // input
        .psclk               (clk_div), // input
        .ps_we               (ld_mmcm), // input
        .ps_din              (dly_data), // input[7:0] 
        .ps_ready            (ps_rdy), // output
        .ps_dout             (ps_out), // output[7:0] reg 
Andrey Filippov's avatar
Andrey Filippov committed
295
        .clkout0             (sdclk_pre), // output
Andrey Filippov's avatar
Andrey Filippov committed
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
        .clkout1             (clk_pre), // output
        .clkout2             (clk_div_pre), // output
        .clkout3             (mclk_pre), // output
        .clkout4(), // output
        .clkout5(), // output
        .clkout6(), // output
        .clkout0b(), // output
        .clkout1b(), // output
        .clkout2b(), // output
        .clkout3b(), // output
        .clkfbout(clk_fb), // output
        .clkfboutb(), // output
        .locked(locked_mmcm) // output
    );

// Generate reference clock for the I/O delays
    pll_base #(
        .CLKIN_PERIOD(CLKIN_PERIOD),
        .BANDWIDTH("OPTIMIZED"),
        .CLKFBOUT_MULT(CLKFBOUT_MULT_REF),
        .CLKOUT0_DIVIDE(CLKFBOUT_DIV_REF),
        .REF_JITTER1(0.010),
        .STARTUP_WAIT("FALSE")
    ) pll_base_i (
        .clkin(clk_in), // input
        .clkfbin(clkfb_ref), // input
322 323
//        .rst(rst), // input
        .rst(rst_in), // input
Andrey Filippov's avatar
Andrey Filippov committed
324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
        .pwrdwn(1'b0), // input
        .clkout0(clk_ref_pre), // output
        .clkout1(), // output
        .clkout2(), // output
        .clkout3(), // output
        .clkout4(), // output
        .clkout5(), // output
        .clkfbout(clkfb_ref), // output
        .locked(locked_pll) // output
    );
// Does it need to be re-calibrated periodically?
idelay_ctrl# (
 .IODELAY_GRP("IODELAY_MEMORY")
) idelay_ctrl_i (
    .refclk(clk_ref),
    .rst(rst),
    .rdy(dly_ready)
);

343 344 345

endmodule