/*******************************************************************************
 * Module: ddrc_sequencer
 * Date:2014-05-16
 * Author: Andrey Filippov
 * Description: ddr3 sequencer
 *
 * Copyright (c) 2014 Elphel, Inc.
 * ddrc_sequencer.v is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 *  ddrc_sequencer.v is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *******************************************************************************/
`timescale 1ns/1ps

// ddrc_sequencer
//
// Reads 32-bit command words from one of two 1k x 32 command memories
// (cmd0 - "manual", software programmed; cmd1 - "auto", normal block R/W),
// selected by run_addr[10] at the moment run_seq is asserted, and feeds them
// to the phy_cmd instance. The phy_cmd outputs (NOP decode, pause length,
// "add pause" flag, sequence_done) are used here to stall or stop the
// command address counter. The module also contains the two channel buffers
// visible in this file: port 0 (written at negedge mclk with data coming from
// phy_cmd_i, read through the port0_* interface) and port 1 (written through
// the port1_* interface, read at posedge mclk towards phy_cmd_i).
module  ddrc_sequencer #(
    parameter         PHASE_WIDTH           = 8,
    parameter         SLEW_DQ               = "SLOW",
    parameter         SLEW_DQS              = "SLOW",
    parameter         SLEW_CMDA             = "SLOW",
    parameter         SLEW_CLK              = "SLOW",
    parameter         IBUF_LOW_PWR          = "TRUE",
    parameter real    REFCLK_FREQUENCY      = 300.0,
    parameter         HIGH_PERFORMANCE_MODE = "FALSE",
    parameter         CLKIN_PERIOD          = 10, //ns >1.25, 600
    // NOTE(review): this copy of the file is damaged at this point. Everything
    // between the CLKIN_PERIOD comment above and the tail of the command port 0
    // comment below is missing: the remaining parameters that the code below
    // references (CLKFBOUT_MULT, CLKFBOUT_MULT_REF, CLKFBOUT_DIV_REF,
    // DIVCLK_DIVIDE, CLKFBOUT_PHASE, SDCLK_PHASE, CLK_PHASE, CLK_DIV_PHASE,
    // MCLK_PHASE, REF_JITTER1, SS_EN, SS_MODE, SS_MOD_PERIOD, CMD_PAUSE_BITS,
    // CMD_DONE_BIT), the ")" closing the parameter list, the "(" opening the
    // port list, and the declarations of the ports used below (SDRST, SDCLK,
    // SDNCLK, SDA, SDBA, SDWE, SDRAS, SDCAS, SDCKE, SDODT, SDD, SDDML, DQSL,
    // NDQSL, SDDMU, DQSU, NDQSU, clk_in, rst_in, mclk). Their default values
    // and exact declarations cannot be derived from what is left, so they are
    // NOT reconstructed here - restore them from the upstream file.
    // ...32r) - used for mode set, refresh, write levelling, ...
    input                        cmd0_clk,
    input                        cmd0_we,
    input                  [9:0] cmd0_addr,
    input                 [31:0] cmd0_data,
    // automatic command port1 , filled by the PL, 32w 32r, used for actual page R/W
    input                        cmd1_clk,
    input                        cmd1_we,
    input                  [9:0] cmd1_addr,
    input                 [31:0] cmd1_data,
    // Controller run interface, posedge mclk
    input                 [10:0] run_addr, // controller sequencer start address (0..11'h3ff - cmd0, 11'h400..11'h7ff - cmd1)
    input                  [3:0] run_chn,  // data channel to use
    input                        run_seq,  // start controller sequence (ddr_rst has priority: it clears cmd_busy)
    output                       run_done, // controller sequence finished
    output                       run_busy, // controller sequence in progress
    // interface to control I/O delays and mmcm
    input                  [7:0] dly_data, // delay value (3 LSB - fine delay)
    input                  [6:0] dly_addr, // select which delay to program
    input                        ld_delay, // load delay data to selected iodelay (clk_div synchronous)
    input                        set,      // clk_div synchronous set all delays from previously loaded values
//    output                       locked,
    output                       locked_mmcm,
    output                       locked_pll,
    output                       dly_ready,
    output                       dci_ready,
    output                       phy_locked_mmcm,
    output                       phy_locked_pll,
    output                       phy_dly_ready,
    output                       phy_dci_ready,
    output                 [7:0] tmp_debug,
    output                       ps_rdy,
    output     [PHASE_WIDTH-1:0] ps_out,
    // read port 0
    input                        port0_clk,
    input                        port0_re,
    input                        port0_regen,
    input                  [1:0] port0_page,
    input                  [1:0] port0_int_page,
    input                  [7:0] port0_addr,
    output                [31:0] port0_data,
    // write port 1
    input                        port1_clk,
    input                        port1_we,
    input                  [1:0] port1_page,
    input                  [1:0] port1_int_page,
    input                  [7:0] port1_addr,
    input                 [31:0] port1_data,
    // extras
    input                        cmda_en,      // enable (!tristate) command and address lines // not likely to be used
    input                        ddr_rst,      // generate reset to DDR3 memory (active high)
    input                        dci_rst,      // active high - reset DCI circuitry
    input                        dly_rst,      // active high - delay calibration circuitry
    input                        ddr_cke,      // DDR clock enable , XOR-ed with command bit
    input                        inv_clk_div,
    input                  [7:0] dqs_pattern,  // 8'h55
    input                  [7:0] dqm_pattern,  // 8'h00
    input                 [ 3:0] dq_tri_on_pattern,   // DQ tri-state control word, first when enabling output
    input                 [ 3:0] dq_tri_off_pattern,  // DQ tri-state control word, first after disabling output
    input                 [ 3:0] dqs_tri_on_pattern,  // DQS tri-state control word, first when enabling output
    input                 [ 3:0] dqs_tri_off_pattern, // DQS tri-state control word, first after disabling output
    input                 [ 3:0] wbuf_delay           // delay setting for buf_wr (passed to dly01_16 below)
);
    localparam ADDRESS_NUMBER = 15;

//    wire                  [35:0] phy_cmd;       // input[35:0]
    wire                  [31:0] phy_cmd_word;  // selected output from either cmd0 buffer or cmd1 buffer
    wire                  [31:0] phy_cmd0_word; // cmd0 buffer output
    wire                  [31:0] phy_cmd1_word; // cmd1 buffer output
    reg                   [ 8:0] buf_raddr;         // channel buffer address (posedge mclk), shared by read and write
    reg                   [ 8:0] buf_waddr_negedge; // buf_raddr re-registered @ negedge mclk
    reg                          buf_wr_negedge;    // buf_wr re-registered @ negedge mclk
    wire                  [63:0] buf_wdata;         // data from phy_cmd_i to be written to the port 0 buffer
    reg                   [63:0] buf_wdata_negedge; // buf_wdata re-registered @ negedge mclk
    wire                  [63:0] buf_rdata;  // multiplexed input from one of the write channels buffer
    wire                  [63:0] buf1_rdata; // port 1 buffer read data
    wire                         buf_wr;      // delayed by specified number of clock cycles
    wire                         buf_wr_ndly; // before delay
    wire                         buf_rd;      // read next 64-bit word from the buffer, need one extra pre-read
    wire                         rst=rst_in;

//    wire                  [ 9:0] next_cmd_addr;
    reg                   [ 9:0] cmd_addr; // command word address
    reg                          cmd_sel;  // command memory select: 0 - cmd0, 1 - cmd1
    reg                   [ 2:0] cmd_busy; // bit 0 - immediately, bits 1 and 2 - bit 0 delayed by 1 and 2 mclk cycles
    wire                         phy_cmd_nop;       // decoded command (ras, cas, we) was NOP
    wire                         phy_cmd_add_pause; // decoded from the command word - add one pause command after the current one
    reg                          add_pause;         // previous command had phy_cmd_add_pause set
    wire                         sequence_done;
    wire    [CMD_PAUSE_BITS-1:0] pause_len;
    reg                          cmd_fetch; // previous cycle command was read from the command memory, current: command valid
    wire                         pause;     // do not register new data from the command memory
    reg     [CMD_PAUSE_BITS-1:0] pause_cntr;

    reg                    [1:0] buf_page;     // one of 4 pages in the channel buffer to use for R/W
    reg                   [15:0] buf_sel_1hot; // 1 hot channel buffer select
    reg                    [3:0] run_chn_d;    // run_chn latched at run_seq
    reg                          run_seq_d;    // run_seq delayed by one mclk cycle

//    reg tmp_dbg7=0;
    wire                   [7:0] tmp_debug_a;
/*
    always @ (posedge clk_in) begin
        tmp_dbg7 <= ~tmp_dbg7;
    end
    assign tmp_debug[7:0] = {tmp_dbg7,tmp_debug_a[6:0]};
*/
    assign tmp_debug[7:0] = tmp_debug_a[7:0]; // clk_in

    assign run_done=sequence_done;
    assign run_busy=cmd_busy[0]; //earliest
    // While a command word is valid (cmd_fetch) the pause request is decoded from that word;
    // otherwise the sequencer stays paused while the upper bits of the pause counter are non-zero.
    assign pause=cmd_fetch? (phy_cmd_add_pause || (phy_cmd_nop && (pause_len != 0))): (cmd_busy[2] && (pause_cntr[CMD_PAUSE_BITS-1:1]!=0));

/// debugging
    assign phy_cmd_word = cmd_sel?phy_cmd1_word:phy_cmd0_word; // TODO: hangs even with 0-s in phy_cmd
///    assign phy_cmd_word = phy_cmd_word?0:0;

    assign buf_rdata[63:0] = ({64{buf_sel_1hot[1]}} & buf1_rdata[63:0]); // ORed with other read channels terms

    // Sequencer state: all registers below are clocked by posedge mclk and asynchronously reset by rst
    always @ (posedge mclk or posedge rst) begin
        // cmd_busy: bit 0 is set by run_seq and holds itself, bits 1,2 follow with 1 and 2 cycles of delay
        if (rst)                                cmd_busy <= 0;
//        else if (sequence_done)                 cmd_busy <= 0;
        else if (ddr_rst)                       cmd_busy <= 0; // *************** reset sequencer with DDR reset
        else if (sequence_done &&  cmd_busy[2]) cmd_busy <= 0;
        else                                    cmd_busy <= {cmd_busy[1:0],run_seq | cmd_busy[0]};

        // Pause counter
        if (rst)                                pause_cntr <= 0;
        else if (!cmd_busy[1])                  pause_cntr <= 0; // not needed?
        else if (cmd_fetch && phy_cmd_nop)      pause_cntr <= pause_len;
        else if (pause_cntr!=0)                 pause_cntr <= pause_cntr-1; //SuppressThisWarning ISExst Result of 32-bit expression is truncated to fit in 10-bit target.

        // Fetch - command data valid
        if (rst)                                cmd_fetch <= 0;
        else                                    cmd_fetch <= cmd_busy[0] && !pause;

        if (rst)                                add_pause <= 0;
        else                                    add_pause <= cmd_fetch && phy_cmd_add_pause;

        // Command read address
        if (rst)                                cmd_addr <= 0;
        else if (run_seq)                       cmd_addr <= run_addr[9:0];
        else if (cmd_busy[0] && !pause)         cmd_addr <= cmd_addr + 1; //SuppressThisWarning ISExst Result of 11-bit expression is truncated to fit in 10-bit target.

        // command bank select (0 - "manual" (software programmed sequences), 1 - "auto" (normal block r/w)
        if (rst)                                cmd_sel <= 0;
        else if (run_seq)                       cmd_sel <= run_addr[10];

        // buffer page to use: taken from the selected channel when the sequence is started
        if (rst)                                buf_page <= 0;
        else if (run_seq)
            case (run_chn)
                4'h0:    buf_page <= port0_int_page;
                4'h1:    buf_page <= port1_int_page;
                // Add other channels later
                default: buf_page <= 2'bxx;
            endcase

        // decode the latched channel number to a 1-hot select (bit N set when run_chn_d == N)
        if (rst)                                buf_sel_1hot <= 0;
        else                                    buf_sel_1hot <= {
                                                   (run_chn_d==4'hf)?1'b1:1'b0,
                                                   (run_chn_d==4'he)?1'b1:1'b0,
                                                   (run_chn_d==4'hd)?1'b1:1'b0,
                                                   (run_chn_d==4'hc)?1'b1:1'b0,
                                                   (run_chn_d==4'hb)?1'b1:1'b0,
                                                   (run_chn_d==4'ha)?1'b1:1'b0,
                                                   (run_chn_d==4'h9)?1'b1:1'b0,
                                                   (run_chn_d==4'h8)?1'b1:1'b0,
                                                   (run_chn_d==4'h7)?1'b1:1'b0,
                                                   (run_chn_d==4'h6)?1'b1:1'b0,
                                                   (run_chn_d==4'h5)?1'b1:1'b0,
                                                   (run_chn_d==4'h4)?1'b1:1'b0,
                                                   (run_chn_d==4'h3)?1'b1:1'b0,
                                                   (run_chn_d==4'h2)?1'b1:1'b0,
                                                   (run_chn_d==4'h1)?1'b1:1'b0,
                                                   (run_chn_d==4'h0)?1'b1:1'b0
                                                };

        // buffer address: starts at the beginning of the selected page one cycle after run_seq
        // (buf_page is registered on run_seq), then increments on every buffer read or write
        if (rst)                                buf_raddr <= 9'h0;
        else if (run_seq_d)                     buf_raddr <= {buf_page,7'h0};
        else if (buf_wr || buf_rd)              buf_raddr <= buf_raddr +1; // Separate read/write address? read address re-registered @ negedge //SuppressThisWarning ISExst Result of 10-bit expression is truncated to fit in 9-bit target.

        if (rst)                                run_chn_d <= 0;
        else if (run_seq)                       run_chn_d <= run_chn;

        if (rst)                                run_seq_d <= 0;
        else                                    run_seq_d <= run_seq;
    end

    // re-register buffer write address to match DDR3 data
    always @ (negedge mclk) begin
        buf_waddr_negedge <= buf_raddr;
        buf_wr_negedge    <= buf_wr;
        buf_wdata_negedge <= buf_wdata;
    end

// Command sequence memories:
// Read enables: only the selected memory is read, and only while running and not paused
    wire ren0=!cmd_sel && cmd_busy[0] && !pause; // cmd_busy - multibit
    wire ren1= cmd_sel && cmd_busy[0] && !pause;

// Command sequence memory 0 ("manual"):
    ram_1kx32_1kx32 #(
        .REGISTERS(1) // (0) // register output
    ) cmd0_buf_i (
        .rclk     (mclk),          // input
        .raddr    (cmd_addr),      // input[9:0]
///        .ren      (!cmd_sel && cmd_busy && !pause), // input
///        .regen    (!cmd_sel && cmd_busy && !pause), // input
        .ren      (ren0),          // input TODO: verify cmd_busy[0] is correct (was cmd_busy )
        .regen    (ren0),          // input
        .data_out (phy_cmd0_word), // output[31:0]
        .wclk     (cmd0_clk),      // input
        .waddr    (cmd0_addr),     // input[9:0]
        .we       (cmd0_we),       // input
        .web      (4'hf),          // input[3:0]
        .data_in  (cmd0_data)      // input[31:0]
    );

// Command sequence memory 1 ("auto"):
    ram_1kx32_1kx32 #(
        .REGISTERS(1) // (0) // register output
    ) cmd1_buf_i (
        .rclk     (mclk),          // input
        .raddr    (cmd_addr),      // input[9:0]
///        .ren      ( cmd_sel && cmd_busy && !pause), // input
///        .regen    ( cmd_sel && cmd_busy && !pause), // input
        .ren      ( ren1),         // input
        .regen    ( ren1),         // input
        .data_out (phy_cmd1_word), // output[31:0]
        .wclk     (cmd1_clk),      // input
        .waddr    (cmd1_addr),     // input[9:0]
        .we       (cmd1_we),       // input
        .web      (4'hf),          // input[3:0]
        .data_in  (cmd1_data)      // input[31:0]
    );

// Port memory buffer (4 pages each, R/W fixed, port 0 - AXI read from DDR, port 1 - AXI write to DDR
// Port 0 (read DDR to AXI) buffer
    ram_512x64w_1kx32r #(
        .REGISTERS(1)
    ) port0_buf_i (
        .rclk     (port0_clk),                 // input
        .raddr    ({port0_page,port0_addr}),   // input[9:0]
        .ren      (port0_re),                  // input
        .regen    (port0_regen),               // input
        .data_out (port0_data),                // output[31:0]
        .wclk     (!mclk),                     // input - write side uses the signals re-registered @ negedge mclk
        .waddr    (buf_waddr_negedge),         // input[8:0]
        .we       (buf_sel_1hot[0] && buf_wr_negedge), // input
        .web      (8'hff),                     // input[7:0]
        .data_in  (buf_wdata_negedge)          // input[63:0]
    );

// Port 1 (write DDR from AXI) buffer
    ram_1kx32w_512x64r #(
        .REGISTERS(1)
    ) port1_buf_i (
        .rclk     (mclk),                      // input
        .raddr    (buf_raddr),                 // input[8:0]
        .ren      (buf_sel_1hot[1] && buf_rd), // input
        .regen    (buf_sel_1hot[1] && buf_rd), // input
        .data_out (buf1_rdata),                // output[63:0]
        .wclk     (port1_clk),                 // input
        .waddr    ({port1_page,port1_addr}),   // input[9:0]
        .we       (port1_we),                  // input
        .web      (4'hf),                      // input[3:0]
        .data_in  (port1_data)                 // input[31:0]
    );

    phy_cmd #(
        .ADDRESS_NUMBER        (ADDRESS_NUMBER),
        .PHASE_WIDTH           (PHASE_WIDTH),
        .SLEW_DQ               (SLEW_DQ),
        .SLEW_DQS              (SLEW_DQS),
        .SLEW_CMDA             (SLEW_CMDA),
        .SLEW_CLK              (SLEW_CLK),
        .IBUF_LOW_PWR          (IBUF_LOW_PWR),
        .REFCLK_FREQUENCY      (REFCLK_FREQUENCY),
        .HIGH_PERFORMANCE_MODE (HIGH_PERFORMANCE_MODE),
        .CLKIN_PERIOD          (CLKIN_PERIOD),
        .CLKFBOUT_MULT         (CLKFBOUT_MULT),
        .CLKFBOUT_MULT_REF     (CLKFBOUT_MULT_REF),
        .CLKFBOUT_DIV_REF      (CLKFBOUT_DIV_REF),
        .DIVCLK_DIVIDE         (DIVCLK_DIVIDE),
        .CLKFBOUT_PHASE        (CLKFBOUT_PHASE),
        .SDCLK_PHASE           (SDCLK_PHASE), /// debugging
        .CLK_PHASE             (CLK_PHASE),
        .CLK_DIV_PHASE         (CLK_DIV_PHASE),
        .MCLK_PHASE            (MCLK_PHASE),
        .REF_JITTER1           (REF_JITTER1),
        .SS_EN                 (SS_EN),
        .SS_MODE               (SS_MODE),
        .SS_MOD_PERIOD         (SS_MOD_PERIOD),
        .CMD_PAUSE_BITS        (CMD_PAUSE_BITS), // number of (address) bits to encode pause
        .CMD_DONE_BIT          (CMD_DONE_BIT)    // bit number (address) to signal sequence done
    ) phy_cmd_i (
        .SDRST               (SDRST),                   // output
        .SDCLK               (SDCLK),                   // output
        .SDNCLK              (SDNCLK),                  // output
        .SDA                 (SDA[ADDRESS_NUMBER-1:0]), // output[14:0]
        .SDBA                (SDBA[2:0]),               // output[2:0]
        .SDWE                (SDWE),                    // output
        .SDRAS               (SDRAS),                   // output
        .SDCAS               (SDCAS),                   // output
        .SDCKE               (SDCKE),                   // output
        .SDODT               (SDODT),                   // output
        .SDD                 (SDD[15:0]),               // inout[15:0]
        .SDDML               (SDDML),                   // inout
        .DQSL                (DQSL),                    // inout
        .NDQSL               (NDQSL),                   // inout
        .SDDMU               (SDDMU),                   // inout
        .DQSU                (DQSU),                    // inout
        .NDQSU               (NDQSU),                   // inout
        .clk_in              (clk_in),                  // input
        .rst_in              (rst_in),                  // input
        .mclk                (mclk),                    // output
        .dly_data            (dly_data[7:0]),           // input[7:0]
        .dly_addr            (dly_addr[6:0]),           // input[6:0]
        .ld_delay            (ld_delay),                // input
        .set                 (set),                     // input
//        .locked              (locked),                  // output
        .locked_mmcm         (locked_mmcm),             // output
        .locked_pll          (locked_pll),              // output
        .dly_ready           (dly_ready),               // output
        .dci_ready           (dci_ready),               // output
        .phy_locked_mmcm     (phy_locked_mmcm),         // output
        .phy_locked_pll      (phy_locked_pll),          // output
        .phy_dly_ready       (phy_dly_ready),           // output
        .phy_dci_ready       (phy_dci_ready),           // output
        .tmp_debug           (tmp_debug_a[7:0]),
        .ps_rdy              (ps_rdy),                  // output
        // Full declared width of ps_out (was hard-coded [7:0], which only matches PHASE_WIDTH == 8).
        // NOTE(review): assumes the phy_cmd port is [PHASE_WIDTH-1:0] wide, as PHASE_WIDTH is passed to it - confirm.
        .ps_out              (ps_out[PHASE_WIDTH-1:0]), // output[PHASE_WIDTH-1:0]
        .phy_cmd_word        (phy_cmd_word[31:0]),      // input[31:0]
        .phy_cmd_nop         (phy_cmd_nop),             // output
        .phy_cmd_add_pause   (phy_cmd_add_pause),       // one pause cycle (for 8-bursts)
        .add_pause           (add_pause),
        .pause_len           (pause_len),               // output [CMD_PAUSE_BITS-1:0]
        .sequence_done       (sequence_done),           // output
        .buf_wdata           (buf_wdata[63:0]),         // output[63:0]
        .buf_rdata           (buf_rdata[63:0]),         // input[63:0]
        .buf_wr              (buf_wr_ndly),             // output
        .buf_rd              (buf_rd),                  // output
        .cmda_en             (cmda_en),                 // input
        .ddr_rst             (ddr_rst),                 // input
        .dci_rst             (dci_rst),                 // input
        .dly_rst             (dly_rst),                 // input
        .ddr_cke             (ddr_cke),                 // input
        .inv_clk_div         (inv_clk_div),             // input
        .dqs_pattern         (dqs_pattern),             // input[7:0]
        .dqm_pattern         (dqm_pattern),             // input[7:0]
        .dq_tri_on_pattern   (dq_tri_on_pattern[3:0]),  // input[3:0]
        .dq_tri_off_pattern  (dq_tri_off_pattern[3:0]), // input[3:0]
        .dqs_tri_on_pattern  (dqs_tri_on_pattern[3:0]), // input[3:0]
        .dqs_tri_off_pattern (dqs_tri_off_pattern[3:0]) // input[3:0]
    );

// delay buf_wr by 1-16 cycles to compensate for DDR and HDL code latency (~7 cycles?)
    dly01_16 buf_wr_dly_i (
        .clk  (mclk),            // input
        .rst  (1'b0),            // input
        .dly  (wbuf_delay[3:0]), // input[3:0]
        .din  (buf_wr_ndly),     // input
        .dout (buf_wr)           // output reg
    );

endmodule