/*******************************************************************************
 * Module: ahci_dma
 * Date:2016-01-01  
 * Author: Andrey Filippov     
 * Description: DMA R/W over 64-AXI channel for AHCI implementation
 *
 * Copyright (c) 2016 Elphel, Inc .
 * ahci_dma.v is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 *  ahci_dma.v is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/> .
 *
 * Additional permission under GNU GPL version 3 section 7:
 * If you modify this Program, or any covered work, by linking or combining it
 * with independent modules provided by the FPGA vendor only (this permission
 * does not extend to any 3-rd party modules, "soft cores" or macros) under
 * different license terms solely for the purpose of generating binary "bitstream"
 * files and/or simulating the code, the copyright holders of this Program give
 * you the right to distribute the covered work without those independent modules
 * as long as the source code for them is available from the FPGA vendor free of
 * charge, and there is no dependence on any encrypted modules for simulating of
 * the combined code. This permission applies to you if the distributed code
 * contains all the components and scripts required to completely simulate it
 * with at least one of the Free Software programs.
 *******************************************************************************/
`timescale 1ns/1ps

// ahci_dma: DMA engine for an AHCI host controller over a 64-bit AXI_HP port.
//
// Clock domains:
//   mclk - command/status side (control inputs, command table read-out port,
//          32-bit data ports to/from the HBA)
//   hclk - AXI_HP side (all afi_* signals and the address/burst sequencing)
// Single-cycle events cross between the domains through pulse_cross_clock.
//
// Sequence (hclk domain), as implemented below:
//   1. cmd_start   -> read 16 QWORDs (0x80 bytes) of the command table at
//                     {ctba,7'b0} into ct_data_ram (readable through ct_addr/ct_re/ct_data).
//   2. prd_start   -> once the command table is read, fetch PRD entries
//                     (2 QWORDs each) starting at command table base + 0x80.
//   3. per PRD     -> issue AXI read bursts (dev_wr=1, memory -> device) or
//                     AXI write bursts (dev_wr=0, device -> memory) of up to 16 QWORDs.
//   4. cmd_done is generated when there are no PRDs (prdtl==0), or on
//      done_flush / done_dev_rd from the FIFO modules.
module  ahci_dma (
//    input         rst,
    input                         mrst, // @posedge mclk - sync reset
    input                         hrst, // @posedge hclk - sync reset

    input                         mclk, // for command/status
    input                         hclk, // global clock to run axi_hp @ 150MHz
    // Control interface  (@mclk)
    input                  [31:7] ctba,         // command table base address
    input                         ctba_ld,      // load command table base address
    input                  [15:0] prdtl,        // number of entries in PRD table (valid at cmd_start)
    input                         dev_wr,       // write to device (valid at start)
    input                         cmd_start,    // start processing command table, reset prdbc
    input                         prd_start,    // at or after cmd_start - enable reading PRD/data (if any)
    input                         cmd_abort,    // try to abort a command TODO: Implement

    // Optional control of the AXI cache mode, default will be set to 4'h3, 4'h3 at mrst
    input                  [3:0]  axi_wr_cache_mode,
    input                  [3:0]  axi_rd_cache_mode,
    input                         set_axi_wr_cache_mode,
    input                         set_axi_rd_cache_mode,

    // Some data from the command table will be used internally, data will be available on the general
    // sys_out[31:0] port and should be consumed
    output reg                    ct_busy,      // cleared after 0x20 DWORDs are read out
    // reading out command table data
    input                  [ 4:0] ct_addr,      // DWORD address
    input                  [ 1:0] ct_re,        // [0] - re, [1]-regen
    output reg             [31:0] ct_data,      // registered command table read data

    // After the first 0x80 bytes of the Command Table are read out, this module will read/process PRDs,
    // not forwarding them to the output
    output                        prd_done,      // @mclk prd done (regardless of the interrupt) - data transfer of one PRD is finished (any direction)
    input                         prd_irq_clear, // reset pending prd_irq
    output reg                    prd_irq_pend,  // prd interrupt pending. This is just a condition for irq - actual will be generated after FIS OK
    output reg                    cmd_busy,      // all commands
    output                        cmd_done,      // @ mclk

    // Data System memory -> HBA interface @ mclk
    output                 [31:0] sys_out,      // 32-bit data from the system memory to HBA (dma data)
    output                        sys_dav,      // at least one dword is ready to be read
//    output                        sys_dav_many, // several DWORDs are in the FIFO (TODO: decide how many)
    input                         sys_re,       // sys_out data read, advance internal FIFO
    // Data HBA -> System memory  interface @ mclk
    input                  [31:0] sys_in,       // HBA -> system memory
    output                        sys_nfull,    // internal FIFO has room for more data (will decide - how big reserved space to keep)
    input                         sys_we,

    output                        extra_din,    // command is done (cmd_done seen) but the device -> memory FIFO still holds data

    // axi_hp signals write channel
    // write address
    output  [31:0] afi_awaddr,
    output         afi_awvalid,
    input          afi_awready, // @SuppressThisWarning VEditor unused - used FIF0 level
    output  [ 5:0] afi_awid,
    output  [ 1:0] afi_awlock,
    output reg [ 3:0] afi_awcache,
    output  [ 2:0] afi_awprot,
    output  [ 3:0] afi_awlen,
    output  [ 1:0] afi_awsize,
    output  [ 1:0] afi_awburst,
    output  [ 3:0] afi_awqos,
    // write data
    output  [63:0] afi_wdata,
    output         afi_wvalid,
    input          afi_wready,  // @SuppressThisWarning VEditor unused - used FIF0 level
    output  [ 5:0] afi_wid,
    output         afi_wlast,
    output  [ 7:0] afi_wstrb,
    // write response
    input          afi_bvalid,   // @SuppressThisWarning VEditor unused
    output         afi_bready,
    input   [ 5:0] afi_bid,      // @SuppressThisWarning VEditor unused
    input   [ 1:0] afi_bresp,    // @SuppressThisWarning VEditor unused
    // PL extra (non-AXI) signals
    input   [ 7:0] afi_wcount,
    input   [ 5:0] afi_wacount,
    output         afi_wrissuecap1en,
    // AXI_HP signals - read channel
    // read address
    output  [31:0] afi_araddr,
    output            afi_arvalid,
    input             afi_arready,  // @SuppressThisWarning VEditor unused - used FIF0 level
    output  [ 5:0] afi_arid,
    output  [ 1:0] afi_arlock,
    output reg [ 3:0] afi_arcache,
    output  [ 2:0] afi_arprot,
    output  [ 3:0] afi_arlen,
    output  [ 1:0] afi_arsize,
    output  [ 1:0] afi_arburst,
    output  [ 3:0] afi_arqos,
    // read data
    input   [63:0] afi_rdata,
    input          afi_rvalid,
    output         afi_rready,
    input   [ 5:0] afi_rid,     // @SuppressThisWarning VEditor unused
    input          afi_rlast,   // @SuppressThisWarning VEditor unused
    input   [ 1:0] afi_rresp,   // @SuppressThisWarning VEditor unused
    // PL extra (non-AXI) signals
    input   [ 7:0] afi_rcount,
    input   [ 2:0] afi_racount,
    output         afi_rdissuecap1en
);

// Read command table
//    localparam AFI_FIFO_LAT = 2; // >=2
    // afi_rcount[7:SAFE_RD_BITS] != 0 is used as "several QWORDs are available in the read FIFO"
    localparam SAFE_RD_BITS =   3; //2; // 3;

    localparam CT_SIZE_PRD_UNITS = 8; // command table size (0x80 bytes) in 16-byte (PRD entry) units

    reg     [31:0] ct_data_ram [0:31]; // copy of the first 0x20 DWORDs of the command table
    reg      [3:0] int_data_addr;    // internal (ct,prd) data address, counts QWORDs of the current CT/PRD read
    reg     [31:7] ctba_r;
    reg     [15:0] prdtl_mclk;
    wire           cmd_start_hclk;
    reg            prd_start_r;
    wire           prd_start_hclk;
    reg            prd_start_hclk_r; // to make sure it is with/after prd_start_hclk if in mclk they are in the same cycle
    wire           cmd_abort_hclk; // TODO: Implement as graceful as possible command abort
    reg            prd_enabled;
    reg      [1:0] ct_over_prd_enabled; // prd read and data r/w enabled, command table fetch done

    reg     [31:4] ct_maddr; // granularity matches PRDT entry - 4xDWORD, 2xQWORD
    wire           ct_done;
    wire           first_prd_fetch; // CT read done, prd enabled
    reg     [31:0] afi_addr; // common for afi_araddr and afi_awaddr

    // Declared ahead of axi_set_raddr_ready, which references axi_set_raddr_r
    reg            axi_set_raddr_r; // drives afi_arvalid: read address is being written to the AXI address FIFO
    reg            axi_set_waddr_r; // drives afi_awvalid: write address is being written to the AXI address FIFO

    wire           axi_set_raddr_ready = !(|afi_racount[2:1]) && (!axi_set_raddr_r || !afi_racount[0]); // What is the size of ra fifo - just 4? Latency?
//    wire           axi_set_raddr_ready = !(|afi_racount) && !axi_set_raddr_r); // Most pessimistic
    wire           axi_set_waddr_ready = !afi_wacount[5] && !afi_wacount[4]; // What is the size of wa fifo - just 32? Using just half - safe
    wire           axi_set_raddr_w;
    wire           axi_set_waddr_w;
    wire           axi_set_addr_data_w;

    reg            is_ct_addr;    // current address is ct address
    reg            is_prd_addr;   // current address is prd address
    reg            is_data_addr;  // current address is data address (r or w)

    reg     [31:1] data_addr; // 2 lower addresses will be used in in/out fifo modules
    reg      [3:0] data_len;  // AXI burst length (AxLEN encoding) for the next data burst
    reg            data_irq;  // interrupt at the end of this PRD
    reg     [21:1] wcount;    // Word count
    reg            wcount_set;
    reg     [21:1] qwcount;   // only [21:3] are used

    reg     [21:3] qw_datawr_left;
    reg     [ 3:0] qw_datawr_burst;
    reg            qw_datawr_last;

    wire           data_afi_re;

    reg     [15:0] prds_left;
    reg            last_prd;

    reg     [1:0]  afi_rd_ctl; // read non-data (CT or PRD)
    reg     [1:0]  ct_busy_r;
    reg            prd_rd_busy; // reading PRD

    reg            dev_wr_mclk;
    reg            dev_wr_hclk;
    reg            prd_wr;    // write PRD data to memory
    reg            prd_rd;    // read  PRD data from memory
    wire     [3:0] afi_wstb4;

    wire           done_dev_wr; // finished PRD mem -> device
    wire           done_dev_rd; // finished PRD device -> mem
    wire           done_flush;  // done flushing last partial dword
    wire           cmd_done_hclk;
    wire           ct_done_mclk;
    // PRD data transfer finished (either direction), hclk domain. The prd_done output
    // is the mclk-domain copy of this pulse and must not be used in hclk logic.
    wire           prd_done_hclk = done_dev_wr || done_dev_rd;
    reg      [3:0] afi_alen;
    // write data FIFO level is below 16 (bits [7:4] are all zero)
    wire           afi_wcount_many = !afi_wcount[7] && !(|afi_wcount[6:4]);

    reg            data_next_burst;

//    wire           raddr_prd_rq = (|prds_left) && (ct_done || prd_done);
    // Request the next PRD entry: either the first one (CT read finished and PRDs enabled)
    // or after the previous PRD transfer is done. Uses the hclk-domain done pulse so the
    // request is exactly one hclk cycle long.
    wire           raddr_prd_rq = (|prds_left) && (first_prd_fetch || prd_done_hclk);

    reg            raddr_prd_pend;

    wire           raddr_ct_rq = cmd_start_hclk;
    reg            raddr_ct_pend;

    wire           addr_data_rq = (wcount_set || data_next_burst);

    wire           waddr_data_rq =  !dev_wr_hclk && addr_data_rq;
    reg            waddr_data_pend;

    wire           raddr_data_rq =   dev_wr_hclk && addr_data_rq;
    reg            raddr_data_pend;
    // count different types of AXI ID separately - just for debugging
    reg      [3:0] ct_id;
    reg      [3:0] prd_id;
    reg      [3:0] dev_wr_id;
    reg      [3:0] dev_rd_id;
    reg      [5:0] afi_id; // common for 3 channels

    wire           fifo_nempty_mclk;
    reg            en_extra_din_r;
    reg     [31:0] ct_data_reg;

    // Value loaded into qwcount when a new PRD word count arrives (same cycle as wcount_set),
    // and the count that is valid "now" for burst length calculation: qwcount itself is
    // updated by a non-blocking assignment and still holds the previous PRD's value while
    // wcount_set is active.
    wire    [21:1] qwcount_load = wcount[21:1] + data_addr[2:1]; //minus 1
    wire    [21:1] qwcount_cur  = wcount_set ? qwcount_load : qwcount;

//    assign prd_done = done_dev_wr || done_dev_rd;
    assign cmd_done_hclk = ((ct_busy_r==2'b10) && (prdtl_mclk == 0)) || done_flush || done_dev_rd;
    assign ct_done = (ct_busy_r == 2'b10);           // single-cycle pulse: CT read just finished
    assign first_prd_fetch = ct_over_prd_enabled == 2'b01; // 0->1 transition of "CT over and PRD enabled"
    assign axi_set_raddr_w = axi_set_raddr_ready && (raddr_ct_pend || raddr_prd_pend || raddr_data_pend);
    // Write address is issued when the write address FIFO has room and a write burst is pending
    assign axi_set_waddr_w = axi_set_waddr_ready && waddr_data_pend;
    assign axi_set_addr_data_w = (axi_set_raddr_ready && raddr_data_pend) || (axi_set_waddr_ready && waddr_data_pend);

    assign afi_awaddr = afi_addr;
    assign afi_araddr = afi_addr;
    assign afi_arlen  = afi_alen;
    assign afi_awlen  = afi_alen;
    assign afi_arvalid = axi_set_raddr_r;
    assign afi_awvalid = axi_set_waddr_r;
    assign afi_rready = afi_rd_ctl[0] || data_afi_re;
    // each of the 4 strobes from the write FIFO module covers 2 bytes of the 64-bit data
    assign afi_wstrb = {{2{afi_wstb4[3]}},{2{afi_wstb4[2]}},{2{afi_wstb4[1]}},{2{afi_wstb4[0]}}};
    assign afi_wlast = qw_datawr_last;

    assign afi_awid = afi_id;
    assign afi_wid =  afi_id;
    assign afi_arid = afi_id;

// Unused or static output signals
    assign afi_bready = 1'b1;
    assign afi_awlock =        2'h0;
//    assign afi_awcache =       4'h3;
    assign afi_awprot =        3'h0;
    assign afi_awsize =        2'h3;
    assign afi_awburst =       2'h1;
    assign afi_awqos =         4'h0;
    assign afi_wrissuecap1en = 1'b0;

    assign afi_arlock =        2'h0;
//    assign afi_arcache =       4'h3;
    assign afi_arprot =        3'h0;
    assign afi_arsize =        2'h3;
    assign afi_arburst =       2'h1;
    assign afi_arqos =         4'h0;
    assign afi_rdissuecap1en = 1'b0;
    assign extra_din = en_extra_din_r && fifo_nempty_mclk;

    // ---------------- mclk domain: control/status registers ----------------
    always @ (posedge mclk) begin
        // two-stage registered read of the command table copy
        if (ct_re[0]) ct_data_reg <=  ct_data_ram[ct_addr];
        if (ct_re[1]) ct_data <=      ct_data_reg;

        if (ctba_ld) ctba_r <=        ctba[31:7];
        if (cmd_start) prdtl_mclk <=  prdtl;
        if (cmd_start) dev_wr_mclk <= dev_wr;

        if      (mrst)      cmd_busy <= 0;
        else if (cmd_start) cmd_busy <= 1;
        else if (cmd_done)  cmd_busy <= 0;

        if      (mrst)         ct_busy <= 0;
        else if (cmd_start)    ct_busy <= 1;
        else if (ct_done_mclk) ct_busy <= 0;

        if      (mrst)                  afi_arcache <= 4'h3;
        else if (set_axi_rd_cache_mode) afi_arcache <= axi_rd_cache_mode;

        if      (mrst)                  afi_awcache <= 4'h3;
        else if (set_axi_wr_cache_mode) afi_awcache <= axi_wr_cache_mode;

        // delay by one mclk so prd_start_hclk can not get ahead of cmd_start_hclk
        prd_start_r <= prd_start;

        // NOTE(review): data_irq is an hclk-domain register sampled here in mclk; it is
        // assumed to be stable when prd_done arrives - confirm against PRD fetch latency.
        if (mrst || prd_irq_clear ||cmd_start || cmd_abort) prd_irq_pend <= 0;
        else if (data_irq && prd_done)                      prd_irq_pend <= 1;

        if (mrst || cmd_start || cmd_abort) en_extra_din_r <= 0;
        else if (cmd_done)                  en_extra_din_r <= 1;
    end

    // ---------------- hclk domain: AXI address/burst sequencing ----------------
    always @ (posedge hclk) begin
        prd_start_hclk_r <= prd_start_hclk;

        if      (hrst || cmd_abort_hclk) prd_enabled <= 0;
        else if (prd_start_hclk_r)       prd_enabled <= 1; // precedence over  cmd_start_hclk
        else if (cmd_start_hclk)         prd_enabled <= 0;

        // Address of the next CT/PRD read in 16-byte units. After the command table
        // (0x80 bytes) is read, step to the first PRD entry at CT base + 0x80;
        // then advance by one entry per PRD.
        if (cmd_start_hclk)  ct_maddr[31:4] <= {ctba_r[31:7],3'b0};
        else if (ct_done)    ct_maddr[31:4] <= ct_maddr[31:4] + CT_SIZE_PRD_UNITS;
        else if (wcount_set) ct_maddr[31:4] <= ct_maddr[31:4] + 1;

        // overall sequencing makes sure that there will be no new requests until older served
        // additionally they are mutually exclusive - only one may be pending at a time
        if      (hrst)                raddr_ct_pend <= 0;
        else if (raddr_ct_rq)         raddr_ct_pend <= 1;
        else if (axi_set_raddr_ready) raddr_ct_pend <= 0;

        if      (hrst)                raddr_prd_pend <= 0;
        else if (raddr_prd_rq)        raddr_prd_pend <= 1;
        else if (axi_set_raddr_ready) raddr_prd_pend <= 0;

        if      (hrst)                raddr_data_pend <= 0;
        else if (raddr_data_rq)       raddr_data_pend <= 1;
        else if (axi_set_raddr_ready) raddr_data_pend <= 0;

        if      (hrst)                waddr_data_pend <= 0;
        else if (waddr_data_rq)       waddr_data_pend <= 1;
        else if (axi_set_waddr_ready) waddr_data_pend <= 0;

        // remember what kind of transfer the most recent request was for
        if (hrst)                                           {is_ct_addr, is_prd_addr, is_data_addr} <= 0;
        else if (raddr_ct_rq || raddr_prd_rq || wcount_set) {is_ct_addr, is_prd_addr, is_data_addr} <= {raddr_ct_rq, raddr_prd_rq, wcount_set};

        // latch address, burst length and ID one cycle before afi_arvalid/afi_awvalid
        if (axi_set_raddr_w || axi_set_waddr_w) begin
            if (raddr_data_pend || waddr_data_pend)  afi_addr <= {data_addr[31:3], 3'b0};
            else                                     afi_addr <= {ct_maddr[31:4],  4'b0};

            if (raddr_data_pend || waddr_data_pend)  afi_alen <= data_len;
            else if (raddr_ct_pend)                  afi_alen <= 4'hf; // 16 QWORDS (128 bytes)
            else                                     afi_alen <= 4'h1; // 2 QWORDS

            if (raddr_data_pend || waddr_data_pend)  afi_id <= raddr_data_pend ? {2'h2, dev_rd_id} : {2'h3, dev_wr_id};
            else                                     afi_id <= raddr_ct_pend   ? {2'h0, ct_id} :     {2'h1, prd_id};
        end

        if (hrst) axi_set_raddr_r <= 0;
        else      axi_set_raddr_r <= axi_set_raddr_w;

        if (hrst) axi_set_waddr_r <= 0;
        else      axi_set_waddr_r <= axi_set_waddr_w;

        // early calculate burst length; uses the count being loaded when a new PRD just arrived
        if (addr_data_rq) data_len <= ((|qwcount_cur[21:7]) || (&qwcount_cur[6:3]))? 4'hf: qwcount_cur[6:3];

        if      (wcount_set)          qwcount[21:1] <= qwcount_load;
        else if (axi_set_addr_data_w) qwcount[21:7] <= qwcount[21:7] - 1; // may get negative

        // NOTE(review): with qwcount[21:7]==0 and qwcount[6:3]==4'hf this requests one more
        // burst after a full 16-QWORD one - confirm this matches the FIFO modules' expectations.
        data_next_burst <= axi_set_addr_data_w && ((|qwcount[21:7]) || (&qwcount[6:3])); // same time as afi_awvalid || afi_arvalid

// Get PRD data
        // store data address from PRD, increment when needed
        // first QWORD of the PRD entry: data base address (low 32 bits used)
        if (afi_rd_ctl[0] && is_prd_addr && (!int_data_addr[0])) data_addr[31:1] <= afi_rdata[31:1];
        if (axi_set_addr_data_w) data_addr[31:7] <= data_addr[31:7] + 1;

        // second QWORD of the PRD entry: DW3 is afi_rdata[63:32] - bit 31 is I, bits [21:0] are DBC
        if (afi_rd_ctl[0] && is_prd_addr && (int_data_addr[0])) data_irq <=     afi_rdata[63];

        if (afi_rd_ctl[0] && is_prd_addr && (int_data_addr[0])) wcount[21:1] <= afi_rdata[53:33];

        wcount_set <= afi_rd_ctl[0] && is_prd_addr && (int_data_addr[0]);

        if      (cmd_start_hclk)  prds_left  <= prdtl_mclk;
        else if (raddr_prd_rq)    prds_left  <= prds_left  - 1;

        if      (raddr_prd_rq)    last_prd  <= prds_left[15:1] == 0;

        // Set/increment address to store (internally) CT and PRD data
        if      (axi_set_raddr_r)                int_data_addr <= 0;
        else if (afi_rd_ctl[0] && !is_data_addr) int_data_addr <= int_data_addr + 1;

        // each 64-bit read fills two consecutive DWORD locations of the CT copy
        if (afi_rd_ctl[0] && is_ct_addr) {ct_data_ram[{int_data_addr,1'b1}],ct_data_ram[{int_data_addr,1'b0}]} <= afi_rdata; // make sure it is synthesized correctly

        // generate busy for command table (CT) read
        if      (hrst)                                            ct_busy_r[0] <= 0;
        else if (cmd_start_hclk)                                  ct_busy_r[0] <= 1;
        else if (afi_rd_ctl[0] && is_ct_addr && (&int_data_addr)) ct_busy_r[0] <= 0;
        ct_busy_r[1] <= ct_busy_r[0]; // delayed version to detect end of command

        if (hrst || ct_busy_r[0])                   ct_over_prd_enabled[0] <= 0;
        else if (prd_enabled)                       ct_over_prd_enabled[0] <= 1;
        ct_over_prd_enabled[1] <= ct_over_prd_enabled[0];  // detecting 0->1 transition

        // generate busy for PRD table entry read: from the PRD read request until
        // both QWORDs of the entry are received (wcount_set)
        if      (hrst)         prd_rd_busy <= 0;
        else if (raddr_prd_rq) prd_rd_busy <= 1;
        else if (wcount_set)   prd_rd_busy <= 0;

        if (cmd_start_hclk) dev_wr_hclk <= dev_wr_mclk; // 1: memory -> device, 0: device -> memory

        // start pulses for the FIFO modules, one cycle after wcount_set (qwcount is valid then)
        prd_wr <= wcount_set && !dev_wr_hclk;
        prd_rd <= wcount_set &&  dev_wr_hclk;

        // [0] - read one CT/PRD QWORD from the AXI read FIFO this cycle, [1] - delayed copy
        afi_rd_ctl <= { afi_rd_ctl[0],(ct_busy_r[0] || prd_rd_busy) && ((|afi_rcount[7:SAFE_RD_BITS]) || (afi_rvalid && !(|afi_rd_ctl)))};

        // calculate afi_wlast - it is (qw_datawr_burst == 0), just use register qw_datawr_last

        if      (prd_wr)     qw_datawr_last <= qwcount[21:3] == 0;
        else if (afi_wvalid) qw_datawr_last <= qw_datawr_burst == 1;

        if      (prd_wr)                                                      qw_datawr_burst <= (|qwcount[21:7])? 4'hf: qwcount[6:3];
        else if (afi_wvalid && qw_datawr_last && (qw_datawr_left[21:7] == 0)) qw_datawr_burst <= qw_datawr_left[6:3]; // if not last roll over to 'hf
        else if (afi_wvalid)                                                  qw_datawr_burst <= qw_datawr_burst - 1;

        if      (prd_wr)                       qw_datawr_left[21:3] <= qwcount[21:3];
        else if (afi_wvalid && qw_datawr_last) qw_datawr_left[21:7] <= qw_datawr_left[21:7] - 1;

        // Count AXI IDs
        if      (hrst)             ct_id <= 0;
        else if (ct_busy_r==2'b10) ct_id <= ct_id + 1;

        if      (hrst)             prd_id <= 0;
        else if (wcount_set)       prd_id <= prd_id + 1;

        if      (hrst)             dev_wr_id <= 0;
        else if (done_dev_wr)      dev_wr_id <= dev_wr_id + 1;

        if      (hrst)             dev_rd_id <= 0;
        else if (done_dev_rd)      dev_rd_id <= dev_rd_id + 1;
    end

    ahci_dma_rd_fifo #( // memory to device
        .WCNT_BITS    (21),
        .ADDRESS_BITS (3)
    ) ahci_dma_rd_fifo_i (
        .mrst         (mrst),                        // input
        .hrst         (hrst),                        // input
        .mclk         (mclk),                        // input
        .hclk         (hclk),                        // input
        .wcnt         (wcount[21:1]),                // input[20:0]
        .woffs        (data_addr[2:1]),              // input[1:0]
        .start        (prd_rd),                      // input
        .din          (afi_rdata),                   // input[63:0]
        .din_av       (afi_rvalid),                  // input
        .din_av_many  (|afi_rcount[7:SAFE_RD_BITS]), // input
        .last_prd     (last_prd),                    // input
        .din_re       (data_afi_re),                 // output
        .done         (done_dev_wr),                 // output reg // @ hclk
        .done_flush   (done_flush),                  // output     // @ hclk
        .dout         (sys_out),                     // output[31:0]
        .dout_vld     (sys_dav),                     // output
        .dout_re      (sys_re)                       // input
    );

    ahci_dma_wr_fifo #( // device to memory
        .WCNT_BITS    (21),
        .ADDRESS_BITS (3)
    ) ahci_dma_wr_fifo_i (
        .mrst         (mrst),           // input
        .hrst         (hrst),           // input
        .mclk         (mclk),           // input
        .hclk         (hclk),           // input
        .wcnt         (wcount[21:1]),   // input[20:0]
        .woffs        (data_addr[2:1]), // input[1:0]
        .init         (cmd_start_hclk), // input
        .start        (prd_wr),         // input
        .dout         (afi_wdata),      // output[63:0] reg
//        .dout_av      (), // input
        .dout_av_many (afi_wcount_many),// input
        .last_prd     (last_prd),       // input
        .dout_we      (afi_wvalid),     // output
        .dout_wstb    (afi_wstb4),      // output[3:0] reg
        .done         (done_dev_rd),    // output reg
        .busy         (), // output
        .fifo_nempty_mclk  (fifo_nempty_mclk), // output reg
        .din          (sys_in),         // input[31:0]
        .din_rdy      (sys_nfull),      // output
        .din_avail    (sys_we)          // input
    );

    // mclk -> hclk cross-clock synchronization
    pulse_cross_clock #(
        .EXTRA_DLY(0)
    ) cmd_start_hclk_i (
        .rst       (mrst),            // input
        .src_clk   (mclk),            // input
        .dst_clk   (hclk),            // input
        .in_pulse  (cmd_start),       // input
        .out_pulse (cmd_start_hclk),  // output
        .busy()                       // output
    );
    pulse_cross_clock #(
        .EXTRA_DLY(0)
    ) cmd_abort_hclk_i (
        .rst       (mrst),            // input
        .src_clk   (mclk),            // input
        .dst_clk   (hclk),            // input
        .in_pulse  (cmd_abort),       // input
        .out_pulse (cmd_abort_hclk),  // output
        .busy()                       // output
    );
    pulse_cross_clock #(
        .EXTRA_DLY(0)
    ) prd_start_hclk_i (
        .rst       (mrst),            // input
        .src_clk   (mclk),            // input
        .dst_clk   (hclk),            // input
        .in_pulse  (prd_start_r),     // input
        .out_pulse (prd_start_hclk),  // output
        .busy()                       // output
    );

    // hclk -> mclk;
    pulse_cross_clock #(
        .EXTRA_DLY(0)
    ) cmd_done_i (
        .rst       (hrst),            // input
        .src_clk   (hclk),            // input
        .dst_clk   (mclk),            // input
        .in_pulse  (cmd_done_hclk),   // input
        .out_pulse (cmd_done),        // output
        .busy()                       // output
    );

    pulse_cross_clock #(
        .EXTRA_DLY(0)
    ) ct_done_mclk_i (
        .rst       (hrst),            // input
        .src_clk   (hclk),            // input
        .dst_clk   (mclk),            // input
        .in_pulse  (ct_done),         // input
        .out_pulse (ct_done_mclk),    // output
        .busy()                       // output
    );

    pulse_cross_clock #(
        .EXTRA_DLY(0)
    ) prd_done_mclk_i (
        .rst       (hrst),            // input
        .src_clk   (hclk),            // input
        .dst_clk   (mclk),            // input
        .in_pulse  (prd_done_hclk),   // input
        .out_pulse (prd_done),        // output
        .busy()                       // output
    );

endmodule