Commit 3bccc0bb authored by Andrey Filippov's avatar Andrey Filippov

ported xdct module, added to compressor chain

parent c96dad8c
......@@ -290,18 +290,66 @@ module jp_channel#(
.caddrw (caddrw), // input[7:0]
.cwe (cwe), // input
.signed_c (signed_c), // input[8:0]
.do (yc_nodc), // output[9:0]
.avr (yc_avr), // output[8:0]
.dv (yc_nodc_dv), // output
.ds (yc_nodc_ds), // output
.tn (yc_nodc_tn), // output[2:0]
.first (yc_nodc_first), // output reg
.last (yc_nodc_last), // output reg
.do (yc_nodc), // output[9:0]
.avr (yc_avr), // output[8:0]
.dv (yc_nodc_dv), // output
.ds (yc_nodc_ds), // output
.tn (yc_nodc_tn), // output[2:0]
.first (yc_nodc_first), // output reg
.last (yc_nodc_last), // output reg
.component_num (yc_nodc_component_num), // output[2:0]
.component_color (yc_nodc_component_color), // output
.component_first (yc_nodc_component_first), // output
.component_lastinmb (yc_nodc_component_lastinmb) // output reg
);
// wire [ 9:0] yc_nodc; // [9:0] data out (4:2:0) (signed, average=0)
wire dct_last_in;
wire dct_pre_first_out;
wire dct_dv;
wire [12:0] dct_out;
xdct393 xdct393_i (
.clk (xclk), // input
.en (frame_en), // input if zero will reset transpose memory page numbers
.start (yc_nodc_ds), // input single-cycle start pulse that goes with the first pixel data. Other 63 should follow
.xin (yc_nodc), // input[9:0]
.last_in (dct_last_in), // output reg output high during input of the last of 64 pixels in a 8x8 block //
.pre_first_out (dct_pre_first_out), // outpu 1 cycle ahead of the first output in a 64 block
.dv (dct_dv), // output data output valid. Will go high on the 94-th cycle after the start (now - on 95-th?)
.d_out (dct_out) // output[12:0]
);
reg quant_start;
always @ (posedge xclk) quant_start <= dct_pre_first_out;
/*
xdct i_xdct ( .clk(clk), // top level module
.en(cmprs_en), // if zero will reset transpose memory page numbers
.start(dct_start), // single-cycle start pulse that goes with the first pixel data. Other 63 should follow
.xin(color_d[9:0]), // [7:0] - input data
.last_in(dct_last_in), // output high during input of the last of 64 pixels in a 8x8 block //
.pre_first_out(dct_pre_first_out),// 1 cycle ahead of the first output in a 64 block
.dv(dct_dv), // data output valid. Will go high on the 94-th cycle after the start
.d_out(dct_out[12:0]));// [12:0]output data
// probably dcc things are not needed anymore
always @ (posedge clk) quant_start <= dct_pre_first_out;
always @ (posedge clk) begin
if (!dccout) dcc_en <=1'b0;
else if (dct_start && color_first && (color_tn[2:0]==3'b001)) dcc_en <=1'b1; // 3'b001 - closer to the first "start" in quantizator
end
wire [15:0] quant_dc_tdo;// MSB aligned coefficient for the DC component (used in focus module)
wire [2:0] coring_num;
FDE_1 i_coring_num0 (.C(clk2x),.CE(wr_quantizer_mode),.D(di[ 0]),.Q(coring_num[0]));
FDE_1 i_coring_num1 (.C(clk2x),.CE(wr_quantizer_mode),.D(di[ 1]),.Q(coring_num[1]));
FDE_1 i_coring_num2 (.C(clk2x),.CE(wr_quantizer_mode),.D(di[ 2]),.Q(coring_num[2]));
*/
endmodule
/**********************************************************************
** -----------------------------------------------------------------------------**
** xdct393.v
**
** 8x8 discrete Cosine Transform
**
** Copyright (C) 2002-2015 Elphel, Inc
**
** -----------------------------------------------------------------------------**
** xdct393 is free software - hardware description language (HDL) code.
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
**
** Modified by Andrey Filippov - goal to make it work in start/stop mode, using
** "start" input (going together with the first data, no restriction on the gap between 64-pixel blocks (>=0)
** Removed "RST" input ("en" is only used to reset ping-pong transpose memory address)
** Split module in 2 stages
** Also saved some area - original design compiled by XST to 865 slices (XC2S300e), this one - 780!
**
** It is based on the original design (Xilix app. note XAPP610) by:
** Author: Latha Pillai
** Senior Applications Engineer
**
** Video Applications
** Advanced Products Group
** Xilinx, Inc.
**
** Copyright (c) 2001 Xilinx, Inc.
** All rights reserved
**
** Date: Feb. 10, 2002
**
** RESTRICTED RIGHTS LEGEND
**
** This software has not been published by the author, and
** has been disclosed to others for the purpose of enhancing
** and promoting design productivity in Xilinx products.
**
** Therefore use, duplication or disclosure, now and in the
** future should give consideration to the productivity
** enhancements afforded the user of this code by the author's
** efforts. Thank you for using our products !
**
** Disclaimer: THESE DESIGNS ARE PROVIDED "AS IS" WITH NO WARRANTY
** WHATSOEVER AND XILINX SPECIFICALLY DISCLAIMS ANY
** IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR
** A PARTICULAR PURPOSE, OR AGAINST INFRINGEMENT.
***********************************************************************/
/*
after I added DC subtraction before DCT I got 9-bit (allthough not likely to go out of 8bit range) signed data.
also increased transpose memory to 9 bits (anyway it is 16-bit wide) - see if it will help to prevent saturation
without significant increase in gates
Saturatuion is still visible on real pictures, but there was a bug - addsub<i>a_comp, addsub<i>b_comp where not using their
MSB. I added 1 more bit to add_sub<i>a and add_sub<i>b and fixed that bug. Only 2 mofre slices were used
Device utilization summary:
Number of External GCLKIOBs 1 out of 4 25%
Number of External IOBs 23 out of 178 12%
Number of LOCed External IOBs 0 out of 23 0%
Number of BLOCKRAMs 1 out of 16 6%
Number of SLICEs 855 out of 3072 27%
Number of GCLKs 1 out of 4 25%
*/
// still not enough - maybe one tiny bit more??
/*
Device utilization summary:
Number of External GCLKIOBs 1 out of 4 25%
Number of External IOBs 26 out of 178 14%
Number of LOCed External IOBs 0 out of 26 0%
Number of BLOCKRAMs 1 out of 16 6%
Number of SLICEs 837 out of 3072 27%
Number of GCLKs 1 out of 4 25%
*/
`timescale 1ns/1ps
// For xdct353 - increasing data in 9 bits -> 10 bits, out 12 bits ->13 bits
module xdct393 (
input clk, // system clock, posedge
input en, // if zero will reset transpose memory page njumbers
input start, // single-cycle start pulse that goes with the first pixel data. Other 63 should follow
input [9:0] xin, // [7:0] - input data
output reg last_in, // output high during input of the last of 64 pixels in a 8x8 block
output pre_first_out, // 1 cycle ahead of the first output in a 64 block
output dv, // data output valid. Will go high on the 94-th cycle after the start
output [12:0] d_out); // [8:0]output data
wire stage1_done;
wire tm_page;
wire tm_we;
wire [6:0] tm_ra;
wire [6:0] tm_wa;
wire [15:0] tm_out;
wire [15:0] tm_di;
reg stage1_done_r; // delay by one clock to use memory output register
wire tm_re=1'b1; // TODO: generate, for now just 1'b1
reg tm_regen;
always @ (posedge clk) begin
last_in <= (tm_wa[5:0]== 6'h30);
stage1_done_r <= stage1_done_r;
tm_regen <= tm_re;
end
dct393_stage1 i_dct_stage1(
.clk(clk),
.en(en),
.start(start),
.xin(xin), // [7:0]
.we(tm_we), // write to transpose memory
.wr_cntr(tm_wa), // [6:0] transpose memory write address
.z_out(tm_di[15:0]),
.page(tm_page),
.done(stage1_done));
dct393_stage2 i_dct_stage2(
.clk(clk),
.en(en),
.start(stage1_done), // stage 1 finished, data available in transpose memory
.page(tm_page), // transpose memory page finished, valid at start
.rd_cntr(tm_ra[6:0]), // [6:0] transpose memory read address
.tdin(tm_out[15:0]), // [7:0] - data from transpose memory
.endv(pre_first_out),
.dv(dv), // data output valid
.dct2_out(d_out[12:0]));// [10:0]output data
ram18_var_w_var_r #(
.REGISTERS (1),
.LOG2WIDTH_WR (4),
.LOG2WIDTH_RD (4),
.DUMMY(0)
) i_transpose_mem (
.rclk (clk), // input
.raddr ({3'b0,tm_ra[6:0]}), // input[9:0]
.ren (tm_re), // input
.regen (tm_regen), // input
.data_out (tm_out[15:0]), // output[15:0]
.wclk (clk), // input
.waddr ({3'b0,tm_wa[6:0]}), // input[9:0]
.we (tm_we), // input
.web (4'hf), // input[3:0]
.data_in (tm_di[15:0]) // input[15:0]
);
endmodule
// 01/24/2004: Moved all clocks in stage 1 to "negedge" to reduce current pulses
module dct393_stage1 (
input clk, // system clock, posedge
input en,
input start, // single-cycle start pulse to replace RST
input [ 9:0] xin, // [7:0]
output reg we, // write to transpose memory
output reg [ 6:0] wr_cntr, // [6:0] transpose memory write address
output reg [15:0] z_out, //data to transpose memory
output reg page, // transpose memory page just filled (valid @ done)
output reg done); // last cycle writing to transpose memory - may use after it (move it earlier?)
/* constants */
localparam C3= 16'd54491;
localparam S3= 16'd36410;
localparam C4= 16'd46341;
localparam C6= 16'd25080;
localparam S6= 16'd60547;
localparam C7= 16'd12785;
localparam S7= 16'd64277;
reg [16:0] memory1a, memory2a, memory3a, memory4a;
/* 1D section */
/* The max value of a pixel after processing (to make their expected mean to zero)
is 127. If all the values in a row are 127, the max value of the product terms
would be (127*2)*(23170/256) and that of z_out_int would be (127*8)*23170/256.
This value divided by 2raised to 8 is equivalent to ignoring the 8 lsb bits of the value */
reg [ 9:0] xa0_in, xa1_in, xa2_in, xa3_in, xa4_in, xa5_in, xa6_in, xa7_in;
reg [ 9:0] xa0_reg, xa1_reg, xa2_reg, xa3_reg, xa4_reg, xa5_reg, xa6_reg, xa7_reg;
// reg [ 9:0] addsub1a_comp, addsub2a_comp, addsub3a_comp, addsub4a_comp;
reg [10:0] addsub1a_comp, addsub2a_comp, addsub3a_comp, addsub4a_comp; // AF2015: increasing width - was limiting
reg [10:0] add_sub1a, add_sub2a, add_sub3a, add_sub4a;
reg save_sign1a, save_sign2a, save_sign3a, save_sign4a;
reg [17:0] p1a, p2a, p3a, p4a;
wire [35:0] p1a_all, p2a_all, p3a_all, p4a_all;
reg toggleA;
reg [18:0] z_out_int1, z_out_int2;
reg [18:0] z_out_int;
wire [15:0] z_out_prelatch;
reg [ 2:0] indexi;
/* clks and counters */
reg [ 6:0] wr_cntr_prelatch;
/* memory section */
reg done_prelatch;
reg we_prelatch;
wire enwe;
wire pre_sxregs;
reg sxregs;
reg page_prelatch;
// TODO: See if negedge is needed
wire nclk = ~clk; // seems that everything here is running at negedge (and delays too), but not the transpose memory
// to conserve energy by disabling toggleA
wire sxregs_d8;
reg enable_toggle;
// SRL16_1 i_sxregs_d8 (.Q(sxregs_d8), .A0(1'b1), .A1(1'b1), .A2(1'b1), .A3(1'b0), .CLK(clk),.D(sxregs)); // dly=7+1
dly_16 #(.WIDTH(1)) i_sxregs_d8(.clk(nclk),.rst(1'b0), .dly(7), .din(sxregs), .dout(sxregs_d8)); // dly=7+1
// SRL16_1 i_pre_sxregs (.Q(pre_sxregs), .A0(1'b0), .A1(1'b1), .A2(1'b1), .A3(1'b0), .CLK(clk), .D(start)); // dly=6+1
dly_16 #(.WIDTH(1)) i_pre_sxregs(.clk(nclk),.rst(1'b0), .dly(6), .din(start), .dout(pre_sxregs)); // dly=6+1
// SRL16_1 i_enwe (.Q(enwe), .A0(1'b1), .A1(1'b0), .A2(1'b1), .A3(1'b0), .CLK(clk), .D(pre_sxregs)); // dly=5+1
dly_16 #(.WIDTH(1)) i_enwe(.clk(nclk),.rst(1'b0), .dly(5), .din(pre_sxregs), .dout(enwe)); // dly=5+1
always @ (posedge nclk) begin
enable_toggle <= en && (sxregs || (enable_toggle && !sxregs_d8));
done_prelatch<= (wr_cntr_prelatch[5:0]==6'h3f);
if (wr_cntr_prelatch[5:0]==6'h3f) page_prelatch <= wr_cntr_prelatch[6];
we_prelatch<= enwe || (en && we_prelatch && (wr_cntr_prelatch[5:0]!=6'h3f));
if (!en) wr_cntr_prelatch <= 7'b0;
else if (we_prelatch) wr_cntr_prelatch <= wr_cntr_prelatch + 1;
sxregs <= pre_sxregs || ((wr_cntr_prelatch[2:0]==3'h1) && (wr_cntr_prelatch[5:3]!=3'h7));
toggleA <= sxregs || (enable_toggle && (~toggleA));
if (sxregs) indexi <= 3'h7;
else if (enable_toggle) indexi<=indexi+1;
end
/* 1D-DCT BEGIN */
// store 1D-DCT constant coeeficient values for multipliers */
always @ (posedge nclk) begin
case (indexi)
0 : begin memory1a <= {1'b0,C4}; //8'd91
memory2a <= {1'b0,C4}; //8'd91
memory3a <= {1'b0,C4}; //8'd91
memory4a <= {1'b0,C4}; //8'd91
end
1 : begin memory1a <= {1'b0,S7}; //8'd126;
memory2a <= {1'b0,C3}; //8'd106;
memory3a <= {1'b0,S3}; //8'd71;
memory4a <= {1'b0,C7}; //8'd25;
end
2 : begin memory1a <= {1'b0,S6}; //8'd118;
memory2a <= {1'b0,C6}; //8'd49;
memory3a <= {1'b1,C6}; //-8'd49;
memory4a <= {1'b1,S6}; //-8'd118
end
3 : begin memory1a <= {1'b0,C3}; // 8'd106;
memory2a <= {1'b1,C7}; //-8'd25;
memory3a <= {1'b1,S7}; //-8'd126;
memory4a <= {1'b1,S3}; //-8'd71;
end
4 : begin memory1a <= {1'b0,C4}; // 8'd91;
memory2a <= {1'b1,C4}; //-8'd91;
memory3a <= {1'b1,C4}; //-8'd91;
memory4a <= {1'b0,C4}; // 8'd91;
end
5 : begin memory1a <= {1'b0,S3}; // 8'd71;
memory2a <= {1'b1,S7}; //-8'd126;
memory3a <= {1'b0,C7}; // 8'd25;
memory4a <= {1'b0,C3}; // 8'd106;
end
6 : begin memory1a <= {1'b0,C6}; // 8'd49;
memory2a <= {1'b1,S6}; //-8'd118;
memory3a <= {1'b0,S6}; // 8'd118;
memory4a <= {1'b1,C6}; //-8'd49;
end
7 : begin memory1a <= {1'b0,C7}; // 8'd25;
memory2a <= {1'b1,S3}; //-8'd71;
memory3a <= {1'b0,C3}; // 8'd106;
memory4a <= {1'b1,S7}; //-8'd126;
end
endcase
end
/* 8-bit input shifted 8 times through a shift register*/
// xa0_in will see output registers from posedge, may be replaced by latches if needed - but currently delay is under 5ns
always @ (posedge nclk) begin
xa0_in <= xin;
xa1_in <= xa0_in;
xa2_in <= xa1_in;
xa3_in <= xa2_in;
xa4_in <= xa3_in;
xa5_in <= xa4_in;
xa6_in <= xa5_in;
xa7_in <= xa6_in;
end
/* shifted inputs registered every 8th clk (using cntr8)*/
always @ (posedge nclk) if (sxregs) begin
xa0_reg <= xa0_in;
xa1_reg <= xa1_in;
xa2_reg <= xa2_in;
xa3_reg <= xa3_in;
xa4_reg <= xa4_in;
xa5_reg <= xa5_in;
xa6_reg <= xa6_in;
xa7_reg <= xa7_in;
end
/* adder / subtractor block */
always @ (negedge clk)
if (toggleA == 1'b1) begin
add_sub1a <= {xa7_reg[9],xa7_reg[9:0]} + {xa0_reg[9],xa0_reg[9:0]};
add_sub2a <= {xa6_reg[9],xa6_reg[9:0]} + {xa1_reg[9],xa1_reg[9:0]};
add_sub3a <= {xa5_reg[9],xa5_reg[9:0]} + {xa2_reg[9],xa2_reg[9:0]};
add_sub4a <= {xa4_reg[9],xa4_reg[9:0]} + {xa3_reg[9],xa3_reg[9:0]};
end else begin
add_sub1a <= {xa7_reg[9],xa7_reg[9:0]} - {xa0_reg[9],xa0_reg[9:0]};
add_sub2a <= {xa6_reg[9],xa6_reg[9:0]} - {xa1_reg[9],xa1_reg[9:0]};
add_sub3a <= {xa5_reg[9],xa5_reg[9:0]} - {xa2_reg[9],xa2_reg[9:0]};
add_sub4a <= {xa4_reg[9],xa4_reg[9:0]} - {xa3_reg[9],xa3_reg[9:0]};
end
// First valid add_sub appears at the 10th clk (8 clks for shifting inputs,
// 9th clk for registering shifted input and 10th clk for add_sub
// to synchronize the i value to the add_sub value, i value is incremented
// only after 10 clks
always @ (posedge nclk) begin
save_sign1a <= add_sub1a[10];
save_sign2a <= add_sub2a[10];
save_sign3a <= add_sub3a[10];
save_sign4a <= add_sub4a[10];
addsub1a_comp <= add_sub1a[10]? (-add_sub1a) : add_sub1a;
addsub2a_comp <= add_sub2a[10]? (-add_sub2a) : add_sub2a;
addsub3a_comp <= add_sub3a[10]? (-add_sub3a) : add_sub3a;
addsub4a_comp <= add_sub4a[10]? (-add_sub4a) : add_sub4a;
end
assign p1a_all = addsub1a_comp * memory1a; //[15:0]; // TODO: Check - memory is [16:0] !
assign p2a_all = addsub2a_comp * memory2a; //[15:0];
assign p3a_all = addsub3a_comp * memory3a; //[15:0];
assign p4a_all = addsub4a_comp * memory4a; //[15:0];
always @ (posedge nclk)
begin
p1a <= (save_sign1a ^ memory1a[16]) ? (-p1a_all[26:9]) :(p1a_all[26:9]);
p2a <= (save_sign2a ^ memory2a[16]) ? (-p2a_all[26:9]) :(p2a_all[26:9]);
p3a <= (save_sign3a ^ memory3a[16]) ? (-p3a_all[26:9]) :(p3a_all[26:9]);
p4a <= (save_sign4a ^ memory4a[16]) ? (-p4a_all[26:9]) :(p4a_all[26:9]);
end
/* Final adder. Adding the ouputs of the 4 multipliers */
always @ (posedge nclk) begin
z_out_int1 <= ({p1a[17],p1a} + {p2a[17],p2a});
z_out_int2 <= ({p3a[17],p3a} + {p4a[17],p4a});
z_out_int <= (z_out_int1 + z_out_int2);
end
// rounding of the value
assign z_out_prelatch[15:0] = z_out_int[18:3]+ z_out_int[2]; // correct rounding
// outputs from output latches to cross clock edge boundary
always @ (posedge clk) begin
z_out[15:0] <= z_out_prelatch[15:0];
wr_cntr[6:0] <= wr_cntr_prelatch[6:0];
done <= done_prelatch;
we <= we_prelatch;
page <= page_prelatch;
end
/* 1D-DCT END */
endmodule
module dct393_stage2 (
input clk, // system clock, posedge
input en,
input start, // stage 1 finished, data available in transpose memory
input page, // transpose memory page finished, valid at start
output [6:0] rd_cntr, // [6:0] transpose memory read address
input [15:0] tdin, // [15:0] - data from transpose memory, added 6 bit fractional part
output endv, // one cycle ahead of starting (continuing) dv
output reg dv, // data output valid
output [12:0] dct2_out);// [8:0]output data
/* constants */
localparam C3= 16'd54491;
localparam S3= 16'd36410;
localparam C4= 16'd46341;
localparam C6= 16'd25080;
localparam S6= 16'd60547;
localparam C7= 16'd12785;
localparam S7= 16'd64277;
reg [16:0] memory1a, memory2a, memory3a, memory4a;
reg [2:0] indexi;
/* 2D section */
reg [15:0] xb0_in, xb1_in, xb2_in, xb3_in, xb4_in, xb5_in, xb6_in, xb7_in;
reg [15:0] xb0_reg, xb1_reg, xb2_reg, xb3_reg, xb4_reg, xb5_reg, xb6_reg, xb7_reg;
reg [16:0] add_sub1b, add_sub2b, add_sub3b, add_sub4b;
// reg [15:0] addsub1b_comp, addsub2b_comp, addsub3b_comp, addsub4b_comp;
reg [16:0] addsub1b_comp, addsub2b_comp, addsub3b_comp, addsub4b_comp; // AF2015: increased to match result
reg save_sign1b, save_sign2b, save_sign3b, save_sign4b;
reg [18:0] p1b, p2b, p3b, p4b;
wire [35:0] p1b_all, p2b_all, p3b_all, p4b_all;
reg toggleB;
reg [19:0] dct2d_int1, dct2d_int2;
reg [20:0] dct_2d_int;
wire [12:0] dct_2d_rnd;
// transpose memory read address
reg [ 5:0] rd_cntrs;
reg rd_page;
// start with the same as stage1
wire sxregs;
// to conserve energy by disabling toggleB
wire sxregs_d8;
reg enable_toggle;
reg en_started;
wire disdv; // AF2015: was missing
// SRL16 i_endv (.Q(endv), .A0(1'b0), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk), .D(start)); // dly=14+1
dly_16 #(.WIDTH(1)) i_endv(.clk(clk),.rst(1'b0), .dly(14), .din(start), .dout(endv)); // dly=14+1
// SRL16 i_disdv (.Q(disdv), .A0(1'b0), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk), .D(rd_cntr[5:0]==6'h3f)); // dly=14+1
dly_16 #(.WIDTH(1)) i_disdv(.clk(clk),.rst(1'b0), .dly(14), .din(rd_cntr[5:0]==6'h3f), .dout(disdv)); // dly=14+1
// SRL16 i_sxregs (.Q(sxregs), .A0(1'b0), .A1(1'b0), .A2(1'b0), .A3(1'b1), .CLK(clk),.D((rd_cntr[5:3]==3'h0) && en_started)); // dly=8+1
dly_16 #(.WIDTH(1)) i_sxregs(.clk(clk),.rst(1'b0), .dly(8), .din((rd_cntr[5:3]==3'h0) && en_started), .dout(sxregs)); // dly=8+1
// SRL16 i_sxregs_d8 (.Q(sxregs_d8), .A0(1'b1), .A1(1'b1), .A2(1'b1), .A3(1'b0), .CLK(clk),.D(sxregs && en_started)); // dly=7+1
dly_16 #(.WIDTH(1)) i_sxregs_d8(.clk(clk),.rst(1'b0), .dly(7), .din(sxregs && en_started), .dout(sxregs_d8)); // dly=7+1
always @ (posedge clk) begin
enable_toggle <= en && (sxregs || (enable_toggle && !sxregs_d8));
en_started <= en && (start || en_started);
dv <= en && (endv || (dv && ~disdv));
toggleB <= sxregs || (enable_toggle && (~toggleB));
if (sxregs) indexi <= 3'h7;
else if (enable_toggle) indexi<=indexi+1;
if (start) rd_page <= page;
if (start) rd_cntrs[5:0] <=6'b0; // will always count, but that does not matter- What about saving energy ;-) ? Saved...
else if (rd_cntrs[5:0]!=6'h3f) rd_cntrs[5:0] <= rd_cntrs[5:0]+1;
end
assign rd_cntr[6:0]= {rd_page,rd_cntrs[2:0],rd_cntrs[5:3]}; // transposed counter
// duplicate memory<i>a from stage 1
// store 1D-DCT constant coeeficient values for multipliers */
always @ (posedge clk) begin
case (indexi)
0 : begin memory1a <= {1'b0,C4}; //8'd91
memory2a <= {1'b0,C4}; //8'd91
memory3a <= {1'b0,C4}; //8'd91
memory4a <= {1'b0,C4}; //8'd91
end
1 : begin memory1a <= {1'b0,S7}; //8'd126;
memory2a <= {1'b0,C3}; //8'd106;
memory3a <= {1'b0,S3}; //8'd71;
memory4a <= {1'b0,C7}; //8'd25;
end
2 : begin memory1a <= {1'b0,S6}; //8'd118;
memory2a <= {1'b0,C6}; //8'd49;
memory3a <= {1'b1,C6}; //-8'd49;
memory4a <= {1'b1,S6}; //-8'd118
end
3 : begin memory1a <= {1'b0,C3}; // 8'd106;
memory2a <= {1'b1,C7}; //-8'd25;
memory3a <= {1'b1,S7}; //-8'd126;
memory4a <= {1'b1,S3}; //-8'd71;
end
4 : begin memory1a <= {1'b0,C4}; // 8'd91;
memory2a <= {1'b1,C4}; //-8'd91;
memory3a <= {1'b1,C4}; //-8'd91;
memory4a <= {1'b0,C4}; // 8'd91;
end
5 : begin memory1a <= {1'b0,S3}; // 8'd71;
memory2a <= {1'b1,S7}; //-8'd126;
memory3a <= {1'b0,C7}; // 8'd25;
memory4a <= {1'b0,C3}; // 8'd106;
end
6 : begin memory1a <= {1'b0,C6}; // 8'd49;
memory2a <= {1'b1,S6}; //-8'd118;
memory3a <= {1'b0,S6}; // 8'd118;
memory4a <= {1'b1,C6}; //-8'd49;
end
7 : begin memory1a <= {1'b0,C7}; // 8'd25;
memory2a <= {1'b1,S3}; //-8'd71;
memory3a <= {1'b0,C3}; // 8'd106;
memory4a <= {1'b1,S7}; //-8'd126;
end
endcase
end
always @ (posedge clk) begin
xb0_in <= tdin;
xb1_in <= xb0_in;
xb2_in <= xb1_in;
xb3_in <= xb2_in;
xb4_in <= xb3_in;
xb5_in <= xb4_in;
xb6_in <= xb5_in;
xb7_in <= xb6_in;
end
/* register inputs, inputs read in every eighth clk*/
always @ (posedge clk) if (sxregs) begin
xb0_reg <= xb0_in;
xb1_reg <= xb1_in;
xb2_reg <= xb2_in;
xb3_reg <= xb3_in;
xb4_reg <= xb4_in;
xb5_reg <= xb5_in;
xb6_reg <= xb6_in;
xb7_reg <= xb7_in;
end
always @ (posedge clk)
if (toggleB == 1'b1) begin
add_sub1b <= {xb7_reg[15],xb7_reg[15:0]} + {xb0_reg[15],xb0_reg[15:0]};
add_sub2b <= {xb6_reg[15],xb6_reg[15:0]} + {xb1_reg[15],xb1_reg[15:0]};
add_sub3b <= {xb5_reg[15],xb5_reg[15:0]} + {xb2_reg[15],xb2_reg[15:0]};
add_sub4b <= {xb4_reg[15],xb4_reg[15:0]} + {xb3_reg[15],xb3_reg[15:0]};
end else begin
add_sub1b <= {xb7_reg[15],xb7_reg[15:0]} - {xb0_reg[15],xb0_reg[15:0]};
add_sub2b <= {xb6_reg[15],xb6_reg[15:0]} - {xb1_reg[15],xb1_reg[15:0]};
add_sub3b <= {xb5_reg[15],xb5_reg[15:0]} - {xb2_reg[15],xb2_reg[15:0]};
add_sub4b <= {xb4_reg[15],xb4_reg[15:0]} - {xb3_reg[15],xb3_reg[15:0]};
end
always @ (posedge clk) begin
save_sign1b <= add_sub1b[16];
save_sign2b <= add_sub2b[16];
save_sign3b <= add_sub3b[16];
save_sign4b <= add_sub4b[16];
addsub1b_comp <= add_sub1b[16]? (-add_sub1b) : add_sub1b;
addsub2b_comp <= add_sub2b[16]? (-add_sub2b) : add_sub2b;
addsub3b_comp <= add_sub3b[16]? (-add_sub3b) : add_sub3b;
addsub4b_comp <= add_sub4b[16]? (-add_sub4b) : add_sub4b;
end
// assign p1b_all = addsub1b_comp[15:0] * memory1a[15:0]; // TODO: Check - memory is 16:0
// assign p2b_all = addsub2b_comp[15:0] * memory2a[15:0];
// assign p3b_all = addsub3b_comp[15:0] * memory3a[15:0];
// assign p4b_all = addsub4b_comp[15:0] * memory4a[15:0];
assign p1b_all = addsub1b_comp * memory1a; // AF2015: TODO: Check - memory is 16:0
assign p2b_all = addsub2b_comp * memory2a;
assign p3b_all = addsub3b_comp * memory3a;
assign p4b_all = addsub4b_comp * memory4a;
always @ (posedge clk) begin
/// Next line was simulated differently in Icarus 0.9 (wrong?) than in Icarus 0.8 (right?)
/// Xilinx probably did as 0.8
/// p1b_all[31:14] - 18-bit number, p1b - 19-bit. in 0.9 (-p1b_all[31:14]) was also 18, not expand to 19 bits, 0.8 - did
/// p1b <= (save_sign1b ^ memory1a[16]) ? (-p1b_all[31:14]) :(p1b_all[31:14]);
p1b[18:0] <= (save_sign1b ^ memory1a[16]) ? (-p1b_all[32:14]) :(p1b_all[32:14]);
p2b[18:0] <= (save_sign2b ^ memory2a[16]) ? (-p2b_all[32:14]) :(p2b_all[32:14]);
p3b[18:0] <= (save_sign3b ^ memory3a[16]) ? (-p3b_all[32:14]) :(p3b_all[32:14]);
p4b[18:0] <= (save_sign4b ^ memory4a[16]) ? (-p4b_all[32:14]) :(p4b_all[32:14]);
end
/* multiply the outputs of the add/sub block with the 8 sets of stored coefficients */
/* Final adder. Adding the ouputs of the 4 multipliers */
always @ (posedge clk) begin
dct2d_int1 <= ({p1b[18],p1b[18:0]} + {p2b[18],p2b[18:0]});
dct2d_int2 <= ({p3b[18],p3b[18:0]} + {p4b[18],p4b[18:0]});
dct_2d_int <= ({dct2d_int1[19],dct2d_int1[19:0]} + {dct2d_int2[19],dct2d_int2[19:0]});
end
assign dct_2d_rnd[12:0] = dct_2d_int[20:8];
assign dct2_out[12:0] = dct_2d_rnd[12:0] + dct_2d_int[7];
endmodule
/*******************************************************************************
* Module: ram18_var_w_var_r
* Date:2015-06-16
* Author: andrey
* Description: Half-BRAM module wrapper to use as a variable width R/W, no parity
*
* Copyright (c) 2015 <set up in Preferences-Verilog/VHDL Editor-Templates> .
* ram18_var_w_var_r.v is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* ram18_var_w_var_r.v is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/> .
*******************************************************************************/
`timescale 1ns/1ps
/*
Address/data widths
Connect unused data to 1b0, unused addresses - to 1'b1
RAMB18E1 in True Dual Port (TDP) Mode - each port individually
+-----------+---------+---------+---------+
|Data Width | Address | Data | Parity |
+-----------+---------+---------+---------+
| 1 | A[13:0] | D[0] | --- |
| 2 | A[13:1] | D[1:0] | --- |
| 4 | A[13:2] | D[3:0[ | --- |
| 9 | A[13:3] | D[7:0] | DP[0] |
| 18 | A[13:4] | D[15:0] | DP[1:0] |
+-----------+---------+---------+---------+
RAMB18E1 in Simple Dual Port (SDP) Mode
one of the ports (r or w) - 32/36 bits, other - variable
+------------+---------+---------+---------+
|Data Widths | Address | Data | Parity |
+------------+---------+---------+---------+
| 32/ 1 | A[13:0] | D[0] | --- |
| 32/ 2 | A[13:1] | D[1:0] | --- |
| 32/ 4 | A[13:2] | D[3:0[ | --- |
| 36/ 9 | A[13:3] | D[7:0] | DP[0] |
| 36/ 18 | A[13:4] | D[15:0] | DP[1:0] |
| 36/ 36 | A[13:5] | D[31:0] | DP[3:0] |
+------------+---------+---------+---------+
RAMB36E1 in True Dual Port (TDP) Mode - each port individually
+-----------+---------+---------+---------+
|Data Width | Address | Data | Parity |
+-----------+---------+---------+---------+
| 1 | A[14:0] | D[0] | --- |
| 2 | A[14:1] | D[1:0] | --- |
| 4 | A[14:2] | D[3:0[ | --- |
| 9 | A[14:3] | D[7:0] | DP[0] |
| 18 | A[14:4] | D[15:0] | DP[1:0] |
| 36 | A[14:5] | D[31:0] | DP[3:0] |
|1(Cascade) | A[15:0] | D[0] | --- |
+-----------+---------+---------+---------+
RAMB36E1 in Simple Dual Port (SDP) Mode
one of the ports (r or w) - 64/72 bits, other - variable
+------------+---------+---------+---------+
|Data Widths | Address | Data | Parity |
+------------+---------+---------+---------+
| 64/ 1 | A[14:0] | D[0] | --- |
| 64/ 2 | A[14:1] | D[1:0] | --- |
| 64/ 4 | A[14:2] | D[3:0[ | --- |
| 64/ 9 | A[14:3] | D[7:0] | DP[0] |
| 64/ 18 | A[14:4] | D[15:0] | DP[1:0] |
| 64/ 36 | A[14:5] | D[31:0] | DP[3:0] |
| 64/ 72 | A[14:6] | D[63:0] | DP[7:0] |
+------------+---------+---------+---------+
*/
module ram18_var_w_var_r
#(
parameter integer REGISTERS = 0, // 1 - registered output
parameter integer LOG2WIDTH_WR = 5, // WIDTH= 9 << (LOG2WIDTH - 3)
parameter integer LOG2WIDTH_RD = 5, // WIDTH= 9 << (LOG2WIDTH - 3)
parameter DUMMY = 0
)
(
input rclk, // clock for read port
// input [ 9:0] raddr, // read address
input [13-LOG2WIDTH_RD:0] raddr, // read address
input ren, // read port enable
input regen, // output register enable
output [(1 << LOG2WIDTH_RD)-1:0] data_out, // data out
input wclk, // clock for read port
input [13-LOG2WIDTH_WR:0] waddr, // write address
input we, // write port enable
input [ 3:0] web, // write byte enable
input [(1 << LOG2WIDTH_WR)-1:0] data_in // data out
);
generate
if (DUMMY)
ram18_dummy #(
.LOG2WIDTH_RD(LOG2WIDTH_RD)
) ram18_dummy_i (
.data_out(data_out)
);
else if ((LOG2WIDTH_WR == 5) && (LOG2WIDTH_RD == 5))
ram18_32w_32r #(
.REGISTERS (REGISTERS)
) ram_i (
.rclk (rclk), // input
.raddr (raddr), // input[8:0]
.ren (ren), // input
.regen (regen), // input
.data_out (data_out), // output[35:0]
.wclk (wclk), // input
.waddr (waddr), // input[8:0]
.we (we), // input
.web (web), // input[3:0]
.data_in (data_in) // input[35:0]
);
else if ((LOG2WIDTH_WR == 5) && (LOG2WIDTH_RD < 5))
ram18_32w_lt32r #(
.REGISTERS (REGISTERS),
.LOG2WIDTH_RD (LOG2WIDTH_RD)
) ram_i (
.rclk (rclk), // input
.raddr (raddr), // input[(>8):0]
.ren (ren), // input
.regen (regen), // input
.data_out (data_out), // output[(<35):0]
.wclk (wclk), // input
.waddr (waddr), // input[8:0]
.we (we), // input
.web (web), // input[3:0]
.data_in (data_in) // input[35:0]
);
else if ((LOG2WIDTH_WR < 5) && (LOG2WIDTH_RD == 5))
ram18_lt32w_32r #(
.REGISTERS (REGISTERS),
.LOG2WIDTH_WR (LOG2WIDTH_WR)
) ram_i (
.rclk (rclk), // input
.raddr (raddr), // input[8:0]
.ren (ren), // input
.regen (regen), // input
.data_out (data_out), // output[35:0]
.wclk (wclk), // input
.waddr (waddr), // input[(>8):0]
.we (we), // input
.web (web), // input[3:0]
.data_in (data_in) // input[(<35):0]
);
else if ((LOG2WIDTH_WR < 5) && (LOG2WIDTH_RD < 5))
ram18_lt32w_lt32r #(
.REGISTERS (REGISTERS),
.LOG2WIDTH_WR (LOG2WIDTH_WR),
.LOG2WIDTH_RD (LOG2WIDTH_RD)
) ram_i (
.rclk (rclk), // input
.raddr (raddr), // input[(>8):0]
.ren (ren), // input
.regen (regen), // input
.data_out (data_out), // output[(<35):0]
.wclk (wclk), // input
.waddr (waddr), // input[(>8):0]
.we (we), // input
.web (web), // input[7:0]
.data_in (data_in) // input[(<35):0]
);
endgenerate
endmodule
// Both ports with 32 bit widths
module ram18_32w_32r
#(
parameter integer REGISTERS = 0 // 1 - registered output
)
(
input rclk, // clock for read port
input [8:0] raddr, // read address
input ren, // read port enable
input regen, // output register enable
output [31:0] data_out, // data out
input wclk, // clock for read port
input [ 8:0] waddr, // write address
input we, // write port enable
input [ 3:0] web, // write byte enable
input [31:0] data_in // data out
);
localparam PWIDTH_WR=72;
localparam PWIDTH_RD=72;
RAMB18E1
#(
.RSTREG_PRIORITY_A ("RSTREG"), // Valid: "RSTREG" or "REGCE"
.RSTREG_PRIORITY_B ("RSTREG"), // Valid: "RSTREG" or "REGCE"
.DOA_REG (REGISTERS), // Valid: 0 (no output registers) and 1 - one output register (in SDP - to lower 18)
.DOB_REG (REGISTERS), // Valid: 0 (no output registers) and 1 - one output register (in SDP - to lower 18)
.READ_WIDTH_A (PWIDTH_RD), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.READ_WIDTH_B (0), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.WRITE_WIDTH_A (0), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.WRITE_WIDTH_B (PWIDTH_WR), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.RAM_MODE ("SDP"), // Valid "TDP" (true dual-port) and "SDP" - simple dual-port
.WRITE_MODE_A ("WRITE_FIRST"), // Valid: "WRITE_FIRST", "READ_FIRST", "NO_CHANGE"
.WRITE_MODE_B ("WRITE_FIRST"), // Valid: "WRITE_FIRST", "READ_FIRST", "NO_CHANGE"
.RDADDR_COLLISION_HWCONFIG ("DELAYED_WRITE"),// Valid: "DELAYED_WRITE","PERFORMANCE" (no access to the same page)
.SIM_COLLISION_CHECK ("ALL"), // Valid: "ALL", "GENERATE_X_ONLY", "NONE", and "WARNING_ONLY"
.INIT_FILE ("NONE"), // "NONE" or filename with initialization data
.SIM_DEVICE ("7SERIES") // Simulation device family - "VIRTEX6", "VIRTEX5" and "7_SERIES" // "7SERIES"
) RAMB36E1_i
(
// Port A (Read port in SDP mode):
.DOADO (data_out[15:0]), // Port A data/LSB data[15:0], output
.DOPADOP (data_out[17:16]),// Port A parity/LSB parity[2:0], output
.DIADI (data_in[15:0]), // Port A data/LSB data[15:0], input
.DIPADIP (data_in[17:16]), // Port A parity/LSB parity[2:0], input
.ADDRARDADDR ({raddr[8:0],5'b11111}), // Port A (read port in SDP) address [13:0], unused should be high, input
.CLKARDCLK (rclk), // Port A (read port in SDP) clock, input
.ENARDEN (ren), // Port A (read port in SDP) Enable, input
.REGCEAREGCE (regen), // Port A (read port in SDP) register enable, input
.RSTRAMARSTRAM (1'b0), // Port A (read port in SDP) set/reset, input
.RSTREGARSTREG (1'b0), // Port A (read port in SDP) register set/reset, input
.WEA (2'b0), // Port A (read port in SDP) Write Enable[2:0], input
// Port B
.DOBDO (data_out[31:16]),// Port B data/MSB data[15:0], output
.DOPBDOP (), // Port B parity/MSB parity[2:0], output
.DIBDI (data_in[31:16]), // Port B data/MSB data[31:0], input
.DIPBDIP (2'b0), // Port B parity/MSB parity[1:0], input
.ADDRBWRADDR ({waddr[8:0],5'b11111}), // Port B (write port in SDP) address [13:0], unused should be high, input
.CLKBWRCLK (wclk), // Port B (write port in SDP) clock, input
.ENBWREN (we), // Port B (write port in SDP) Enable, input
.REGCEB (1'b0), // Port B (write port in SDP) register enable, input
.RSTRAMB (1'b0), // Port B (write port in SDP) set/reset, input
.RSTREGB (1'b0), // Port B (write port in SDP) register set/reset, input
.WEBWE (web) // Port B (write port in SDP) Write Enable[7:0], input
);
endmodule
// Both ports with less than 32 bit widths
module ram18_lt32w_lt32r
#(
parameter integer REGISTERS = 0, // 1 - registered output
parameter integer LOG2WIDTH_WR = 4, // WIDTH= 1 << LOG2WIDTH
parameter integer LOG2WIDTH_RD = 4 // WIDTH= 1 << LOG2WIDTH
)
(
input rclk, // clock for read port
input [13-LOG2WIDTH_RD:0] raddr, // read address
input ren, // read port enable
input regen, // output register enable
output [(1 << LOG2WIDTH_RD)-1:0] data_out, // data out
input wclk, // clock for read port
input [13-LOG2WIDTH_WR:0] waddr, // write address
input we, // write port enable
input [ 3:0] web, // write byte enable
input [(1 << LOG2WIDTH_WR)-1:0] data_in // data out
);
localparam PWIDTH_WR = (LOG2WIDTH_WR > 2)? (9 << (LOG2WIDTH_WR - 3)): (1 << LOG2WIDTH_WR);
localparam PWIDTH_RD = (LOG2WIDTH_RD > 2)? (9 << (LOG2WIDTH_RD - 3)): (1 << LOG2WIDTH_RD);
localparam WIDTH_WR = 1 << LOG2WIDTH_WR;
localparam WIDTH_RD = 1 << LOG2WIDTH_RD;
wire [15:0] data_out16;
assign data_out=data_out16[WIDTH_RD-1:0];
wire [WIDTH_WR+15:0] data_in_ext = {16'b0,data_in[WIDTH_WR-1:0]};
wire [15:0] data_in16=data_in_ext[15:0];
RAMB18E1
#(
.RSTREG_PRIORITY_A ("RSTREG"), // Valid: "RSTREG" or "REGCE"
.RSTREG_PRIORITY_B ("RSTREG"), // Valid: "RSTREG" or "REGCE"
.DOA_REG (REGISTERS), // Valid: 0 (no output registers) and 1 - one output register (in SDP - to lower 18)
.DOB_REG (REGISTERS), // Valid: 0 (no output registers) and 1 - one output register (in SDP - to lower 18)
.READ_WIDTH_A (PWIDTH_RD), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.READ_WIDTH_B (0), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.WRITE_WIDTH_A (0), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.WRITE_WIDTH_B (PWIDTH_WR), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.RAM_MODE ("TDP"), // Valid "TDP" (true dual-port) and "SDP" - simple dual-port
.WRITE_MODE_A ("WRITE_FIRST"), // Valid: "WRITE_FIRST", "READ_FIRST", "NO_CHANGE"
.WRITE_MODE_B ("WRITE_FIRST"), // Valid: "WRITE_FIRST", "READ_FIRST", "NO_CHANGE"
.RDADDR_COLLISION_HWCONFIG ("DELAYED_WRITE"),// Valid: "DELAYED_WRITE","PERFORMANCE" (no access to the same page)
.SIM_COLLISION_CHECK ("ALL"), // Valid: "ALL", "GENERATE_X_ONLY", "NONE", and "WARNING_ONLY"
.INIT_FILE ("NONE"), // "NONE" or filename with initialization data
.SIM_DEVICE ("7SERIES") // Simulation device family - "VIRTEX6", "VIRTEX5" and "7_SERIES" // "7SERIES"
) RAMB36E1_i
(
// Port A (Read port in SDP mode):
.DOADO (data_out16), // Port A data/LSB data[15:0], output
.DOPADOP (), // Port A parity/LSB parity[1:0], output
.DIADI (16'h0), // Port A data/LSB data[15:0], input
.DIPADIP (2'h0), // Port A parity/LSB parity[1:0], input
.ADDRARDADDR ({raddr,{LOG2WIDTH_RD{1'b1}}}), // Port A (read port in SDP) address [13:0], unused should be high, input
.CLKARDCLK (rclk), // Port A (read port in SDP) clock, input
.ENARDEN (ren), // Port A (read port in SDP) Enable, input
.REGCEAREGCE (regen), // Port A (read port in SDP) register enable, input
.RSTRAMARSTRAM (1'b0), // Port A (read port in SDP) set/reset, input
.RSTREGARSTREG (1'b0), // Port A (read port in SDP) register set/reset, input
.WEA (2'b0), // Port A (read port in SDP) Write Enable[3:0], input
// Port B
.DOBDO (), // Port B data/MSB data[31:0], output
.DOPBDOP (), // Port B parity/MSB parity[3:0], output
.DIBDI (data_in16), // Port B data/MSB data[31:0], input
.DIPBDIP (2'b0), // Port B parity/MSB parity[3:0], input
.ADDRBWRADDR ({waddr,{LOG2WIDTH_WR{1'b1}}}), // Port B (write port in SDP) address [13:0], unused should be high, input
.CLKBWRCLK (wclk), // Port B (write port in SDP) clock, input
.ENBWREN (we), // Port B (write port in SDP) Enable, input
.REGCEB (1'b0), // Port B (write port in SDP) register enable, input
.RSTRAMB (1'b0), // Port B (write port in SDP) set/reset, input
.RSTREGB (1'b0), // Port B (write port in SDP) register set/reset, input
.WEBWE (web[3:0]) // Port B (write port in SDP) Write Enable[3:0], input
);
endmodule
// Write port less than 32bits, read port 32 bit widths
module ram18_lt32w_32r
#(
parameter integer REGISTERS = 0, // 1 - registered output
parameter integer LOG2WIDTH_WR = 4 // WIDTH= 1 << LOG2WIDTH
)
(
input rclk, // clock for read port
input [8:0] raddr, // read address
input ren, // read port enable
input regen, // output register enable
output [31:0] data_out, // data out
input wclk, // clock for read port
input [13-LOG2WIDTH_WR:0] waddr, // write address
input we, // write port enable
input [ 3:0] web, // write byte enable
input [(1 << LOG2WIDTH_WR)-1:0] data_in // data out
);
localparam PWIDTH_WR = (LOG2WIDTH_WR > 2)? (9 << (LOG2WIDTH_WR - 3)): (1 << LOG2WIDTH_WR);
localparam PWIDTH_RD = 36;
localparam WIDTH_WR = 1 << LOG2WIDTH_WR;
wire [WIDTH_WR+15:0] data_in_ext = {16'b0,data_in[WIDTH_WR-1:0]};
wire [15:0] data_in16=data_in_ext[15:0];
RAMB18E1
#(
.RSTREG_PRIORITY_A ("RSTREG"), // Valid: "RSTREG" or "REGCE"
.RSTREG_PRIORITY_B ("RSTREG"), // Valid: "RSTREG" or "REGCE"
.DOA_REG (REGISTERS), // Valid: 0 (no output registers) and 1 - one output register (in SDP - to lower 18)
.DOB_REG (REGISTERS), // Valid: 0 (no output registers) and 1 - one output register (in SDP - to lower 18)
.READ_WIDTH_A (PWIDTH_RD), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.READ_WIDTH_B (0), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.WRITE_WIDTH_A (0), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.WRITE_WIDTH_B (PWIDTH_WR), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.RAM_MODE ("SDP"), // Valid "TDP" (true dual-port) and "SDP" - simple dual-port
.WRITE_MODE_A ("WRITE_FIRST"), // Valid: "WRITE_FIRST", "READ_FIRST", "NO_CHANGE"
.WRITE_MODE_B ("WRITE_FIRST"), // Valid: "WRITE_FIRST", "READ_FIRST", "NO_CHANGE"
.RDADDR_COLLISION_HWCONFIG ("DELAYED_WRITE"),// Valid: "DELAYED_WRITE","PERFORMANCE" (no access to the same page)
.SIM_COLLISION_CHECK ("ALL"), // Valid: "ALL", "GENERATE_X_ONLY", "NONE", and "WARNING_ONLY"
.INIT_FILE ("NONE"), // "NONE" or filename with initialization data
.SIM_DEVICE ("7SERIES") // Simulation device family - "VIRTEX6", "VIRTEX5" and "7_SERIES" // "7SERIES"
) RAMB36E1_i
(
// Port A (Read port in SDP mode):
.DOADO (data_out[15:0]), // Port A data/LSB data[15:0], output
.DOPADOP (data_out[17:16]),// Port A parity/LSB parity[3:0], output
.DIADI (16'h0), // Port A data/LSB data[31:0], input
.DIPADIP (2'h0), // Port A parity/LSB parity[3:0], input
.ADDRARDADDR ({raddr[8:0],5'b11111}), // Port A (read port in SDP) address [15:0]. used from [14] down, unused should be high, input
.CLKARDCLK (rclk), // Port A (read port in SDP) clock, input
.ENARDEN (ren), // Port A (read port in SDP) Enable, input
.REGCEAREGCE (regen), // Port A (read port in SDP) register enable, input
.RSTRAMARSTRAM (1'b0), // Port A (read port in SDP) set/reset, input
.RSTREGARSTREG (1'b0), // Port A (read port in SDP) register set/reset, input
.WEA (2'b0), // Port A (read port in SDP) Write Enable[3:0], input
// Port B
.DOBDO (data_out[31:16]),// Port B data/MSB data[31:0], output
.DOPBDOP (), // Port B parity/MSB parity[3:0], output
.DIBDI (data_in16), // Port B data/MSB data[31:0], input
.DIPBDIP (2'b0), // Port B parity/MSB parity[3:0], input
.ADDRBWRADDR ({waddr,{LOG2WIDTH_WR{1'b1}}}), // Port B (write port in SDP) address [15:0]. used from [14] down, unused should be high, input
.CLKBWRCLK (wclk), // Port B (write port in SDP) clock, input
.ENBWREN (we), // Port B (write port in SDP) Enable, input
.REGCEB (1'b0), // Port B (write port in SDP) register enable, input
.RSTRAMB (1'b0), // Port B (write port in SDP) set/reset, input
.RSTREGB (1'b0), // Port B (write port in SDP) register set/reset, input
.WEBWE (web[3:0]) // Port B (write port in SDP) Write Enable[7:0], input
);
endmodule
// Write port 64 bita, read port - less than 64 bits
module ram18_32w_lt32r
#(
parameter integer REGISTERS = 0, // 1 - registered output
// parameter integer LOG2WIDTH_WR = 4, // WIDTH= 1 << LOG2WIDTH
parameter integer LOG2WIDTH_RD = 4 // WIDTH= 1 << LOG2WIDTH
)
(
input rclk, // clock for read port
input [13-LOG2WIDTH_RD:0] raddr, // read address
input ren, // read port enable
input regen, // output register enable
output [(1 << LOG2WIDTH_RD)-1:0] data_out, // data out
input wclk, // clock for read port
input [8:0] waddr, // write address
input we, // write port enable
input [ 3:0] web, // write byte enable
input [31:0] data_in // data out
);
localparam PWIDTH_WR = 72;
localparam PWIDTH_RD = (LOG2WIDTH_RD > 2)? (9 << (LOG2WIDTH_RD - 3)): (1 << LOG2WIDTH_RD);
localparam WIDTH_RD = 1 << LOG2WIDTH_RD;
wire [15:0] data_out16;
assign data_out=data_out16[WIDTH_RD-1:0];
RAMB18E1
#(
.RSTREG_PRIORITY_A ("RSTREG"), // Valid: "RSTREG" or "REGCE"
.RSTREG_PRIORITY_B ("RSTREG"), // Valid: "RSTREG" or "REGCE"
.DOA_REG (REGISTERS), // Valid: 0 (no output registers) and 1 - one output register (in SDP - to lower 18)
.DOB_REG (REGISTERS), // Valid: 0 (no output registers) and 1 - one output register (in SDP - to lower 18)
.READ_WIDTH_A (PWIDTH_RD), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.READ_WIDTH_B (0), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.WRITE_WIDTH_A (0), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.WRITE_WIDTH_B (PWIDTH_WR), // Valid: 0,1,2,4,9,18 and in SDP mode - 36 (should be 0 if port is not used)
.RAM_MODE ("SDP"), // Valid "TDP" (true dual-port) and "SDP" - simple dual-port
.WRITE_MODE_A ("WRITE_FIRST"), // Valid: "WRITE_FIRST", "READ_FIRST", "NO_CHANGE"
.WRITE_MODE_B ("WRITE_FIRST"), // Valid: "WRITE_FIRST", "READ_FIRST", "NO_CHANGE"
.RDADDR_COLLISION_HWCONFIG ("DELAYED_WRITE"),// Valid: "DELAYED_WRITE","PERFORMANCE" (no access to the same page)
.SIM_COLLISION_CHECK ("ALL"), // Valid: "ALL", "GENERATE_X_ONLY", "NONE", and "WARNING_ONLY"
.INIT_FILE ("NONE"), // "NONE" or filename with initialization data
.SIM_DEVICE ("7SERIES") // Simulation device family - "VIRTEX6", "VIRTEX5" and "7_SERIES" // "7SERIES"
) RAMB36E1_i
(
// Port A (Read port in SDP mode):
.DOADO (data_out16), // Port A data/LSB data[15:0], output
.DOPADOP (), // Port A parity/LSB parity[1:0], output
.DIADI (data_in[15:0]), // Port A data/LSB data[15:0], input
.DIPADIP (2'b0), // Port A parity/LSB parity[1:0], input
.ADDRARDADDR ({raddr,{LOG2WIDTH_RD{1'b1}}}), // Port A (read port in SDP) address [13:0], unused should be high, input
.CLKARDCLK (rclk), // Port A (read port in SDP) clock, input
.ENARDEN (ren), // Port A (read port in SDP) Enable, input
.REGCEAREGCE (regen), // Port A (read port in SDP) register enable, input
.RSTRAMARSTRAM (1'b0), // Port A (read port in SDP) set/reset, input
.RSTREGARSTREG (1'b0), // Port A (read port in SDP) register set/reset, input
.WEA (2'b0), // Port A (read port in SDP) Write Enable[1:0], input
// Port B
.DOBDO (), // Port B data/MSB data[15:0], output
.DOPBDOP (), // Port B parity/MSB parity[1:0], output
.DIBDI (data_in[31:16]), // Port B data/MSB data[15:0], input
.DIPBDIP (2'b0), // Port B parity/MSB parity[1:0], input
.ADDRBWRADDR({waddr[8:0],5'b11111}), // Port B (write port in SDP) address [13:0], unused should be high, input
.CLKBWRCLK (wclk), // Port B (write port in SDP) clock, input
.ENBWREN (we), // Port B (write port in SDP) Enable, input
.REGCEB (1'b0), // Port B (write port in SDP) register enable, input
.RSTRAMB (1'b0), // Port B (write port in SDP) set/reset, input
.RSTREGB (1'b0), // Port B (write port in SDP) register set/reset, input
.WEBWE (web[3:0]) // Port B (write port in SDP) Write Enable[7:0], input
);
endmodule
module ram18_dummy
#(
parameter integer LOG2WIDTH_RD = 4 // WIDTH= 1 << LOG2WIDTH
)
(
output [(1 << LOG2WIDTH_RD)-1:0] data_out // data out
);
assign data_out=0;
endmodule
endmodule
......@@ -100,7 +100,7 @@ module ram18p_var_w_var_r
.data_out(data_out)
);
else if ((LOG2WIDTH_WR == 5) && (LOG2WIDTH_RD == 5))
ramp_32w_32r #(
ram18p_32w_32r #(
.REGISTERS (REGISTERS)
) ram_i (
.rclk (rclk), // input
......@@ -131,7 +131,7 @@ module ram18p_var_w_var_r
.data_in (data_in) // input[35:0]
);
else if ((LOG2WIDTH_WR < 5) && (LOG2WIDTH_RD == 5))
ramp_lt32w_32r #(
ram18p_lt32w_32r #(
.REGISTERS (REGISTERS),
.LOG2WIDTH_WR (LOG2WIDTH_WR)
) ram_i (
......@@ -147,7 +147,7 @@ module ram18p_var_w_var_r
.data_in (data_in) // input[(<35):0]
);
else if ((LOG2WIDTH_WR < 5) && (LOG2WIDTH_RD < 5))
ramp_lt32w_lt32r #(
ram18p_lt32w_lt32r #(
.REGISTERS (REGISTERS),
.LOG2WIDTH_WR (LOG2WIDTH_WR),
.LOG2WIDTH_RD (LOG2WIDTH_RD)
......@@ -168,7 +168,7 @@ endmodule
// Both ports with 32 bit widths
module ramp_32w_32r
module ram18p_32w_32r
#(
parameter integer REGISTERS = 0 // 1 - registered output
)
......@@ -237,7 +237,7 @@ endmodule
// Both ports with less than 32 bit widths
module ramp_lt32w_lt32r
module ram18p_lt32w_lt32r
#(
parameter integer REGISTERS = 0, // 1 - registered output
parameter integer LOG2WIDTH_WR = 4, // WIDTH= 1 << LOG2WIDTH
......@@ -321,7 +321,7 @@ module ramp_lt32w_lt32r
endmodule
// Write port less than 32bits, read port 32 bit widths
module ramp_lt32w_32r
module ram18p_lt32w_32r
#(
parameter integer REGISTERS = 0, // 1 - registered output
parameter integer LOG2WIDTH_WR = 4 // WIDTH= 1 << LOG2WIDTH
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment