Commit ca269a20 authored by Andrey Filippov's avatar Andrey Filippov

Implemented parallel 32-bit escaping of 0xff bytes

parent 3a04a2c6
......@@ -28,11 +28,11 @@ module bit_stuffer_27_32#(
input [DIN_LEN-1:0] din, // input data, MSB aligned
input [4:0] dlen, // input data width
input ds, // input data valid
input flush_in, // flush remaining data
input flush_in, // flush remaining data - should be after last ds. Also prepares for the next block
output [31:0] d_out, // outpt 32-bit data
output reg [2:0] bytes_out, // bytes left when flush?
output reg [1:0] bytes_out, // (0 means 4) valid with dv
output reg dv, // output data valid
output flush_out // delayed flush in matching the data latency
output reg flush_out // delayed flush in matching the data latency
);
localparam DATA1_LEN = DIN_LEN + 32 - 8;
localparam DATA2_LEN = DIN_LEN + 32 - 2;
......@@ -40,30 +40,32 @@ module bit_stuffer_27_32#(
reg [DATA1_LEN-1:0] data1; // first stage of the barrel shifter
reg [DATA2_LEN-1:0] data2; // second stage of the barrel shifter
reg [DATA3_LEN-1:0] data3; // second stage of the barrel shifter/ output register
// reg dv_r;
// assign dv = dv_r;
reg [5:0] early_length; // number of bits in the last word (mod 32)
reg [5:0] dlen1; // use for the stage 2, MSB - carry out
reg [5:0] dlen2; // use for the satge 3
reg [5:0] dlen2; // use for the stege 3
reg [31:0] dmask2_rom; // data mask (sync with data2) - 1 use new data, 0 - use old data. Use small ROM?
reg [2:0] stage; // enable shifter stage
wire [5:0] pre_bits_out_w = dlen2[4:0] + 5'h7;
reg [1:0] stage; // delayed ds or flush
reg [1:0] ds_stage;
reg [2:0] flush_stage;
wire [4:0] pre_bits_out_w = dlen2[4:0] + 5'h7;
assign d_out = data3[DATA3_LEN-1 -: 32];
always @ (posedge xclk) begin
if (rst) bytes_out <= 0;
else if (stage[1]) bytes_out <= pre_bits_out_w[5:3];
if (rst) stage <= 0;
else stage <= {stage[1:0], ds};
else stage <= {stage[0], ds | flush_in};
if (rst) ds_stage <= 0;
else ds_stage <= {ds_stage[0], ds};
if (rst) early_length <= 0;
if (rst) flush_stage <= 0;
else flush_stage <= {flush_stage[1:0], ds};
if (rst || flush_in) early_length <= 0;
else if (ds) early_length <= early_length[4:0] + dlen; // early_length[5] is not used in calculations, it is just carry out
if (rst) dlen1 <= 0;
......@@ -72,6 +74,7 @@ module bit_stuffer_27_32#(
if (rst) dlen2 <= 0;
else if (stage[0]) dlen2 <= dlen1; // previous value (position)
// barrel shifter stage 1 (0/8/16/24)
if (ds) case (early_length[4:3])
2'h0: data1 <= { din, 24'b0};
......@@ -122,13 +125,18 @@ module bit_stuffer_27_32#(
5'h1f: dmask2_rom <= 32'h80000000;
endcase
// barrel shifter stage 3 (0/1), combined with output/hold register
if (stage[1]) begin
if (ds_stage[1]) begin
data3[DATA3_LEN-1 -: 32] <= (~dmask2_rom & (dlen2[5] ? {data3[DATA3_LEN-1-32 : 0],6'b0}: data3[DATA3_LEN-1 -: 32])) |
( dmask2_rom & (dlen2[0] ? {1'b0,data2[DATA2_LEN-1 -: 31]} : data2[DATA2_LEN-1 -: 32]));
data3[DATA3_LEN-1-32: 0] <= dlen2[0] ? data2[DATA2_LEN-31-1 : 0] : {data2[DATA2_LEN-32-1 : 0], 1'b0};
end
dv <= stage[1] && dlen2[5];
dv <= (ds_stage[1] && dlen2[5]) || (flush_stage[1] && !(|data3[DATA3_LEN-1 -: 32]));
if (rst || ds_stage[1]) bytes_out <= 0; // if the dv was caused by 32 bits full - output 4 bytes
else if (flush_stage[1]) bytes_out <= pre_bits_out_w[4:3];
flush_out <= flush_stage[2];
end
......
/*******************************************************************************
* Module: bit_stuffer_escape
* Date:2015-10-24
* Author: andrey
* Description: Escapes each 0xff byte by inserting a 0x00 byte after it, 32-bit input and output
*
* Copyright (c) 2015 Elphel, Inc .
* bit_stuffer_escape.v is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* bit_stuffer_escape.v is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/> .
*******************************************************************************/
`timescale 1ns/1ps
// bit_stuffer_escape: inserts a 0x00 byte after every 0xff byte of a 32-bit wide, MSB-first byte stream.
//
// Each input word is split into four byte lanes; lane i (din[8*i +: 8]) is written to its own 9-bit
// FIFO together with an "is 0xff" flag. Output words are assembled from the FIFO outputs rotated by
// fifo_byte_pntr (so that the next unread byte is in the MSB position), with zero bytes inserted
// after each 0xff. Every inserted zero means one input byte less is consumed for this output word,
// so the pointer moves back by the number of inserted zeros (modulo 4).
// cry_ff remembers that the last byte of the previous output word was 0xff, so the escape zero has
// to go to the MSB of the next output word.
//
// NOTE(review): cry_ff and fifo_byte_pntr are cleared only by rst, not by a flush - confirm that rst
// is applied between blocks if each block has to start byte-aligned.
module  bit_stuffer_escape(
    input             xclk,      // pixel clock, sync to incoming data
    input             rst,       // @xclk
    // data from external FIFO (35x16 should be OK)
    input      [31:0] din,       // input data, MSB aligned
    input       [1:0] bytes_in,  // number of valid bytes in din (counted from the MSB), valid @ in_stb (0 means 4)
    input             flush_in,  // end of input data (ignore din/bytes_in)
    input             in_stb,    // input data/bytes_in/flush_in strobe
    output reg [31:0] d_out,     // output 32-bit data
    output reg  [1:0] bytes_out, // valid @dv(only), 0 means 4 bytes
    output reg        dv,        // output data valid
    output reg        flush_out  // delayed flush in matching the data latency
);
    // per-byte "is 0xff" flags of the input word, bit 3 corresponds to the MSB byte
    wire  [3:0] in_ff = {&din[31:24], &din[23:16], &din[15:8], &din[7:0]};
    wire  [3:0] fifo_nempty;
    wire  [3:0] fifo_ff;
    wire  [3:0] fifo_re;
    wire [31:0] fifo_pre_out;
    // mask output for flushing: bytes of the empty FIFOs are replaced with zeros
    wire [31:0] fifo_out = fifo_pre_out & {{8{fifo_nempty[3]}},
                                           {8{fifo_nempty[2]}},
                                           {8{fifo_nempty[1]}},
                                           {8{fifo_nempty[0]}}};
    // flush_pend[0] - flush requested, waiting for the complete words to go out
    // flush_pend[1] - single-cycle pulse: read and output whatever is left in the FIFOs
    // flush_pend[2] - flush_pend[1] delayed by one clock (source of flush_out)
    reg   [2:0] flush_pend;

    // byte lanes to be written, the valid bytes are MSB-aligned
    reg   [3:0] bytes_in_mask_w;
    always @* case (bytes_in)
        2'h0 : bytes_in_mask_w <= 4'b1111;
        2'h1 : bytes_in_mask_w <= 4'b1000;
        2'h2 : bytes_in_mask_w <= 4'b1100;
        2'h3 : bytes_in_mask_w <= 4'b1110;
    endcase

    // one FIFO per byte lane, each entry is {is_0xff flag, data byte}
    generate
        genvar i;
        for (i = 0; i < 4; i = i+1) begin: byte_fifo_block
            fifo_same_clock #(
                .DATA_WIDTH(9),
                .DATA_DEPTH(4)
            ) fifo_same_clock_i (
                .rst       (1'b0),                                    // input
                .clk       (xclk),                                    // input
                .sync_rst  (rst),                                     // input
                .we        (in_stb && bytes_in_mask_w[i]),            // input
                .re        (fifo_re[i]),                              // input
                .data_in   ({in_ff[i], din[8*i +: 8]}),               // input[8:0]
                .data_out  ({fifo_ff[i], fifo_pre_out[8*i +: 8]}),    // output[8:0]
                .nempty    (fifo_nempty[i]),                          // output
                .half_full ()                                         // output reg
            );
        end
    endgenerate

    reg         cry_ff;         // 0xff was the last byte in the previous word
    reg   [1:0] fifo_byte_pntr; // byte pointer in fifo output, starting from MSB (0)

    // FIFO outputs rotated left by fifo_byte_pntr bytes, so the next unread byte is at the MSB position
    wire  [3:0] fifo_ff_barrel_w = fifo_byte_pntr[1] ?
                   (fifo_byte_pntr[0] ? {fifo_ff[0],   fifo_ff[3:1]} : {fifo_ff[1:0], fifo_ff[3:2]}) :
                   (fifo_byte_pntr[0] ? {fifo_ff[2:0], fifo_ff[3]}   :  fifo_ff[3:0]);
    wire  [3:0] fifo_nempty_barrel_w = fifo_byte_pntr[1] ?
                   (fifo_byte_pntr[0] ? {fifo_nempty[0],   fifo_nempty[3:1]} : {fifo_nempty[1:0], fifo_nempty[3:2]}) :
                   (fifo_byte_pntr[0] ? {fifo_nempty[2:0], fifo_nempty[3]}   :  fifo_nempty[3:0]);
    wire [31:0] fifo_out_barrel_w = fifo_byte_pntr[1] ?
                   (fifo_byte_pntr[0] ? {fifo_out[7:0],  fifo_out[31: 8]} : {fifo_out[15:0], fifo_out[31:16]}) :
                   (fifo_byte_pntr[0] ? {fifo_out[23:0], fifo_out[31:24]} :  fifo_out[31:0]);

    // following registers are combinatorial signals
    reg         sel3_w;         // select source for byte3 (MSB) from the barrel-shifted: 0 - its own, 1 - zero (escape)
    reg   [1:0] sel2_w;         // select source for byte2 from the barrel-shifted: 0 - its own, 1 - next higher byte,
                                // 3 - zero (escape)
    reg   [1:0] sel1_w;         // select source for byte1 from the barrel-shifted: 0 - its own, 1 - next higher byte,
                                // 3 - zero (escape)
    reg   [1:0] sel0_w;         // select source for byte0 (LSB) from the barrel-shifted: 0 - its own, 1 - next higher byte,
                                // 2 - two bytes higher, 3 - zero (escape)
    reg         cry_ff_w;       // next value for cry_ff
    reg   [3:0] bytes_rdy_w;    // data is available to generate an output word
    wire        rdy_w = &bytes_rdy_w;
    reg   [1:0] num_zeros_w;    // number of escape zeros in the output word
    reg   [3:0] fifo_re_mask_w; // which fifo to read, bitmask (to be AND-ed with &bytes_rdy_w[3:0])

    // All selectors below decode {cry_ff, ff flags of the rotated bytes 3..0}

    // byte3 (MSB) is an escape zero only when the previous word ended with 0xff
    always @* casex ({cry_ff, fifo_ff_barrel_w})
        5'b0xxxx: sel3_w <= 0;
        default:  sel3_w <= 1;
    endcase

    always @* casex ({cry_ff, fifo_ff_barrel_w})
        5'b00xxx: sel2_w <= 0;
        5'b1xxxx: sel2_w <= 1; // byte3 is zero, so source byte3 moves down to byte2
        default:  sel2_w <= 3; // source byte3 was 0xff - escape it
    endcase

    always @* casex ({cry_ff, fifo_ff_barrel_w})
        5'b000xx: sel1_w <= 0;
        5'b01xxx: sel1_w <= 1;
        5'b10xxx: sel1_w <= 1;
        default:  sel1_w <= 3;
    endcase

    always @* casex ({cry_ff, fifo_ff_barrel_w})
        5'b0000x: sel0_w <= 0;
        5'b001xx: sel0_w <= 1;
        5'b010xx: sel0_w <= 1;
        5'b100xx: sel0_w <= 1;
        5'b11xxx: sel0_w <= 2; // two zeros above (byte3 and byte1) - use source byte2
        default:  sel0_w <= 3;
    endcase

    // set when the byte that lands in the output LSB is 0xff
    always @* casex ({cry_ff, fifo_ff_barrel_w})
        5'b00001: cry_ff_w <= 1;
        5'b0011x: cry_ff_w <= 1;
        5'b0101x: cry_ff_w <= 1;
        5'b1001x: cry_ff_w <= 1;
        5'b111xx: cry_ff_w <= 1;
        default:  cry_ff_w <= 0;
    endcase

    // an output byte is ready if it is an escape zero or if its source FIFO is not empty
    always @* case (sel3_w)
        1'b0 : bytes_rdy_w[3] <= fifo_nempty_barrel_w[3];
        1'b1 : bytes_rdy_w[3] <= 1;
    endcase

    always @* case (sel2_w)
        2'b00   : bytes_rdy_w[2] <= fifo_nempty_barrel_w[2];
        2'b01   : bytes_rdy_w[2] <= fifo_nempty_barrel_w[3];
        2'b11   : bytes_rdy_w[2] <= 1;
        default : bytes_rdy_w[2] <= 'bx; // sel2_w is never 2
    endcase

    always @* case (sel1_w)
        2'b00   : bytes_rdy_w[1] <= fifo_nempty_barrel_w[1];
        2'b01   : bytes_rdy_w[1] <= fifo_nempty_barrel_w[2];
        2'b11   : bytes_rdy_w[1] <= 1;
        default : bytes_rdy_w[1] <= 'bx; // sel1_w is never 2
    endcase

    always @* case (sel0_w)
        2'b00 : bytes_rdy_w[0] <= fifo_nempty_barrel_w[0];
        2'b01 : bytes_rdy_w[0] <= fifo_nempty_barrel_w[1];
        2'b10 : bytes_rdy_w[0] <= fifo_nempty_barrel_w[2];
        2'b11 : bytes_rdy_w[0] <= 1;
    endcase

    // Number of escape zeros placed in the output word (equals 4 minus the number of consumed bytes).
    // With cry_ff set and source byte3 == 0xff the word is always {00, ff, 00, byte2}: two zeros,
    // regardless of byte2 being 0xff or not, so bit 2 of the pattern is "don't care".
    always @* casex ({cry_ff, fifo_ff_barrel_w})
        5'b0001x: num_zeros_w <= 1;
        5'b001xx: num_zeros_w <= 1;
        5'b010xx: num_zeros_w <= 1;
        5'b011xx: num_zeros_w <= 2;
        5'b100xx: num_zeros_w <= 1;
        5'b101xx: num_zeros_w <= 2;
        5'b11xxx: num_zeros_w <= 2;
        default:  num_zeros_w <= 0;
    endcase

    // FIFOs holding the consumed bytes: (4 - num_zeros_w) lanes starting from the one pointed to by fifo_byte_pntr
    always @* casex ({num_zeros_w, fifo_byte_pntr})
        4'b00xx: fifo_re_mask_w <= 4'b1111;
        4'b0100: fifo_re_mask_w <= 4'b1110;
        4'b0101: fifo_re_mask_w <= 4'b0111;
        4'b0110: fifo_re_mask_w <= 4'b1011;
        4'b0111: fifo_re_mask_w <= 4'b1101;
        4'b1000: fifo_re_mask_w <= 4'b1100;
        4'b1001: fifo_re_mask_w <= 4'b0110;
        4'b1010: fifo_re_mask_w <= 4'b0011;
        4'b1011: fifo_re_mask_w <= 4'b1001;
        default: fifo_re_mask_w <= 'bx; // impossible num_zeros_w
    endcase

    assign fifo_re = flush_pend[1] ? fifo_nempty : (rdy_w ? fifo_re_mask_w : 4'b0); // when flushing read whatever is left

    always @(posedge xclk) begin
        if      (rst)   cry_ff <= 0;
        else if (rdy_w) cry_ff <= cry_ff_w;

        // each inserted zero leaves one more source byte unread: advance by (4 - num_zeros_w) modulo 4
        if      (rst)   fifo_byte_pntr <= 0;
        else if (rdy_w) fifo_byte_pntr <= fifo_byte_pntr - num_zeros_w;

        // output a full word, or (when flushing) a partial one if there is a pending escape zero or any data left
        dv <= rdy_w || (flush_pend[1] && (cry_ff || (|fifo_nempty)));

        if (rdy_w || (flush_pend[1] && (cry_ff || (|fifo_nempty)))) begin
            case (sel3_w)
                1'b0 : d_out[31:24] <= fifo_out_barrel_w[31:24];
                1'b1 : d_out[31:24] <= 8'b0;
            endcase
            case (sel2_w)
                2'b00   : d_out[23:16] <= fifo_out_barrel_w[23:16];
                2'b01   : d_out[23:16] <= fifo_out_barrel_w[31:24];
                2'b11   : d_out[23:16] <= 8'b0;
                default : d_out[23:16] <= 'bx;
            endcase
            case (sel1_w)
                2'b00   : d_out[15: 8] <= fifo_out_barrel_w[15: 8];
                2'b01   : d_out[15: 8] <= fifo_out_barrel_w[23:16];
                2'b11   : d_out[15: 8] <= 8'b0;
                default : d_out[15: 8] <= 'bx;
            endcase
            case (sel0_w)
                2'b00   : d_out[ 7: 0] <= fifo_out_barrel_w[ 7: 0];
                2'b01   : d_out[ 7: 0] <= fifo_out_barrel_w[15: 8];
                2'b10   : d_out[ 7: 0] <= fifo_out_barrel_w[23:16]; // two bytes higher
                2'b11   : d_out[ 7: 0] <= 8'b0;
                default : d_out[ 7: 0] <= 'bx;
            endcase
        end

        // remember the flush request until it is served
        if      (rst)           flush_pend[0] <= 0;
        else if (flush_in)      flush_pend[0] <= 1;
        else if (flush_pend[1]) flush_pend[0] <= 0;

        // serve the flush (single-cycle pulse) only when no complete output word can be generated
        if (rst) flush_pend[1] <= 0;
        else     flush_pend[1] <= flush_pend[0] && !flush_pend[1] && !rdy_w;

        flush_pend[2] <= flush_pend[1];
        flush_out     <= flush_pend[2];

        // number of valid output bytes: count of the leading (from MSB) ready bytes
        if (rdy_w || flush_pend[1]) casex (bytes_rdy_w[3:0])
            4'b10xx : bytes_out <= 1;
            4'b110x : bytes_out <= 2;
            4'b1110 : bytes_out <= 3;
            default : bytes_out <= 0; // all 4 bytes
        endcase
    end
endmodule
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment