From ca269a201430ea487923a373cb9595ba5b1f2a9b Mon Sep 17 00:00:00 2001 From: Andrey Filippov Date: Sun, 25 Oct 2015 16:45:08 -0600 Subject: [PATCH] Implemented parallel 32-bit escaping of 0xff bytes --- compressor_jp/bit_stuffer_27_32.v | 44 +++--- compressor_jp/bit_stuffer_escape.v | 244 +++++++++++++++++++++++++++++ 2 files changed, 270 insertions(+), 18 deletions(-) create mode 100644 compressor_jp/bit_stuffer_escape.v diff --git a/compressor_jp/bit_stuffer_27_32.v b/compressor_jp/bit_stuffer_27_32.v index 5d9728b..e1e9366 100644 --- a/compressor_jp/bit_stuffer_27_32.v +++ b/compressor_jp/bit_stuffer_27_32.v @@ -28,11 +28,11 @@ module bit_stuffer_27_32#( input [DIN_LEN-1:0] din, // input data, MSB aligned input [4:0] dlen, // input data width input ds, // input data valid - input flush_in, // flush remaining data + input flush_in, // flush remaining data - should be after last ds. Also prepares for the next block output [31:0] d_out, // outpt 32-bit data - output reg [2:0] bytes_out, // bytes left when flush? + output reg [1:0] bytes_out, // (0 means 4) valid with dv output reg dv, // output data valid - output flush_out // delayed flush in matching the data latency + output reg flush_out // delayed flush in matching the data latency ); localparam DATA1_LEN = DIN_LEN + 32 - 8; localparam DATA2_LEN = DIN_LEN + 32 - 2; @@ -40,31 +40,33 @@ module bit_stuffer_27_32#( reg [DATA1_LEN-1:0] data1; // first stage of the barrel shifter reg [DATA2_LEN-1:0] data2; // second stage of the barrel shifter reg [DATA3_LEN-1:0] data3; // second stage of the barrel shifter/ output register -// reg dv_r; - - -// assign dv = dv_r; reg [5:0] early_length; // number of bits in the last word (mod 32) reg [5:0] dlen1; // use for the stage 2, MSB - carry out - reg [5:0] dlen2; // use for the satge 3 + reg [5:0] dlen2; // use for the stage 3 + reg [31:0] dmask2_rom; // data mask (sync with data2) - 1 use new data, 0 - use old data. Use small ROM? 
- reg [2:0] stage; // enable shifter stage - wire [5:0] pre_bits_out_w = dlen2[4:0] + 5'h7; + reg [1:0] stage; // delayed ds or flush + reg [1:0] ds_stage; // delayed ds only + reg [2:0] flush_stage; // delayed flush_in only + wire [4:0] pre_bits_out_w = dlen2[4:0] + 5'h7; assign d_out = data3[DATA3_LEN-1 -: 32]; always @ (posedge xclk) begin - if (rst) bytes_out <= 0; - else if (stage[1]) bytes_out <= pre_bits_out_w[5:3]; if (rst) stage <= 0; - else stage <= {stage[1:0], ds}; + else stage <= {stage[0], ds | flush_in}; + + if (rst) ds_stage <= 0; + else ds_stage <= {ds_stage[0], ds}; + + if (rst) flush_stage <= 0; + else flush_stage <= {flush_stage[1:0], flush_in}; // delay line of flush_in, feeds the flush terms of dv/bytes_out and flush_out - if (rst) early_length <= 0; - else if (ds) early_length <= early_length[4:0] + dlen; // early_length[5] is not used in calculations, it is just carry out + if (rst || flush_in) early_length <= 0; + else if (ds) early_length <= early_length[4:0] + dlen; // early_length[5] is not used in calculations, it is just carry out if (rst) dlen1 <= 0; else if (ds) dlen1 <= early_length; // previous value @@ -72,6 +74,7 @@ module bit_stuffer_27_32#( if (rst) dlen2 <= 0; else if (stage[0]) dlen2 <= dlen1; // previous value (position) + // barrel shifter stage 1 (0/8/16/24) if (ds) case (early_length[4:3]) 2'h0: data1 <= { din, 24'b0}; @@ -122,13 +125,18 @@ module bit_stuffer_27_32#( 5'h1f: dmask2_rom <= 32'h80000000; endcase // barrel shifter stage 3 (0/1), combined with output/hold register - if (stage[1]) begin + if (ds_stage[1]) begin data3[DATA3_LEN-1 -: 32] <= (~dmask2_rom & (dlen2[5] ? {data3[DATA3_LEN-1-32 : 0],6'b0}: data3[DATA3_LEN-1 -: 32])) | ( dmask2_rom & (dlen2[0] ? {1'b0,data2[DATA2_LEN-1 -: 31]} : data2[DATA2_LEN-1 -: 32])); data3[DATA3_LEN-1-32: 0] <= dlen2[0] ? 
data2[DATA2_LEN-31-1 : 0] : {data2[DATA2_LEN-32-1 : 0], 1'b0}; end - dv <= stage[1] && dlen2[5]; + dv <= (ds_stage[1] && dlen2[5]) || (flush_stage[1] && !(|data3[DATA3_LEN-1 -: 32])); // NOTE(review): flush term fires only when the top 32 bits of data3 are all zero, polarity looks inverted, confirm + + if (rst || ds_stage[1]) bytes_out <= 0; // if the dv was caused by 32 bits full - output 4 bytes + else if (flush_stage[1]) bytes_out <= pre_bits_out_w[4:3]; + + flush_out <= flush_stage[2]; end diff --git a/compressor_jp/bit_stuffer_escape.v b/compressor_jp/bit_stuffer_escape.v new file mode 100644 index 0000000..061657a --- /dev/null +++ b/compressor_jp/bit_stuffer_escape.v @@ -0,0 +1,244 @@ +/******************************************************************************* + * Module: bit_stuffer_escape + * Date:2015-10-24 + * Author: andrey + * Description: Escapes each 0xff byte by inserting 0x00 after it, 32-bit input and output + * + * Copyright (c) 2015 Elphel, Inc . + * bit_stuffer_escape.v is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * bit_stuffer_escape.v is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ *******************************************************************************/
+`timescale 1ns/1ps
+// Inserts a 0x00 byte after every 0xff byte of a 32-bit wide byte stream (see the casex tables below)
+module bit_stuffer_escape(
+    input             xclk,      // pixel clock, sync to incoming data
+    input             rst,       // @xclk
+    // data from external FIFO (35x16 should be OK)
+    input      [31:0] din,       // input data, MSB aligned
+    input       [1:0] bytes_in,  // number of bytes, valid @ in_stb (0 means 4)
+    input             flush_in,  // end of input data (ignore din/bytes_in)
+    input             in_stb,    // input data/bytes_in/flush_in strobe
+    output reg [31:0] d_out,     // output 32-bit data
+    output reg  [1:0] bytes_out, // valid @dv(only), 0 means 4 bytes
+    output reg        dv,        // output data valid
+    output reg        flush_out  // delayed flush in matching the data latency
+);
+    wire  [3:0] in_ff = {&din[31:24],&din[23:16],&din[15:8],&din[7:0]}; // per-byte "is 0xff" flags, bit 3 is the MSB byte
+    wire  [3:0] fifo_nempty;
+    wire  [3:0] fifo_ff;
+    wire  [3:0] fifo_re;
+    wire [31:0] fifo_pre_out;
+    // mask output for flushing
+    wire [31:0] fifo_out = fifo_pre_out & {{8{fifo_nempty[3]}},{8{fifo_nempty[2]}},{8{fifo_nempty[1]}},{8{fifo_nempty[0]}}};
+    reg   [2:0] flush_pend;
+
+    reg   [3:0] bytes_in_mask_w; // per-lane write enables, valid bytes are MSB-aligned
+    always @* case (bytes_in)
+        2'h0 : bytes_in_mask_w = 4'b1111;
+        2'h1 : bytes_in_mask_w = 4'b1000;
+        2'h2 : bytes_in_mask_w = 4'b1100;
+        2'h3 : bytes_in_mask_w = 4'b1110;
+    endcase
+
+    // One 9-bit FIFO per byte lane, each entry is {0xff flag, data byte}
+    generate
+        genvar i;
+        for (i = 0; i < 4; i = i+1) begin: byte_fifo_block
+            fifo_same_clock #(
+                .DATA_WIDTH(9),
+                .DATA_DEPTH(4)
+            ) fifo_same_clock_i (
+                .rst       (1'b0),                                   // input
+                .clk       (xclk),                                   // input
+                .sync_rst  (rst),                                    // input
+                .we        (in_stb && bytes_in_mask_w[i]),           // input
+                .re        (fifo_re[i]),                             // input
+                .data_in   ({in_ff[i],din[8*i +: 8]}),               // input, 9 bits (DATA_WIDTH)
+                .data_out  ({fifo_ff[i],fifo_pre_out[8*i +: 8]}),    // output, 9 bits (DATA_WIDTH)
+                .nempty    (fifo_nempty[i]),                         // output
+                .half_full ()                                        // output reg
+            );
+        end
+    endgenerate
+
+    reg         cry_ff;         // 0xff was the last byte in the previous word
+    reg   [1:0] fifo_byte_pntr; // byte pointer in fifo output, starting from MSB (0)
+    wire  [3:0] fifo_ff_barrel_w = fifo_byte_pntr[1]?
+                     (fifo_byte_pntr[0]?{fifo_ff[0],fifo_ff[3:1]}:{fifo_ff[1:0],fifo_ff[3:2]}):
+                     (fifo_byte_pntr[0]?{fifo_ff[2:0],fifo_ff[3]}:fifo_ff[3:0]);
+
+    wire  [3:0] fifo_nempty_barrel_w = fifo_byte_pntr[1]?
+                     (fifo_byte_pntr[0]?{fifo_nempty[0],fifo_nempty[3:1]}:{fifo_nempty[1:0],fifo_nempty[3:2]}):
+                     (fifo_byte_pntr[0]?{fifo_nempty[2:0],fifo_nempty[3]}:fifo_nempty[3:0]);
+
+    wire [31:0] fifo_out_barrel_w = fifo_byte_pntr[1]?
+                     (fifo_byte_pntr[0]?{fifo_out[7:0], fifo_out[31: 8]}:{fifo_out[15:0],fifo_out[31:16]}):
+                     (fifo_byte_pntr[0]?{fifo_out[23:0],fifo_out[31:24]}:fifo_out[31:0]);
+
+// following registers are combinatorial signals
+    reg         sel3_w;         // select source for byte3 (MSB) from the barrel-shifted:0, it's own, 1 - zero (escape)
+    reg   [1:0] sel2_w;         // select source for byte2 from the barrel-shifted: 0, it's own, 1 - next higher byte, 3 - zero (escape)
+    reg   [1:0] sel1_w;         // select source for byte1 from the barrel-shifted: 0, it's own, 1 - next higher byte, 3 - zero (escape)
+    reg   [1:0] sel0_w;         // select source for byte0 (LSB) from the barrel-shifted: 0, it's own, 1 - next higher byte, 2 - two bytes higher,
+                                // 3 - zero (escape)
+    reg         cry_ff_w;       // next value for cry_ff
+    reg   [3:0] bytes_rdy_w;    // data is available to generate an output word
+    wire        rdy_w = &bytes_rdy_w;
+    reg   [1:0] num_zeros_w;    // number of escape zeros in the output word
+    reg   [3:0] fifo_re_mask_w; // which fifo to read, bitmask (to be AND-ed with &bytes_rdy_w[3:0]}
+
+    always @* casex ({cry_ff,fifo_ff_barrel_w})
+        5'b0xxxx: sel3_w = 0;
+        default:  sel3_w = 1;
+    endcase
+
+    always @* casex ({cry_ff,fifo_ff_barrel_w})
+        5'b00xxx: sel2_w = 0;
+        5'b1xxxx: sel2_w = 1;
+        default:  sel2_w = 3;
+    endcase
+
+    always @* casex ({cry_ff,fifo_ff_barrel_w})
+        5'b000xx: sel1_w = 0;
+        5'b01xxx: sel1_w = 1;
+        5'b10xxx: sel1_w = 1;
+        default:  sel1_w = 3;
+    endcase
+
+    always @* casex ({cry_ff,fifo_ff_barrel_w})
+        5'b0000x: sel0_w = 0;
+        5'b001xx: sel0_w = 1;
+        5'b010xx: sel0_w = 1;
+        5'b100xx: sel0_w = 1;
+        5'b11xxx: sel0_w = 2;
+        default:  sel0_w = 3;
+    endcase
+
+    always @* casex ({cry_ff,fifo_ff_barrel_w})
+        5'b00001: cry_ff_w = 1;
+        5'b0011x: cry_ff_w = 1;
+        5'b0101x: cry_ff_w = 1;
+        5'b1001x: cry_ff_w = 1;
+        5'b111xx: cry_ff_w = 1;
+        default:  cry_ff_w = 0;
+    endcase
+
+    always @* case (sel3_w)
+        1'b0 : bytes_rdy_w[3] = fifo_nempty_barrel_w[3];
+        1'b1 : bytes_rdy_w[3] = 1;
+    endcase
+
+    always @* case (sel2_w)
+        2'b00   : bytes_rdy_w[2] = fifo_nempty_barrel_w[2];
+        2'b01   : bytes_rdy_w[2] = fifo_nempty_barrel_w[3];
+        2'b11   : bytes_rdy_w[2] = 1;
+        default : bytes_rdy_w[2] = 'bx;
+    endcase
+
+    always @* case (sel1_w)
+        2'b00   : bytes_rdy_w[1] = fifo_nempty_barrel_w[1];
+        2'b01   : bytes_rdy_w[1] = fifo_nempty_barrel_w[2];
+        2'b11   : bytes_rdy_w[1] = 1;
+        default : bytes_rdy_w[1] = 'bx;
+    endcase
+
+    always @* case (sel0_w)
+        2'b00 : bytes_rdy_w[0] = fifo_nempty_barrel_w[0];
+        2'b01 : bytes_rdy_w[0] = fifo_nempty_barrel_w[1];
+        2'b10 : bytes_rdy_w[0] = fifo_nempty_barrel_w[2];
+        2'b11 : bytes_rdy_w[0] = 1;
+    endcase
+
+    // Each inserted 0x00 takes one output byte slot, so that many FIFO bytes stay unread
+    always @* casex ({cry_ff,fifo_ff_barrel_w})
+        5'b0001x: num_zeros_w = 1;
+        5'b001xx: num_zeros_w = 1;
+        5'b010xx: num_zeros_w = 1;
+        5'b011xx: num_zeros_w = 2;
+        5'b100xx: num_zeros_w = 1;
+        5'b101xx: num_zeros_w = 2;
+        5'b11xxx: num_zeros_w = 2; // includes 5'b111xx (output 00,ff,00,ff uses only two FIFO bytes)
+        default:  num_zeros_w = 0;
+    endcase
+
+    // Read (4 - num_zeros_w) lanes, starting at lane (3 - fifo_byte_pntr) and going down with wrap
+    always @* casex ({num_zeros_w,fifo_byte_pntr})
+        4'b00xx: fifo_re_mask_w = 4'b1111;
+        4'b0100: fifo_re_mask_w = 4'b1110;
+        4'b0101: fifo_re_mask_w = 4'b0111;
+        4'b0110: fifo_re_mask_w = 4'b1011;
+        4'b0111: fifo_re_mask_w = 4'b1101;
+        4'b1000: fifo_re_mask_w = 4'b1100;
+        4'b1001: fifo_re_mask_w = 4'b0110;
+        4'b1010: fifo_re_mask_w = 4'b0011;
+        4'b1011: fifo_re_mask_w = 4'b1001;
+        default: fifo_re_mask_w = 'bx; // impossible num_zeros_w
+    endcase
+
+    assign fifo_re = flush_pend[1]? fifo_nempty : (rdy_w ? fifo_re_mask_w : 4'b0); // when flushing read whatever is left
+
+    always @(posedge xclk) begin
+        if      (rst)   cry_ff <= 0;
+        else if (rdy_w) cry_ff <= cry_ff_w;
+
+        if      (rst)   fifo_byte_pntr <= 0;
+        else if (rdy_w) fifo_byte_pntr <= fifo_byte_pntr - num_zeros_w; // pointer steps back by the number of unread bytes (mod 4)
+
+        dv <= rdy_w || (flush_pend[1] && (cry_ff || (|fifo_nempty)));
+        if (rdy_w || (flush_pend[1] && (cry_ff || (|fifo_nempty)))) begin
+            case (sel3_w)
+                1'b0 : d_out[31:24] <= fifo_out_barrel_w[31:24];
+                1'b1 : d_out[31:24] <= 8'b0;
+            endcase
+            case (sel2_w)
+                2'b00   : d_out[23:16] <= fifo_out_barrel_w[23:16];
+                2'b01   : d_out[23:16] <= fifo_out_barrel_w[31:24];
+                2'b11   : d_out[23:16] <= 8'b0;
+                default : d_out[23:16] <= 'bx;
+            endcase
+            case (sel1_w)
+                2'b00   : d_out[15: 8] <= fifo_out_barrel_w[15: 8];
+                2'b01   : d_out[15: 8] <= fifo_out_barrel_w[23:16];
+                2'b11   : d_out[15: 8] <= 8'b0;
+                default : d_out[15: 8] <= 'bx;
+            endcase
+            case (sel0_w)
+                2'b00   : d_out[ 7: 0] <= fifo_out_barrel_w[ 7: 0];
+                2'b01   : d_out[ 7: 0] <= fifo_out_barrel_w[15: 8];
+                2'b10   : d_out[ 7: 0] <= fifo_out_barrel_w[23:16]; // two bytes higher (label must be 2'b10, not a second 2'b01)
+                2'b11   : d_out[ 7: 0] <= 8'b0;
+                default : d_out[ 7: 0] <= 'bx;
+            endcase
+        end
+
+        if      (rst)           flush_pend[0] <= 0;
+        else if (flush_in)      flush_pend[0] <= 1;
+        else if (flush_pend[1]) flush_pend[0] <= 0;
+
+        if (rst) flush_pend[1] <= 0;
+        else     flush_pend[1] <= flush_pend[0] &&!flush_pend[1] && !rdy_w; // single-cycle pulse once no full word is ready
+
+        flush_pend[2] <= flush_pend[1];
+
+        flush_out <= flush_pend[2];
+
+        if ( rdy_w || flush_pend[1]) casex(bytes_rdy_w[3:0])
+            4'b10xx : bytes_out <= 1;
+            4'b110x : bytes_out <= 2;
+            4'b1110 : bytes_out <= 3;
+            default : bytes_out <= 0; // all 4 bytes
+        endcase
+    end
+
+endmodule
+
-- 
2.18.1