/*
** -----------------------------------------------------------------------------**
** quantizer393.v
**
** Quantizer module for JPEG compressor
**
** Copyright (C) 2002-2015 Elphel, Inc
**
** -----------------------------------------------------------------------------**
**  quantizer393.v is free software - hardware description language (HDL) code.
** 
**  This program is free software: you can redistribute it and/or modify
**  it under the terms of the GNU General Public License as published by
**  the Free Software Foundation, either version 3 of the License, or
**  (at your option) any later version.
**
**  This program is distributed in the hope that it will be useful,
**  but WITHOUT ANY WARRANTY; without even the implied warranty of
**  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
**  GNU General Public License for more details.
**
**  You should have received a copy of the GNU General Public License
**  along with this program.  If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
**
*/
`include "system_defines.vh"
`timescale 1ns/1ps

// will add extracted DC (8 bits) to data from DCT here that will make data 12 bits (signed) long.
// It will be possible to make a sequential multiplier for DC - but I'll skip this now.
// Quantizer stage of the JPEG compressor.
//
// Structure visible in this module:
//  * block_mem_ram - 8-entry memory written on 'stb' with {coring_num, tsi, ctypei, dci} and
//                    read through block_mem_ra, which advances on 'pre_start'.
//  * data pipeline - di -> d1 -> d2 (DC average added back) -> d3 (sign + absolute value)
//                    -> qmul (multiply by table word) -> qmulr -> qdo0 (rounded) -> qdo
//                    (rounded value or coring table output, sign restored).
//  * i_zigzagbuf   - two-page buffer: written at the address produced by zigzag393 (zwa),
//                    read back sequentially (zra) to produce do/dv/ds.
//  * dcc/hfc logic - accumulates DC and "high frequency" data for readout over dcc_data/dcc_vld.
//
// NOTE(review): the output port name 'do' is a reserved word in SystemVerilog (IEEE 1800);
// this file has to be compiled as plain Verilog. The name is kept because it is part of the
// module interface.
module quantizer393(
    input             clk,           // pixel clock, posedge
    input             en,            // enable (0 resets counter)
    input             mclk,          // system clock to write tables
    input             tser_qe,       // enable write to a quantization table
    input             tser_ce,       // enable write to a coring table
    input             tser_a_not_d,  // address/not data distributed to submodules
    input      [ 7:0] tser_d,        // byte-wide serialized tables address/data to submodules
    input             ctypei,        // component type input (Y/C)
    input      [ 8:0] dci,           // [8:0] average value in a block - subtracted before DCT. now normal signed number
    input             first_stb,     // this is first stb pulse in a frame
    input             stb,           // strobe that writes ctypei, dci (and tsi, coring_num) to block_mem_ram
    input      [ 2:0] tsi,           // table (quality) select [2:0]
    input             pre_start,     // marks first input pixel (one before)
    input             first_in,      // first block in (valid @ start)
    output reg        first_out,     // valid @ ds
    input      [12:0] di,            // [12:0] pixel data in (signed)
    output reg [12:0] do,            // [12:0] pixel data out (AC is only 9 bits long?) - changed to 10
    output            dv,            // data out valid
    output            ds,            // data out strobe (one ahead of the start of dv)
    output reg [15:0] dc_tdo,        // [15:0], MSB aligned coefficient for the DC component (used in focus module)
    input             dcc_en,        // enable dcc (sync to beginning of a new frame)
    input      [ 2:0] hfc_sel,       // high frequency components select [2:0]; see i_hfc_en below for the exact condition
                                     // hfc_sel == 3'h7 - no high frequency output (hfc_copy is blocked) -
                                     // just 3 words - brightness and 2 color diffs
    input             color_first,   // first MCU in a frame
    input      [ 2:0] coring_num,    // coring table pair number (0..7)
    output reg        dcc_vld,       // single cycle when dcc_data is valid
    output     [15:0] dcc_data,      // [15:0] dc component data out (for reading by software)
    input      [ 7:0] n000,          // input [7:0] number of zero pixels (255 if 256) - to be multiplexed with dcc
    input      [ 7:0] n255);         // input [7:0] number of 0xff pixels (255 if 256) - to be multiplexed with dcc


    wire       [3:0] tdco; // coring table output
    reg        [3:0] tbac; // coring memory table number (LSB - color)
    reg              coring_range; // input <16, use coring LUT
    wire      [15:0] tdo;   // quantization table output (non-registered memory)
    reg       [ 9:0] tba;   // table output (use) address
    wire      [15:0] zigzag_q; // zigzag buffer read data
    reg              wpage; // zigzag buffer page being written
    reg              rpage; // zigzag buffer page being read
    wire      [ 5:0] zwa;   // zigzag buffer write address within a page (from zigzag393)
    reg       [ 5:0] zra;   // zigzag buffer read address within a page (sequential)
    reg       [12:0] qdo;   // quantized data, input to the zigzag buffer
    reg       [12:0] qdo0;  // rounded product: [12] - sign, [11:0] - absolute value
    reg              zwe;   // zigzag buffer write enable
    reg       [12:0] d1;
    reg       [12:0] d2,d3; // registered data in, converted to sign+ absolute value
    wire      [27:0] qmul;
    wire             start_a;
    reg       [15:0] tdor;
    reg       [20:0] qmulr; // added 7 bits to total8 fractional for biasing/zero bin
    wire             start_out;
    wire             start_z;
    reg       [ 8:0] dc1;   // registered DC average - with restored sign

// for fifo for ctype, dc
    wire            ctype;
    wire     [ 8:0] dc;
//    wire            next_dv;
//    reg      [ 2:0] last_dv; // last dv cycle (will turn of unless new ds)
    reg      [ 2:0] ds_r;   // delay line for start_out, ds is its last tap
    reg      [ 3:0] ren;    // read enable and its delayed copies, dv is the last tap

    reg      [ 5:0] start;  // delay line for pre_start
    wire            dcc_stb;
    reg             dcc_run;
    reg             dcc_first;
    reg             dcc_Y;
    reg      [ 1:0] ctype_prev;
    reg      [12:0] dcc_acc;
    reg      [12:0] hfc_acc;
    wire            hfc_en;
    reg             hfc_copy; // copy hfc_acc to dcc_acc
    wire     [10:0] d2_dct;   // 11 bits enough, converted to positive (before - 0 was in the middle - pixel value 128) - dcc only
    reg             sel_satnum; // select saturation numbers - dcc only
    reg      [15:0] pre_dc_tdo;
    wire            copy_dc_tdo;

    reg             first_interm; // valid @ ds

    wire     [ 2:0] ts;
    wire     [ 2:0] coring_sel;

    reg      [ 2:0] block_mem_ra;
    reg      [ 2:0] block_mem_wa;
    reg      [ 2:0] block_mem_wa_save;
    reg      [15:0] block_mem_ram[0:7];
    wire     [15:0] block_mem_o=block_mem_ram[block_mem_ra[2:0]];

    // Unpack the per-block word, same field order as it is written on 'stb'
    assign dc[8:0] =          block_mem_o[8:0];
    assign ctype =            block_mem_o[9];
    assign ts[2:0] =          block_mem_o[12:10];
    assign coring_sel[2:0] =  block_mem_o[15:13];

    assign start_a = start[5];
    assign start_z = start[4];
    assign dcc_stb = start[2];
    assign ds = ds_r[2];
    assign dv = ren[3];

    // Per-block parameters memory: write side (stb) and read pointer (pre_start)
    always @ (posedge clk) begin
        if (stb) block_mem_ram[block_mem_wa[2:0]] <= {coring_num[2:0],tsi[2:0], ctypei, dci[8:0]};

        if      (!en) block_mem_wa[2:0] <= 3'h0;
        else if (stb) block_mem_wa[2:0] <= block_mem_wa[2:0] +1;

        // remember where the first word of a frame was written
        if (stb && first_stb)  block_mem_wa_save[2:0] <= block_mem_wa[2:0];

        // the first block of a frame re-synchronizes the read pointer to the saved write address
        if      (!en)       block_mem_ra[2:0] <= 3'h0;
        else if (pre_start) block_mem_ra[2:0] <= first_in?block_mem_wa_save[2:0]:(block_mem_ra[2:0] +1);
    end

    assign        d2_dct[10:0]={!d2[11] ^ ctype_prev[0], d2[9:0]};

    // dcc_data carries either the saturation counters or the (sign-extended for color) accumulator
    assign        dcc_data[15:0]=sel_satnum?
                    {n255[7:0],n000[7:0]}:
                    {dcc_first || (!dcc_Y && dcc_acc[12]) ,(!dcc_Y && dcc_acc[12]), (!dcc_Y && dcc_acc[12]), dcc_acc[12:0]};
//    assign         do[12:0]=zigzag_q[12:0];
    assign        qmul[27:0]=tdor[15:0]*d3[11:0];

    assign         start_out =   zwe && (zwa[5:0]== 6'h3f);   //adjust?
    assign         copy_dc_tdo = zwe && (zwa[5:0]== 6'h37);   // not critical

//    assign next_dv=en && (ds || (dv && (zra[5:0]!=6'h00)));
    always @ (posedge clk) begin
        d1[12:0]      <= di[12:0];
//inv_sign
        dc1[8:0] <= start[0]?dc[8:0]:9'b0;   // sync to d1[8:0]ctype valid at start, not later
        d2[12:0] <= {dc1[8],dc1[8:0],3'b0} + d1[12:0];   // dc1 is sign-extended and shifted left by 3
        d3[12]   <= d2[12];                              // sign
        d3[11:0] <= d2[12]? -d2[11:0]:d2[11:0];          // absolute value

        if (start[0] || !en) tba[9:6] <= {ts[2:0],ctype};

/// TODO - make sure ctype switches at needed time (compensate if needed) *****************************************
        if (start[3] || !en) tbac[3:0] <= {coring_sel[2:0],ctype}; // table number to use

        // coefficient counter within a block, stops at 63
        if      (start[0])        tba[5:0] <= 6'b0;
        else if (tba[5:0]!=6'h3f) tba[5:0] <= tba[5:0]+1;

        tdor[15:0]  <= tdo[15:0]; // registered table data out

        if (start[3])  pre_dc_tdo[15:0] <= tdor[15:0]; // (16-bit q. tables)

        if (copy_dc_tdo) dc_tdo[15:0]     <= pre_dc_tdo[15:0];

        qmulr[19:0] <= qmul[27:8]; // absolute value
        qmulr[20]   <= d3[12];     // sign
        qdo0[12]    <= qmulr[20];  // sign

// tdco[3:0] - same timing as qdo0;
// use lookup table from 8 bits of absolute value (4.4 - 4 fractional) to calculate 4 bit coring output that would replace output
// if input is less than 16. For larger values the true rounding will be used.

// Absolute values here have quantization coefficients already applied, so we can use the same coring table for all DCT coefficients.
// there are 16 tables - 8 Y/C pairs to switch
        qdo0[11:0]  <= qmulr[19:8] + qmulr[7]; // true rounding of the absolute value
        coring_range<= !(|qmulr[19:12]) && !(&qmulr[11:7]) ; // valid with qdo0
        qdo[11:0]   <= coring_range? (qdo0[12]?-{8'h0,tdco[3:0]}:{8'h0,tdco[3:0]}):(qdo0[12]?-qdo0[11:0]:qdo0[11:0]);
        qdo[12]     <= qdo0[12] && (!coring_range || (tdco[3:0]!=4'h0)); // no negative zero from the coring table

        // the page just filled becomes the page to read
        if (start_out)    rpage <= wpage;

//        last_dv <= {last_dv[1:0], en && (zra[5:0] == 6'h3f)};

        if (start_out)    zra[5:0] <= 6'b0;
//        else if (zra[5:0]!=6'h3f)   zra[5:0] <= zra[5:0]+1; // conserving energy
        else if (ren[0])  zra[5:0] <= zra[5:0]+1; // conserving energy

        ds_r    <= {ds_r[1:0], en && start_out};

        // ren[0] is active from start_out until the read address reaches 63
        if      (!en)                 ren[0] <= 0;
        else if (start_out)           ren[0] <= 1;
        else if ((zra[5:0] == 6'h3f)) ren[0] <= 0;

        if      (!en)                 ren[3:1] <= 0;
        else                          ren[3:1] <= ren [2:0];

        if (ren[2])                   do[12:0] <= zigzag_q[12:0];

        if (start_a)   first_interm <= first_in;
        if (start_out) first_out    <=first_interm;
// zwe???
        // write enable runs from start_a until the zigzag write address reaches 63
        zwe <= en && (start_a || (zwe && (zwa[5:0]!=6'h3f)));
        if          (!en) wpage <= 1'b0;
        else if (start_a) wpage <= ~wpage;
    end


    // DC component / high frequency components accumulation (dcc_data, dcc_vld)
    always @ (posedge clk) begin
        sel_satnum <= dcc_run && (start[0]? (ctype_prev[1:0]==2'b10): sel_satnum);

        hfc_copy <= dcc_run && (hfc_sel[2:0]!=3'h7) && (tba[5:0]==6'h1f) && ctype_prev[0] && ctype_prev[1];

        start[5:0] <= {start[4:0], pre_start}; // needed?

        if    (!dcc_en) dcc_run <= 1'b0;
        else if (start[0]) dcc_run <= 1'b1;

        if (!dcc_en)    ctype_prev[1:0] <= 2'b11;
        else if (start[0]) ctype_prev[1:0] <= {ctype_prev[0],ctype && dcc_run};

        if (dcc_stb || hfc_copy) dcc_acc[12:0] <= hfc_copy?
                                                hfc_acc[12:0]:
                                               {(d2_dct[10]&&ctype_prev[0]),(d2_dct[10]&&ctype_prev[0]),d2_dct[10:0]}+((ctype_prev[0] || ctype_prev[1])?13'h0:dcc_acc[12:0]);

        if (!dcc_run || hfc_copy) hfc_acc <=13'b0;
        else if (hfc_en) hfc_acc <= hfc_acc + {2'b0, d3[10:0]};

        if (dcc_stb) dcc_first <= color_first && dcc_run && dcc_stb && ctype && !ctype_prev[0];

        if (dcc_stb) dcc_Y <= dcc_run && dcc_stb && ctype && !ctype_prev[0];

        dcc_vld <= (dcc_run && dcc_stb && (ctype || ctype_prev[0] || sel_satnum)) || hfc_copy;
    end

    // Table write interface (mclk domain): one serial receiver shared by both tables
    wire          twqe;  // write enable for the quantization table
    wire          twce;  // write enable for the coring table
    wire  [15:0]  tdi;   // table write data
    wire  [22:0]  ta;    // table write address (only ta[9:0] is used here)


    table_ad_receive #( // here may be changed to 8-bit from 16-bit
        .MODE_16_BITS (1),
        .NUM_CHN      (2)
    ) table_ad_receive_i (
        .clk       (mclk),              // input
        .a_not_d   (tser_a_not_d),      // input
        .ser_d     (tser_d),            // input[7:0]
        .dv        ({tser_ce,tser_qe}), // input[1:0]
        .ta        (ta), // output[22:0]
        .td        (tdi), // output[15:0]
        .twe       ({twce,twqe}) // output[1:0]
    );


//    SRL16 i_hfc_en (.Q(hfc_en), .A0(1'b1), .A1(1'b0), .A2(1'b0), .A3(1'b0), .CLK(clk),
//                    .D(((tba[2:0]>hfc_sel[2:0]) || (tba[5:3]>hfc_sel[2:0])) && dcc_run && !ctype_prev[0])); // dly=1+1
    // hfc_en: delayed flag, set when either 3-bit half of the coefficient address exceeds hfc_sel
    dly_16 #(.WIDTH(1)) i_hfc_en (
        .clk(clk),
        .rst(1'b0),
        .dly(4'd1),
        .din(((tba[2:0]>hfc_sel[2:0]) || (tba[5:3]>hfc_sel[2:0])) && dcc_run && !ctype_prev[0]),
        .dout(hfc_en));   // dly=1+1

    zigzag393 i_zigzag(   .clk(clk),
                     .start(start_z),
                      .q(zwa[5:0]));

    // All memories below are non-registered, see if they can be made registered
    ram18_var_w_var_r #(
        .REGISTERS    (0),
        .LOG2WIDTH_WR (4),
        .LOG2WIDTH_RD (4),
        .DUMMY        (0)
`ifdef PRELOAD_BRAMS
    `include "includes/quantization_100.dat.vh"
`endif
    ) i_quant_table (
        .rclk         (clk),                          // input
        .raddr        ({tba[9:6],tba[2:0],tba[5:3]}), // input[9:0], the two 3-bit halves of tba[5:0] are swapped
        .ren          (1'b1),                         // input
        .regen        (1'b1),                         // input
        .data_out     (tdo[15:0]),                    // output[15:0]
        .wclk         (mclk),                         // input
        .waddr        (ta[9:0]),                      // input[9:0]
        .we           (twqe),                         // input
        .web          (4'hf),                         // input[3:0]
        .data_in      (tdi[15:0])                     // input[15:0]
    );

    ram18_var_w_var_r #(
        .REGISTERS    (0),
        .LOG2WIDTH_WR (4),
        .LOG2WIDTH_RD (2),
        .DUMMY        (0)
`ifdef PRELOAD_BRAMS
    `include "includes/coring.dat.vh"
`endif
    ) i_coring_table (
        .rclk         (clk), // input
        .raddr        ({tbac[3:0],qmulr[11:4]}), // input[11:0]: table number, then 8 bits of absolute value
        .ren          (1'b1),                    // input
        .regen        (1'b1),                    // input
        .data_out     (tdco[3:0]),               // output[3:0]
        .wclk         (mclk),                    // input
        .waddr        (ta[9:0]),                 // input[9:0]
        .we           (twce),                    // input
        .web          (4'hf),                    // input[3:0]
        .data_in      (tdi[15:0])                // input[15:0]
    );

    ram18_var_w_var_r #(
        .REGISTERS    (1),
        .LOG2WIDTH_WR (4),
        .LOG2WIDTH_RD (4),
        .DUMMY        (0)
    ) i_zigzagbuf (
        .rclk         (clk),                     // input
        .raddr        ({3'b0,rpage,zra[5:0]}),   // input[9:0]
        .ren          (ren[0]),                  // input
        .regen        (ren[1]),                  // input
        .data_out     (zigzag_q[15:0]),          // output[15:0]
        .wclk         (clk),                     // input
        .waddr        ({3'b0,wpage,zwa[5:0]}),   // input[9:0]
        .we           (zwe),                     // input
        .web          (4'hf),                    // input[3:0]
        .data_in      ({3'b0,qdo[12:0]})         // input[15:0]
    );


endmodule

// Address sequencer: after 'start' produces a 64-step sequence of 6-bit addresses on 'q'.
//
// A 6-bit counter 'a' is cleared by 'start', then counts up and stops at 63.
// Only the first 32 entries are stored in the ROM; the second half of the sequence is
// generated from the same ROM read in reverse order (address inverted) with the output
// inverted as well. 'q' is registered, so it lags the counter by one clock.
module zigzag393 (
    input            clk,           // system clock, posedge
    input            start,         // clears the counter (synchronous)
    output reg [5:0] q);            // registered address output

    reg        [5:0] a;             // step counter, saturates at 6'h3f
    wire      [ 4:0] rom_a;         // ROM address, mirrored for the second half
    reg       [ 5:0] rom_q;         // ROM output (combinatorial)

    assign   rom_a[4:0]=a[5]?(~a[4:0]):a[4:0];

    always @ (posedge clk) begin
        if (start)   a[5:0] <= 6'b0;
        else   if (a[5:0]!=6'h3f) a[5:0] <= a[5:0]+1;
    end

    // ROM (combinatorial): blocking assignments and a complete sensitivity list
    always @* begin
        case (rom_a)
            5'h00: rom_q = 6'h00;
            5'h01: rom_q = 6'h02;
            5'h02: rom_q = 6'h03;
            5'h03: rom_q = 6'h09;
            5'h04: rom_q = 6'h0a;
            5'h05: rom_q = 6'h14;
            5'h06: rom_q = 6'h15;
            5'h07: rom_q = 6'h23;
            5'h08: rom_q = 6'h01;
            5'h09: rom_q = 6'h04;
            5'h0a: rom_q = 6'h08;
            5'h0b: rom_q = 6'h0b;
            5'h0c: rom_q = 6'h13;
            5'h0d: rom_q = 6'h16;
            5'h0e: rom_q = 6'h22;
            5'h0f: rom_q = 6'h24;
            5'h10: rom_q = 6'h05;
            5'h11: rom_q = 6'h07;
            5'h12: rom_q = 6'h0c;
            5'h13: rom_q = 6'h12;
            5'h14: rom_q = 6'h17;
            5'h15: rom_q = 6'h21;
            5'h16: rom_q = 6'h25;
            5'h17: rom_q = 6'h30;
            5'h18: rom_q = 6'h06;
            5'h19: rom_q = 6'h0d;
            5'h1a: rom_q = 6'h11;
            5'h1b: rom_q = 6'h18;
            5'h1c: rom_q = 6'h20;
            5'h1d: rom_q = 6'h26;
            5'h1e: rom_q = 6'h2f;
            5'h1f: rom_q = 6'h31;
            // all 32 addresses are listed above; this arm is only reached in simulation
            // when rom_a contains X/Z, and propagates the unknown instead of holding
            // the previous value
            default: rom_q = 6'bx;
        endcase
    end

    // add symmetrical part
    always @ (posedge clk) q[5:0]   <= a[5]? (~rom_q[5:0]):rom_q[5:0];
endmodule