Commit 43f67702 authored by Andrey Filippov's avatar Andrey Filippov

ported all other submodules for the JPEG/JP4 compressor

parent b040c02d
/*******************************************************************************
* Module: dcc_sync393
* Date:2015-06-17
* Author: andrey
* Description: Synchronises output of DC components
* Synchronizes dcc data with dma1 output, adds 16..31 16-bit zero words for Axis DMA
* Was not used in late NC353 camera (DMA channel used for IMU logger)
*
* Copyright (c) 2015 <set up in Preferences-Verilog/VHDL Editor-Templates> .
* dcc_sync393.v is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dcc_sync393.v is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/> .
*******************************************************************************/
`timescale 1ns/1ps
module dcc_sync393(
    input             sclk,          // clock: every register of this module is updated @posedge sclk
    input             dcc_en,        // enable; registered into dcc_run, low clears the word counter and the finishing state
    input             finish_dcc,    // request to finish: starts padding the output with zero words
    input             dcc_vld,       // dcc_data valid (a word is accepted at most every other sclk cycle)
    input      [15:0] dcc_data,      // [15:0] DC component data
    output reg        statistics_dv, // output data valid (real data or zero padding)
    output reg [15:0] statistics_do  // [15:0] output data, 16'h0 whenever no data word is being written
);
    reg        statistics_we;  // single-cycle strobe: a data word is being passed to the output
    reg        dcc_run;        // dcc_en delayed by one sclk
    reg        dcc_finishing;  // zero padding in progress
    reg        skip16;         // output just 16 zero words (data was multiple of 16 words)
    reg  [4:0] dcc_cntr;       // [3:0] - output word counter modulo 16, [4] - phase flag of the padding sequence

    // Decoded states of the word counter, shared by the register equations below
    wire       low_all_ones = (dcc_cntr[3:0] == 4'hf);
    wire       low_all_zero = (dcc_cntr[3:0] == 4'h0);
    wire       pad_more     = ~&dcc_cntr[4:1]; // same as (dcc_cntr[4:1] != 4'hf)

    // Input side: delayed enable and the write strobe (never active two cycles in a row)
    always @ (posedge sclk) begin
        dcc_run       <= dcc_en;
        statistics_we <= dcc_run && dcc_vld && !statistics_we;
    end

    // Output registers: data is forwarded only with the write strobe, otherwise zeros are driven
    always @ (posedge sclk) begin
        statistics_dv <= statistics_we || dcc_finishing;
        statistics_do <= statistics_we ? dcc_data : 16'h0;
    end

    // Word counter: low nibble counts every valid output word while running
    always @ (posedge sclk) begin
        if (!dcc_run)           dcc_cntr[3:0] <= 4'h0;
        else if (statistics_dv) dcc_cntr[3:0] <= dcc_cntr[3:0] + 4'h1;
        dcc_cntr[4] <= dcc_run && ((dcc_finishing && (low_all_ones ^ dcc_cntr[4])) || skip16);
    end

    // Finishing sequence: padding continues until dcc_cntr[4:1] becomes all ones
    always @ (posedge sclk) begin
        skip16        <= finish_dcc && (statistics_dv ? low_all_ones : low_all_zero);
        dcc_finishing <= dcc_run && (finish_dcc || (dcc_finishing && pad_more));
    end
endmodule
/*
** -----------------------------------------------------------------------------**
** encoderDCAC393.v
**
** RLL encoder for JPEG compressor
**
** Copyright (C) 2002-2015 Elphel, Inc
**
** -----------------------------------------------------------------------------**
** encoderDCAC393.v is free software - hardware description language (HDL) code.
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
**
*/
// Accepts 13-bit signed data (only 12 bits can be encoded), so the DC difference (to be encoded) is limited (saturated) to 12 bits, not the value itself
// AC - always limited to 800 .. 7ff
// Run-length encoder stage of the JPEG compressor.
// Receives zigzag-ordered coefficients (zdi) with a strobe (zds) that precedes the DC coefficient,
// replaces the DC value with the difference from the previous DC of the same component
// (kept in dc_mem), counts runs of zero AC coefficients and produces 16-bit words (do/dv)
// in the format described at the end of the module.
module encoderDCAC393(
input clk, // pixel clock, posedge
input en, // enable (0 resets)
input lasti, // was "last MCU in a frame" (@ stb)
input first_blocki, // first block in frame - save fifo write address (@ stb)
input [ 2:0] comp_numberi, // [2:0] component number 0..2 in color, 0..3 - in jp4diff, >= 4 - don't use (@ stb)
input comp_firsti, // first this component in a frame (reset DC) (@ stb)
input comp_colori, // use color - huffman? (@ stb)
input comp_lastinmbi, // last component in a macroblock (@ stb) is it needed?
input stb, // strobe that writes firsti, lasti, tni,average
input [12:0] zdi, // [12:0] zigzag-reordered data input (13-bit signed, saturated to 12 bits inside)
input first_blockz, // first block input (@zds)
input zds, // strobe - one ahead of the DC component output
output reg last, // copy of the "last" attribute of the current block, updated when its DC word is prepared
output reg [15:0] do, // [15:0] encoded output word, updated only when pre_dv is active
output reg dv); // output data valid (pre_dv delayed by one clk)
// 8x13 DC storage memory
reg [12:0] dc_mem[7:0];
reg [12:0] dc_diff0, dc_diff; // dc_diff0 - previous DC of this component (0 for the first one), dc_diff - current DC minus dc_diff0
wire [11:0] dc_diff_limited= (dc_diff[12]==dc_diff[11])?
dc_diff[11:0] :
{~dc_diff[11],{11{dc_diff[11]}}}; // difference (to be encoded) limited to fit 12 bits
reg [12:0] dc_restored; // corrected DC value of the current block, compensated to fit difference to 12 bits
reg [ 5:0] rll_cntr; // number of consecutive zero AC coefficients seen so far
reg [5:0] cntr; // coefficient counter inside a block, runs while DCACen is active
reg [11:0] ac_in; // input data delayed by 2 clk and saturated to 12 bits
wire izero=(ac_in[11:0]==12'b0);
reg [14:0] val_r; // DC diff/AC values to be sent out, registered
reg DCACen; // enable DC/AC (2 cycles ahead of do)
wire rll_out; // output a run-length word instead of a DC/AC word
wire pre_dv; // output register "do" is loaded this cycle, dv follows one clk later
reg was_nonzero_AC; // previous ac_in was a non-zero coefficient inside a block
reg [12:0] zdi_d; // zdi delayed by 1 clk
reg [3:0] zds_d; // zds delayed by 1..4 clk (shift register), sequences the DC pipeline
wire DC_tosend= zds_d[2];
wire pre_DCACen= zds_d[1];
wire [2:0] comp_numbero; // [2:0] component number 0..2 in color, 0..3 - in jp4diff, >= 4 - don't use
wire comp_firsto; // first this component in a frame (reset DC)
wire comp_coloro; // use color - huffman?
wire comp_lastinmbo; // last component in a macroblock
wire lasto; // last macroblock in a frame
// 8-entry memory of per-block attributes: written at stb (address block_mem_wa),
// read asynchronously at block_mem_ra that is advanced by zds
reg [2:0] block_mem_ra;
reg [2:0] block_mem_wa;
reg [2:0] block_mem_wa_save; // write address of the first block of a frame, reloaded to block_mem_ra at first_blockz
reg [6:0] block_mem[0:7];
wire [6:0] block_mem_o=block_mem[block_mem_ra[2:0]];
assign comp_numbero[2:0]= block_mem_o[2:0];
assign comp_firsto= block_mem_o[3];
assign comp_coloro= block_mem_o[4];
assign comp_lastinmbo= block_mem_o[5];
assign lasto= block_mem_o[6];
// Block attributes memory: write and read pointers, both reset by !en
always @ (posedge clk) begin
if (stb) block_mem[block_mem_wa[2:0]] <= {lasti, comp_lastinmbi, comp_colori,comp_firsti,comp_numberi[2:0]};
if (!en) block_mem_wa[2:0] <= 3'h0;
else if (stb) block_mem_wa[2:0] <= block_mem_wa[2:0] +1;
if (stb && first_blocki) block_mem_wa_save[2:0] <= block_mem_wa[2:0];
if (!en) block_mem_ra[2:0] <= 3'h0;
else if (zds) block_mem_ra[2:0] <= first_blockz?block_mem_wa_save[2:0]:(block_mem_ra[2:0] +1);
end
// A run-length word is sent when the zero counter is not empty and either the registered value
// is an AC word with the "last coefficient" flag (val_r[12] set, val_r[14] clear) or a non-zero AC is arriving.
assign rll_out= ((val_r[12] && !val_r[14]) || (ac_in[11:0]!=12'b0)) && (rll_cntr[5:0]!=6'b0);
// val_r[14] is set only for DC words (it is loaded from en below), so DC words are always output
assign pre_dv=rll_out || val_r[14] || was_nonzero_AC;
always @ (posedge clk) begin
// val_r: for DC - {en (1 when enabled), color, last, dc_diff}; for AC - {2'b0, last coefficient in block, ac}
val_r[14:0] <={ DC_tosend?
{en,
comp_coloro,
comp_lastinmbo && lasto, // last component's in a frame DC coefficient
dc_diff_limited[11:0]}:
{2'b0,
(cntr[5:0]==6'h3f),
ac_in[11:0]}};
was_nonzero_AC <= en && (ac_in[11:0]!=12'b0) && DCACen;
// output word: run-length/end-of-block word (MSB=0) or DC/AC word (MSB=1)
if (pre_dv) do <= rll_out? {3'b0,val_r[12],6'b0,rll_cntr[5:0]}:{1'b1,val_r[14:0]};
dv <= pre_dv;
// DCACen is started by pre_DCACen and stays on until cntr reaches 63
DCACen <= en && (pre_DCACen || (DCACen && (cntr[5:0]!=6'h3f))); // adjust
if (!DCACen) cntr[5:0] <=6'b0;
else cntr[5:0] <=cntr[5:0]+1;
end
always @ (posedge clk) begin
zdi_d[12:0] <= zdi[12:0];
ac_in[11:0] <= (zdi_d[12]==zdi_d[11])? zdi_d[11:0]:{~zdi_d[11],{11{zdi_d[11]}}}; // always // delay + saturation
// zero run counter: cleared by a DC word, by any non-zero coefficient and outside of a block
if (DC_tosend || !izero || !DCACen) rll_cntr[5:0] <= 6'h0;
else if (DCACen) rll_cntr[5:0] <= rll_cntr[5:0] +1 ;
if (DC_tosend) last <= lasto;
end
// DC components
// Pipeline sequenced by the delayed zds:
// zds_d[0] - read previous DC of this component (or 0 for the first block of the component)
// zds_d[1] - difference between the delayed input (zdi_d) and the previous DC
// zds_d[2] - previous DC plus the 12-bit limited difference (sign-extended)
// zds_d[3] - store that sum back as the new "previous DC"
always @ (posedge clk) begin
zds_d[3:0] <= {zds_d[2:0], zds};
if (zds_d[0]) dc_diff0[12:0] <= comp_firsto?13'b0:dc_mem[comp_numbero[2:0]];
if (zds_d[1]) dc_diff [12:0] <= zdi_d[12:0]-dc_diff0[12:0];
if (zds_d[2]) dc_restored[12:0] <= dc_diff0[12:0] + {dc_diff_limited[11],dc_diff_limited[11:0]};
if (zds_d[3]) dc_mem[comp_numbero[2:0]] <= dc_restored[12:0];
end
// Generate output stream to facilitate huffman encoding. The data will go to FIFO (16x) to compensate for possible long Huffman codes
// and/or zero-byte insertions
// format:
// {2'b11, color,last block, dc[11:0]} - DC data
// {2'b10, 1'b0, last coeff, ac[11:0]} - AC data (last coeff is set if it is last- 63-rd AC coefficient)
// {2'h00, 2'b00, 6'b0,rll[ 5:0]} - RLL zeroes.
// {2'h00, 2'b01, 6'b0,rll[ 5:0]} - end of block. lower 6 bits will have length that should be ignored
endmodule
/*
** -----------------------------------------------------------------------------**
** focus_sharp393.v
**
** Module to determine focus sharpness by integrating
** DCT coefficients, multiplied by an 8x8 array and squared
**
** Copyright (C) 2008-2015 Elphel, Inc
**
** -----------------------------------------------------------------------------**
** focus_sharp393.v is free software - hardware description language (HDL) code.
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
**
*/
`timescale 1ns/1ps
//TODO: Modify to work with other modes (now only on color)
// Focus sharpness measurement.
// For each luma 8x8 block the DCT coefficients are multiplied by a weight table (one of up to
// 15 filters, selected by filt_sel), squared and accumulated (acc_blk). Per-block results are
// summed over the blocks inside the window of interest (acc_frame) and the upper 32 bits are
// copied to hifreq after the last block of the frame. Depending on "mode" the per-block
// value (fdo) may also replace or be combined with the luma DC coefficient in the
// quantizer output stream (do/ds).
module focus_sharp393(
    input             clk,          // pixel clock, posedge
    input             en,           // enable (0 resets)
    input             sclk,         // system clock: twe, ta,tdi - valid @negedge (ra, tdi - 2 cycles ahead)
    input             twe,          // enable write to a table
    input      [ 9:0] ta,           // [9:0] table address
    input      [15:0] tdi,          // [15:0] table data in (8 LSBs - quantization data)
    input      [ 1:0] mode,         // focus mode (combine image with focus info) - 0 - none, 1 - replace, 2 - combine all, 3 - combine woi
    input             firsti,       // first macroblock
    input             lasti,        // last macroblock
    input      [ 2:0] tni,          // block number in a macroblock - 0..3 - Y, >=4 - color (sync to stb)
    input             stb,          // strobe that writes firsti, lasti, tni
    input             start,        // marks first input pixel (needs 1 cycle delay from previous DCT stage)
    input      [12:0] di,           // [12:0] pixel data in (signed), saturated to 12 bits inside
    input             quant_ds,     // quantizator ds
    input      [12:0] quant_d,      // [12:0] quantizator data output
    input      [15:0] quant_dc_tdo, // [15:0], MSB aligned coefficient for the DC component (used in focus module)
    output reg [12:0] do,           // [12:0] pixel data out, make timing ignore (valid 1.5 clk earlier that Quantizer output)
    output reg        ds,           // data out strobe (one ahead of the start of dv)
    output reg [31:0] hifreq);      // [31:0] accumulated high frequency components in a frame sub-window

    wire [15:0] tdo;          // weight table output
    reg  [ 5:0] tba;          // coefficient index inside a block (table read address, transposed at the RAM port)
    reg  [11:0] wnd_reg;      // intermediate register
    reg         wnd_wr;       // writing window
    reg  [ 2:0] wnd_a;        // window register address
// next measured in 8x8 blocks, totalwidth - write one less than needed (i.e. 511 for the 512-wide window)
// blocks on the border are included
    reg  [ 8:0] wnd_left;
    reg  [ 8:0] wnd_right;
    reg  [ 8:0] wnd_top;
    reg  [ 8:0] wnd_bottom;
    reg  [ 8:1] wnd_totalwidth;
    reg  [ 3:0] filt_sel0;    // select filter number, 0..14 (15 used for window parameters)
    reg  [ 3:0] filt_sel;     // select filter number, 0..14 (15 used for window parameters)
    reg         stren;        // strength (visualization)
    reg  [ 2:0] ic;           // write index of the block attributes memory (ram4), advanced by stb
    reg  [ 2:0] oc;           // read index of the block attributes memory (ram4), advanced by start
    wire        first,last;   // valid at start (with first di word), switches immediately after
    wire [ 2:0] tn;
    reg  [39:0] acc_frame;    // sum of sum_blk over the blocks selected by luma_dc_acc
    reg  [12:0] pre_do;
    reg         pre_ds;
    reg         need_corr_max; // limit output by quant_dc_tdo
    reg  [11:0] fdo;          // focus data output
    reg         start_d;      // start delayed by 1
    reg  [ 2:0] tn_d;         // tn delayed by 1
    wire        out_mono;     // started_luma delayed by dly_16
    wire        out_window;   // in_woi delayed by dly_16
    wire [12:0] combined_qf;
    wire [12:0] next_do;
    wire [12:0] fdo_minus_max;
    reg  [11:0] di_d;         // di saturated to 12 bits (clk domain)
    reg  [11:0] d1;           // di_d re-registered @sclk
    reg  [ 8:0] start2;       // start (gated by csync) shift register @sclk
    reg  [ 7:0] finish2;      // shift register marking the end of the block processing
    reg  [ 5:0] use_k_dly;    // use_coef delayed by 1..6 sclk
    reg  [23:0] acc_blk;      // accumulator for the sum ((a[i]*d[i])^2)
    reg  [22:0] sum_blk;      // accumulator for the sum ((a[i]*d[i])^2), copied at block end
    reg         acc_ldval;    // value to load to acc_blk: 0 - 24'h0, 1 - 24'h7fffff
    wire        acc_clear=   start2[8];
    wire        acc_add=     use_k_dly[4];
    wire        acc_corr=    use_k_dly[5];
    wire        acc_to_out=  finish2[6];
    wire [17:0] mult_a;
    wire [17:0] mult_b;
    wire [35:0] mult_p;
    reg  [17:0] mult_s;       // truncated and saturated (always positive) multiplier result (before calculating squared)
    reg         next_ac;      // next will be AC component
    reg         use_coef;     // use multiplier for the first operation - DCT coeff. by table elements
    reg         started_luma; // started Luma block
    reg         luma_dc_out;  // 1 cycle ahead of the luma DC component out (optionally combined with the WOI (mode=3))
    reg         luma_dc_acc;  // 1 cycle ahead of the luma DC component out (always combined with the WOI)
    reg         was_last_luma;
    reg         copy_acc_frame;

    assign fdo_minus_max[12:0]= {1'b0,fdo[11:0]}-{1'b0,quant_dc_tdo[15:5]};
    assign combined_qf[12:0]=stren?({quant_d[12:0]}+{1'b0,fdo[11:0]}): //original image plus positive
                                   ({quant_d[12],quant_d[12:1]}+      // half original
                                    {fdo_minus_max[12],fdo_minus_max[12:1]}); // plus half signed
    assign next_do[12:0] = (mode[1:0]==2'h1)?(luma_dc_out?fdo_minus_max[12:0]:13'h0):
                                             ((mode[1] && luma_dc_out )? combined_qf[12:0]: {quant_d[12:0]} );

    // write/read indices of the block attributes memory
    always @ (posedge clk) begin
        if (!en)        ic[2:0] <= 3'b0;
        else if (stb)   ic[2:0] <= ic[2:0]+1;
        if (!en)        oc[2:0] <= 3'b0;
        else if (start) oc[2:0] <= oc[2:0]+1;
    end

// writing window parameters in the last bank of a table
    always @ (negedge sclk) begin
        if (twe) begin
            wnd_reg[11:0] <= tdi[11:0] ;
            wnd_a         <= ta[2:0];
        end
        wnd_wr <= twe && (ta[9:3]==7'h78) ; // first 8 location in the last 64-word bank
        if (wnd_wr) begin
            case (wnd_a[2:0])
                3'h0: wnd_left[8:0]       <= wnd_reg[11:3] ;
                3'h1: wnd_right[8:0]      <= wnd_reg[11:3] ;
                3'h2: wnd_top[8:0]        <= wnd_reg[11:3] ;
                3'h3: wnd_bottom[8:0]     <= wnd_reg[11:3] ;
                3'h4: wnd_totalwidth[8:1] <= wnd_reg[11:4] ;
                3'h5: filt_sel0[3:0]      <= wnd_reg[3:0] ;
                3'h6: stren               <= wnd_reg[0] ;
            endcase
        end
    end

// determine if this block needs to be processed (Y, inside WOI)
    reg  [ 7:0] mblk_hor;  //horizontal macroblock (2x2 blocks) counter
    reg  [ 7:0] mblk_vert; //vertical macroblock (2x2 blocks) counter
    wire        start_of_line=  (first || (mblk_hor[7:0] == wnd_totalwidth[8:1]));
    wire        first_in_macro= (tn[2:0]==3'h0);
    reg         in_woi; // maybe specified as slow
    always @(posedge clk) begin
        if (first_in_macro && start)                  mblk_hor[7:0]  <= start_of_line? 8'h0:(mblk_hor[7:0]+1);
        if (first_in_macro && start && start_of_line) mblk_vert[7:0] <= first? 8'h0:(mblk_vert[7:0]+1);
        start_d      <= start;
        tn_d[2:0]    <= tn[2:0];
        // block coordinates are {macroblock counter, block bit of tn}; chroma blocks (tn[2]) are never in WOI
        if (start_d) in_woi <= !tn_d[2] &&
                               ({mblk_hor [7:0],tn_d[0]} >= wnd_left[8:0]) &&
                               ({mblk_hor [7:0],tn_d[0]} <= wnd_right[8:0]) &&
                               ({mblk_vert[7:0],tn_d[1]} >= wnd_top[8:0]) &&
                               ({mblk_vert[7:0],tn_d[1]} <= wnd_bottom[8:0]);
    end

//Will use posedge sclk to balance huffman and system
// wire clkdiv2;
// FD i_clkdiv2(.C(clk), .D(!clkdiv2), .Q(clkdiv2));
    reg clkdiv2=0;
    always @ (posedge clk) begin
        clkdiv2 <= ~clkdiv2;
    end
    // clkdiv2 (toggles @clk) re-sampled @sclk; csync is high when two consecutive samples were equal
    reg  [2:0] clksync;
    wire       csync=clksync[2];
    always @ (posedge sclk) begin
        clksync[2:0] <= {(clksync[1]==clksync[0]),clksync[0],clkdiv2};
    end

    always @ (posedge clk) begin
        if (di[11]==di[12]) di_d[11:0] <=di[11:0];
        else                di_d[11:0] <= {~di[11],{11{di[11]}}}; //saturate
    end

    // The multiplier is time-shared: (table coefficient * data) when use_coef, otherwise squaring mult_s
    assign mult_a[17:0] = use_coef ? {1'b0,tdo[15:0],1'b0}: mult_s[17:0];
    assign mult_b[17:0] = use_coef ? {d1[10:0],{7{d1[0]}}}: mult_s[17:0];

    always @ (posedge sclk) begin
        filt_sel[3:0] <= filt_sel0[3:0];
        if (clksync[2]) d1[11:0]<=di_d[11:0];
        start2[8:0] <= {start2[7:0], start && csync};
        finish2[7:0]<= {finish2[6:0],use_coef && !next_ac};
        if (!en || start2[0])                       tba[5:0] <= 6'h0;
        else if (!csync && (tba[5:0] != 6'h3f))     tba[5:0] <= tba[5:0] + 1;
        // Saturation: mult_p[31:14] is used only when the bits above it agree with its sign bit
        // (mult_p[35:31] all ones or all zeros), otherwise the maximal positive value is loaded.
        // FIX: the second term was !(&mult_p[35:31]), which made the condition always true
        // (x || !x) and disabled the saturation.
        mult_s[17:0] <= (&mult_p[35:31] || !(|mult_p[35:31]))?mult_p[31:14]:18'h1ffff;
        next_ac      <= en && (start2[3] || (next_ac && ((tba[5:0] != 6'h3f) || csync )));
        use_coef     <= next_ac && !csync;
        use_k_dly[5:0] <= {use_k_dly[4:0],use_coef};
        acc_ldval    <= !(|start2[7:6]);
        if (acc_clear || (acc_corr && acc_blk[23])) acc_blk[23:0] <= {1'b0,{23{acc_ldval}}};
        else if (acc_add)                           acc_blk[23:0] <= acc_blk[23:0] + mult_p[31:8]; // mult_p[35:8];
        if (acc_to_out) fdo[11:0] <= (|acc_blk[23:20])?12'hfff:acc_blk[19:8]; // positive, 0..0xfff
        if (acc_to_out) sum_blk[22:0] <= acc_blk[22:0]; // accumulator for the sum ((a[i]*d[i])^2), copied at block end
    end
// acc_blk will (after corr) be always with MSB=0 - max 24'h7fffff
// for image output - max 24'h0fffff->12 bit signed, shifted
// combining output
//assign combined_qf[12:0]={quant_d[11],quant_d[11:0]}+{fdo[11],fdo[11:0]};
// SRL16 i_out_mono (.Q(out_mono), .A0(1'b1), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk), .D(started_luma)); // timing not critical
// SRL16 i_out_window (.Q(out_window), .A0(1'b1), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk), .D(in_woi)); // timing not critical
    dly_16 #(.WIDTH(1)) i_out_mono(.clk(clk), .rst(1'b0), .dly(15), .din(started_luma), .dout(out_mono)); // timing not critical
    dly_16 #(.WIDTH(1)) i_out_window(.clk(clk),.rst(1'b0), .dly(15), .din(in_woi), .dout(out_window)); // timing not critical

    always @ (posedge clk) begin
        if (start) started_luma <= !tn[2];
        luma_dc_out    <= quant_ds && out_mono && ((mode[1:0]!=3) || out_window);
        luma_dc_acc    <= quant_ds && out_mono && out_window;
        was_last_luma  <= en && last && out_mono;
        copy_acc_frame <= was_last_luma && !out_mono;
        if (first && first_in_macro) acc_frame[39:0] <= 40'h0;
        else if (luma_dc_acc)        acc_frame[39:0] <= acc_frame[39:0] + sum_blk[22:0];
        if (copy_acc_frame)          hifreq[31:0]    <= acc_frame[39:8];
        pre_ds         <= quant_ds;
        ds             <= pre_ds;
        pre_do[12:0]   <= next_do[12:0];
        need_corr_max  <=luma_dc_out && (mode[1:0]!=2'h0);
        // positive values above quant_dc_tdo[15:5] are limited to it when need_corr_max
        do[12:0]       <= (need_corr_max && !pre_do[12] && (pre_do[11] || (pre_do[10:0]>quant_dc_tdo[15:5])) )?
                          {2'b0,quant_dc_tdo[15:5]} :
                          pre_do[12:0];
    end
/*
MULT18X18SIO #(
.AREG(1), // Enable the input registers on the A port (1=on, 0=off)
.BREG(1), // Enable the input registers on the B port (1=on, 0=off)
.B_INPUT("DIRECT"), // B cascade input "DIRECT" or "CASCADE"
.PREG(1) // Enable the input registers on the P port (1=on, 0=off)
) i_focus_mult (
.BCOUT(), // 18-bit cascade output
.P(mult_p), // 36-bit multiplier output
.A(mult_a), // 18-bit multiplier input
.B(mult_b), // 18-bit multiplier input
.BCIN(18'h0), // 18-bit cascade input
.CEA(en), // Clock enable input for the A port
.CEB(en), // Clock enable input for the B port
.CEP(en), // Clock enable input for the P port
.CLK(sclk), // Clock input
.RSTA(1'b0), // Synchronous reset input for the A port
.RSTB(1'b0), // Synchronous reset input for the B port
.RSTP(1'b0) // Synchronous reset input for the P port
);
*/
    // Behavioral replacement of the multiplier primitive above: registered inputs and registered product.
    // NOTE(review): unlike the primitive (CEA/CEB/CEP = en) these registers are not gated by en
    // and the product is unsigned - confirm this matches the intended behavior.
    reg [35:0] mult_p_r;
    reg [17:0] mult_a_r;
    reg [17:0] mult_b_r;
    assign mult_p = mult_p_r;
    always @(posedge sclk) begin
        mult_a_r <= mult_a;
        mult_b_r <= mult_b;
        mult_p_r <= mult_a_r * mult_b_r;
    end
/*
RAM16X1D i_tn0 (.D(tni[0]),.DPO(tn[0]),.A0(ic[0]),.A1(ic[1]),.A2(1'b0),.A3(1'b0),.DPRA0(oc[0]),.DPRA1(oc[1]),.DPRA2(1'b0),.DPRA3(1'b0),.WCLK(clk),.WE(stb));
RAM16X1D i_tn1 (.D(tni[1]),.DPO(tn[1]),.A0(ic[0]),.A1(ic[1]),.A2(1'b0),.A3(1'b0),.DPRA0(oc[0]),.DPRA1(oc[1]),.DPRA2(1'b0),.DPRA3(1'b0),.WCLK(clk),.WE(stb));
RAM16X1D i_tn2 (.D(tni[2]),.DPO(tn[2]),.A0(ic[0]),.A1(ic[1]),.A2(1'b0),.A3(1'b0),.DPRA0(oc[0]),.DPRA1(oc[1]),.DPRA2(1'b0),.DPRA3(1'b0),.WCLK(clk),.WE(stb));
RAM16X1D i_first (.D(firsti),.DPO(first),.A0(ic[0]),.A1(ic[1]),.A2(1'b0),.A3(1'b0),.DPRA0(oc[0]),.DPRA1(oc[1]),.DPRA2(1'b0),.DPRA3(1'b0),.WCLK(clk),.WE(stb));
RAM16X1D i_last (.D(lasti), .DPO(last), .A0(ic[0]),.A1(ic[1]),.A2(1'b0),.A3(1'b0),.DPRA0(oc[0]),.DPRA1(oc[1]),.DPRA2(1'b0),.DPRA3(1'b0),.WCLK(clk),.WE(stb));
*/
    // 4-entry block attributes memory {last, first, tn}: written at stb, read asynchronously at oc.
    // FIX: the write is now qualified by stb (as WE(stb) of the RAM16X1D primitives above);
    // writing on every clock overwrote the entry at ic with inputs that are valid only @ stb.
    reg [ 4:0] ram4[0:3];
    always @ (posedge clk) begin
        if (stb) ram4[ic[1:0]] <= {lasti,firsti,tni[2:0]};
    end
    assign {last,first,tn[2:0]} = ram4[oc[1:0]];
// is it correct posedge sclk on rd, negedge on wr and no xclk?
/*
RAMB16_S18_S18 i_focus_dct_tab (
.DOA(tdo[15:0]), // Port A 16-bit Data Output
.DOPA(), // Port A 2-bit Parity Output
.ADDRA({filt_sel[3:0],tba[2:0],tba[5:3]}), // Port A 10-bit Address Input
.CLKA(sclk), // Port A Clock
.DIA(16'b0), // Port A 16-bit Data Input
.DIPA(2'b0), // Port A 2-bit parity Input
.ENA(1'b1), // Port A RAM Enable Input
.SSRA(1'b0), // Port A Synchronous Set/Reset Input
.WEA(1'b0), // Port A Write Enable Input
.DOB(), // Port B 16-bit Data Output
.DOPB(), // Port B 4-bit Parity Output
.ADDRB({ta[9:0]}), // Port B 2-bit Address Input
.CLKB(!sclk), // Port B Clock
.DIB(tdi[15:0]), // Port B 16-bit Data Input
.DIPB(2'b0), // Port-B 2-bit parity Input
.ENB(1'b1), // PortB RAM Enable Input
.SSRB(1'b0), // Port B Synchronous Set/Reset Input
.WEB(twe) // Port B Write Enable Input
);
*/
    // Weight table. Read side: address registers (filt_sel, tba) and the consumer (multiplier)
    // are clocked by sclk. Write side: table data is written at the falling edge of sclk when twe is set.
    // FIX: read clock was clk (wrong clock domain for the sclk-registered address), and write enable
    // was tied to !sclk instead of twe - both now match the RAMB16_S18_S18 connections above.
    ram18_var_w_var_r #(
        .REGISTERS    (0),
        .LOG2WIDTH_WR (4),
        .LOG2WIDTH_RD (4),
        .DUMMY        (0)
    ) i_focus_dct_tab (
        .rclk         (sclk), // input
        .raddr        ({filt_sel[3:0],tba[2:0],tba[5:3]}), // input[9:0]
        .ren          (1'b1), // input
        .regen        (1'b1), // input
        .data_out     (tdo[15:0]), // output[15:0]
        .wclk         (!sclk), // input
        .waddr        ({ta[9:0]}), // input[9:0]
        .we           (twe), // input
        .web          (4'hf), // input[3:0]
        .data_in      (tdi[15:0]) // input[15:0]
    );
endmodule
/*
** -----------------------------------------------------------------------------**
** huff_fifo393.v
**
** Part of Huffman encoder for JPEG compressor - FIFO for Huffman encoder
**
** Copyright (C) 2002-2015 Elphel, Inc
**
** -----------------------------------------------------------------------------**
** huff_fifo393.v is free software - hardware description language (HDL) code.
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
**
*/
//used the other edge of the clk2x
// FIFO between the run-length encoder (xclk domain) and the Huffman encoder (negedge xclk2x domain).
// Data is written to a block RAM at posedge xclk; each written word toggles "synci", which is
// re-sampled at negedge xclk2x to produce a single-cycle "sync_we" pulse. The read side keeps the
// number of not yet read words in diff_a and pre-fetches data through the RAM output (fifo_dav)
// into the output latch (dav, q_latch).
module huff_fifo393 (
input xclk, // pixel clock, posedge
input xclk2x, // twice frequency - uses negedge inside
input en, // will reset if ==0 (sync to xclk)
input [15:0] di, // data in (sync to xclk)
input ds, // din valid (sync to xclk)
input want_read, // consumer takes the word from the output latch (sampled @negedge xclk2x)
input want_read_early, // used instead of re_r to load the output latch when dav is set (sampled @posedge xclk2x)
output reg dav, // FIFO output latch has data (fifo_or_full)
output reg [15:0] q_latch); // output data
reg [9:0] wa; // RAM write address (xclk domain)
reg [9:0] sync_wa; // delayed wa, re_latch-calculated at output clock
// NOTE(review): sync_wa is only written, nothing in this module reads it
reg [9:0] ra_r; // RAM read address register (negedge xclk2x)
reg [9:0] ra_latch; // ra_r passed through a latch transparent while xclk2x is high
reg load_q; // enables loading of q_latch while xclk2x is low
wire [15:0] fifo_o; // RAM data output
reg ds1; // ds delayed by one xclk to give time to block ram to write data. Not needed likely.
reg synci; // toggles once per written word (xclk domain)
reg [1:0] synco; // synci re-sampled at negedge xclk2x (2-stage shift register)
reg sync_we; // single xclk2x period pulse for each ds@xclk
reg en2x; // en sync to xclk2x;
reg re_r; // next_re registered (negedge xclk2x)
reg re_latch; // next_re passed through a latch transparent while xclk2x is high
reg fifo_dav; // RAM output reg has data
reg dav_and_fifo_dav; // registered (dav && fifo_dav), calculated from the same inputs in parallel
wire ram_dav; // RAM has data inside
reg [9:0] diff_a; // number of words in RAM that are not yet read
wire next_re; // read the next word from RAM
always @ (posedge xclk) begin // input stage, no overrun detection
if (!en) wa[9:0] <= 10'b0;
else if (ds) wa[9:0] <= wa[9:0]+1;
ds1 <= ds && en;
if (!en) synci <= 1'b0;
else if (ds1) synci <= ~synci;
end
// Crossing to the xclk2x domain: each toggle of synci makes synco[0] and synco[1] differ for one cycle
always @ (negedge xclk2x) begin
en2x <= en;
synco[1:0] <= {synco[0],synci};
sync_we <= en2x && (synco[0] != synco[1]);
end
assign ram_dav= sync_we || (diff_a[9:0] != 10'b0);
// read RAM when it has data and either the two output stages are not both full or the consumer is reading
assign next_re= ram_dav && (!dav_and_fifo_dav || want_read);
always @ (negedge xclk2x) begin
dav <= en2x && (fifo_dav || (dav && !want_read));
fifo_dav <= en2x && (ram_dav ||(dav && fifo_dav && !want_read));
dav_and_fifo_dav <= en2x && (fifo_dav || (dav && !want_read)) && (ram_dav ||(dav && fifo_dav && !want_read)); // will optimize auto
re_r <= en2x && next_re;
if (!en2x) sync_wa[9:0] <= 10'b0;
else if (sync_we) sync_wa[9:0] <= sync_wa[9:0]+1;
if (!en2x) ra_r [9:0] <= 10'b0;
else if (next_re) ra_r [9:0] <= ra_r[9:0]+1;
// fill level: +1 for a write without a read, -1 for a read without a write, unchanged otherwise
if (!en2x) diff_a[9:0] <= 10'b0;
else if (sync_we && !next_re) diff_a[9:0] <= diff_a[9:0]+1;
else if (!sync_we && next_re) diff_a[9:0] <= diff_a[9:0]-1;
end
/*
LD i_re (.Q(re_latch),.G(xclk2x),.D(next_re));
LD i_ra9 (.Q(ra_latch[9]),.G(xclk2x),.D(ra_r[9]));
LD i_ra8 (.Q(ra_latch[8]),.G(xclk2x),.D(ra_r[8]));
LD i_ra7 (.Q(ra_latch[7]),.G(xclk2x),.D(ra_r[7]));
LD i_ra6 (.Q(ra_latch[6]),.G(xclk2x),.D(ra_r[6]));
LD i_ra5 (.Q(ra_latch[5]),.G(xclk2x),.D(ra_r[5]));
LD i_ra4 (.Q(ra_latch[4]),.G(xclk2x),.D(ra_r[4]));
LD i_ra3 (.Q(ra_latch[3]),.G(xclk2x),.D(ra_r[3]));
LD i_ra2 (.Q(ra_latch[2]),.G(xclk2x),.D(ra_r[2]));
LD i_ra1 (.Q(ra_latch[1]),.G(xclk2x),.D(ra_r[1]));
LD i_ra0 (.Q(ra_latch[0]),.G(xclk2x),.D(ra_r[0]));
*/
// Behavioral replacement of the LD primitives above: level-sensitive latches, transparent while xclk2x is high
always @* if (xclk2x) re_latch <= next_re;
always @* if (xclk2x) ra_latch <= ra_r;
always @ (posedge xclk2x) begin
load_q <= dav?want_read_early:re_r;
end
/*
LD_1 i_q15 (.Q( q_latch[15]),.G(xclk2x),.D(load_q?fifo_o[15]:q_latch[15]));
LD_1 i_q14 (.Q( q_latch[14]),.G(xclk2x),.D(load_q?fifo_o[14]:q_latch[14]));
LD_1 i_q13 (.Q( q_latch[13]),.G(xclk2x),.D(load_q?fifo_o[13]:q_latch[13]));
LD_1 i_q12 (.Q( q_latch[12]),.G(xclk2x),.D(load_q?fifo_o[12]:q_latch[12]));
LD_1 i_q11 (.Q( q_latch[11]),.G(xclk2x),.D(load_q?fifo_o[11]:q_latch[11]));
LD_1 i_q10 (.Q( q_latch[10]),.G(xclk2x),.D(load_q?fifo_o[10]:q_latch[10]));
LD_1 i_q9 (.Q( q_latch[ 9]),.G(xclk2x),.D(load_q?fifo_o[ 9]:q_latch[ 9]));
LD_1 i_q8 (.Q( q_latch[ 8]),.G(xclk2x),.D(load_q?fifo_o[ 8]:q_latch[ 8]));
LD_1 i_q7 (.Q( q_latch[ 7]),.G(xclk2x),.D(load_q?fifo_o[ 7]:q_latch[ 7]));
LD_1 i_q6 (.Q( q_latch[ 6]),.G(xclk2x),.D(load_q?fifo_o[ 6]:q_latch[ 6]));
LD_1 i_q5 (.Q( q_latch[ 5]),.G(xclk2x),.D(load_q?fifo_o[ 5]:q_latch[ 5]));
LD_1 i_q4 (.Q( q_latch[ 4]),.G(xclk2x),.D(load_q?fifo_o[ 4]:q_latch[ 4]));
LD_1 i_q3 (.Q( q_latch[ 3]),.G(xclk2x),.D(load_q?fifo_o[ 3]:q_latch[ 3]));
LD_1 i_q2 (.Q( q_latch[ 2]),.G(xclk2x),.D(load_q?fifo_o[ 2]:q_latch[ 2]));
LD_1 i_q1 (.Q( q_latch[ 1]),.G(xclk2x),.D(load_q?fifo_o[ 1]:q_latch[ 1]));
LD_1 i_q0 (.Q( q_latch[ 0]),.G(xclk2x),.D(load_q?fifo_o[ 0]:q_latch[ 0]));
*/
// Behavioral replacement of the LD_1 primitives above: output latch follows fifo_o while xclk2x is low and load_q is set
always @* if (~xclk2x) begin
if (load_q) q_latch <= fifo_o;
end
/*
RAMB16_S18_S18 i_fifo (
.DOA(), // Port A 16-bit Data Output
.DOPA(), // Port A 2-bit Parity Output
.ADDRA(wa[9:0]), // Port A 10-bit Address Input
.CLKA(xclk), // Port A Clock
.DIA(di[15:0]), // Port A 16-bit Data Input
.DIPA(2'b0), // Port A 2-bit parity Input
.ENA(ds), // Port A RAM Enable Input
.SSRA(1'b0), // Port A Synchronous Set/Reset Input
.WEA(1'b1), // Port A Write Enable Input
.DOB(fifo_o[15:0]),// Port B 16-bit Data Output
.DOPB(), // Port B 2-bit Parity Output
.ADDRB(ra_latch[9:0]), // Port B 10-bit Address Input
.CLKB(xclk2x), // Port B Clock
.DIB(16'b0), // Port B 16-bit Data Input
.DIPB(2'b0), // Port-B 2-bit parity Input
.ENB(re_latch), // PortB RAM Enable Input
.SSRB(1'b0), // Port B Synchronous Set/Reset Input
.WEB(1'b0) // Port B Write Enable Input
);
*/
// FIFO storage: written @posedge xclk when ds, read @xclk2x at the latched address/enable.
// NOTE(review): exact read timing depends on ram18_var_w_var_r (defined elsewhere) - confirm it
// matches the RAMB16_S18_S18 port B behavior shown above.
ram18_var_w_var_r #(
.REGISTERS (0),
.LOG2WIDTH_WR (4),
.LOG2WIDTH_RD (4),
.DUMMY (0)
) i_fifo (
.rclk (xclk2x), // input
.raddr (ra_latch[9:0]), // input[9:0]
.ren (re_latch), // input
.regen (1'b1), // input
.data_out (fifo_o[15:0]), // output[15:0]
.wclk (xclk), // input
.waddr (wa[9:0]), // input[9:0]
.we (ds), // input
.web (4'hf), // input[3:0]
.data_in (di[15:0]) // input[15:0]
);
endmodule
/*
** -----------------------------------------------------------------------------**
** huffman393.v
**
** Huffman encoder for JPEG compressor
**
** Copyright (C) 2002-2015 Elphel, Inc
**
** -----------------------------------------------------------------------------**
** huffman393 is free software - hardware description language (HDL) code.
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
**
*/
// 01/22/2004 - extended flush until ready (modified stuffer.v too)
module huffman393 (
input xclk, // pixel clock, sync to incoming data
input xclk2x, // twice frequency - uses negedge inside
input en, // will reset if ==0 (sync to xclk)
input sclk, // clock to write tables (NOW posgedge) AF2015
input twe, // enable write to a table - now the following will be valid ant negedge sclk
input [8:0] ta, // [8:0] table address
input [15:0] tdi, // [15:0] table data in
input [15:0] di, // [15:0] specially RLL prepared 16-bit data (to FIFO) (sync to xclk)
input ds, // di valid strobe (sync to xclk)
input rdy, // receiver (bit stuffer) is ready to accept data
output reg [15:0] do, // [15:0] output data
output reg [ 3:0] dl, // [3:0] data length (4'h0 is 'h16)
output reg dv, // output data valid
output reg flush, // last block done - flush the rest bits
output reg last_block,
output reg test_lbw,
output gotLastBlock); // last block done - flush the rest bits
/*
huffman i_huffman (.pclk(clk), // pixel clock
.clk(clk2x), // twice frequency - uses negedge inside
.en(cmprs_en), // enable (0 resets counter) sync to .pclk(clk)
.twe(twhe), // enable write to a table
.ta(ta[8:0]), // [8:0] table address
.tdi(di[15:0]), // [23:0] table data in (8 LSBs - quantization data, [13:9] zigzag address
.di(enc_do[15:0]), // [15:0] specially RLL prepared 16-bit data (to FIFO)
.ds(enc_dv), // di valid strobe
.rdy(stuffer_rdy), // receiver (bit stuffer) is ready to accept data
.do(huff_do), // [15:0] output data
.dl(huff_dl), // [3:0] output width (0==16)
.dv(huff_dv), // output data bvalid
.flush(flush),
.last_block(last_block),
.test_lbw(),
.gotLastBlock(test_lbw)); // last block done - flush the rest bits
*/
wire [31:0] tables_out; // Only [19:0] are used
reg [15:0] hcode_latch; // table output huffman code (1..16 bits)
reg [ 3:0] hlen_latch; // table - code length only 4 LSBs are used
reg [ 7:0] haddr_r; // index in huffman table
wire [ 7:0] haddr_next;
reg [ 7:0] haddr70_latch;
reg haddr8_latch;
wire [ 8:0] haddr = {haddr8_latch,haddr70_latch}; // index in huffman table (after latches)
wire [15:0] fifo_o;
reg stuffer_was_rdy;
reg tables_re_latch;
wire read_next; // assigned depending on steps (each other cycle for normal codes, each for special 00/F0
reg [5:0] steps;
// first stage registers
reg [5:0] rll; // 2 MSBs - counter to send "f0" codes
// replacing SRL16 with FD as SRL has longer output delay from clock
reg [3:0] rll1;
reg [3:0] rll2;
reg typeDC;
reg typeAC;
reg [11:0] sval; // signed input value
wire [1:0] code_typ0; // valid at steps[0]
reg tbsel_YC0; // valid at steps[0] - 0 -Y table, 1 - CbCr
reg [1:0] code_typ1;
reg [1:0] code_typ2;
reg code_typ3;
reg code_typ4;
reg tbsel_YC1;
reg tbsel_YC2;
reg tbsel_YC3;
reg [15:0] out_bits; // bits to send
reg [3:0] out_len; // length of bits to send (4'h0 means 16)
wire fifo_or_full; // fifo output register full read_next
wire will_read;
wire [10:0] var_do;
wire [3:0] var_dl;
wire [3:0] var_dl_late;
reg dv0;
reg eob;
wire gotDC;
wire gotAC;
wire gotRLL;
wire gotEOB;
wire gotLastWord;
wire gotColor;
wire want_read; // as will_read, but w/o fifo status
reg ready_to_flush; // read the last data from fifo
reg en2x; // en sync to xclk2x;
wire pre_dv;
wire [15:0] pre_bits;
wire [ 3:0] pre_len;
reg twe_d; // table write enable (twe) delayed by 1 clock
always @ (negedge xclk2x) en2x <= en;
assign gotDC= fifo_o[15] && fifo_o[14];
assign gotAC= fifo_o[15] && !fifo_o[14];
assign gotRLL= !fifo_o[15] && !fifo_o[12];
assign gotEOB= !fifo_o[15] && fifo_o[12];
assign gotLastBlock= fifo_o[15] && fifo_o[14] && fifo_o[12];
assign gotLastWord= !fifo_o[14] && fifo_o[12]; // (AC or RLL) and last bit set
assign gotColor= fifo_o[13];
always @(negedge xclk2x) stuffer_was_rdy <= !en2x || rdy; // stuffer ready shoud be on if !en (move to register?)for now]
reg stuffer_was_rdy_early_latch;
wire want_read_early;
/*
LD i_stuffer_was_rdy_early (.Q(stuffer_was_rdy_early_latch),.G(xclk2x),.D(!en2x || rdy));
LD i_tables_re (.Q(tables_re_latch),.G(xclk2x),.D(en2x && rdy));
*/
always @* if (xclk2x) stuffer_was_rdy_early_latch <= !en2x || rdy;
always @* if (xclk2x) tables_re_latch <= en2x && rdy;
assign read_next= en2x && ((!steps[0] && !rll[5]) || eob ) && fifo_or_full; // fifo will never have data after the last block...
assign will_read= stuffer_was_rdy && fifo_or_full && en2x && ((!steps[0] && !rll[5]) || eob ); // fifo will never have data after the last block...
assign want_read= stuffer_was_rdy && ((!steps[0] && !rll[5]) || eob ); // for FIFO
assign want_read_early= stuffer_was_rdy_early_latch && ((!steps[0] && !rll[5]) || eob ); // for FIFO
always @ (negedge xclk2x) if (stuffer_was_rdy) begin
eob <= read_next && gotEOB;// will be 1 only during step[0]
if (!en2x) steps[5:0] <= 'b0;
else steps[5:0] <= {steps[4] && code_typ4, // will be skipped for codes 00/F0
steps[3:0],
(read_next && !(gotRLL && (fifo_o[5:4]==2'b00))) || rll[5] }; // will not start if it was <16, waiting for AC
end
// End-of-frame flags (all forced low while en2x is low):
//   last_block     - set when a word flagged gotLastBlock is read with stuffer_was_rdy high;
//                    once set it is cleared by flush
//   ready_to_flush - set when a word flagged gotLastWord is read while last_block is set;
//                    once set it is cleared by flush
//   test_lbw       - last_block && gotLastWord, registered (not qualified by will_read)
//   flush          - raised when rdy, stuffer_was_rdy and ready_to_flush are all high and
//                    every bit of steps is zero; once high it stays high until a cycle with rdy high
always @ (negedge xclk2x) begin
last_block <= en2x && (last_block?(!flush):(stuffer_was_rdy && will_read && gotLastBlock));
ready_to_flush <= en2x && (ready_to_flush?(!flush):(stuffer_was_rdy && last_block && will_read && gotLastWord));
test_lbw <= en2x && last_block && gotLastWord;
// did not work if flush was just after not ready?
flush <= en2x &&( flush?(!rdy):(rdy && stuffer_was_rdy && ready_to_flush && !(|steps)) );
end
// Capture the fields of the FIFO word on every cycle in which a word is consumed (will_read).
always @ (negedge xclk2x) begin
    if (will_read) begin
        // low 12 bits of the FIFO word
        sval[11:0] <= fifo_o[11:0];
        // word type flags
        typeAC <= gotAC;
        typeDC <= gotDC;
        // table select is refreshed only by DC words and keeps its value otherwise
        if (gotDC) tbsel_YC0 <= gotColor;
    end
end
// Run-length register rll[5:0]; updated only when stuffer_was_rdy is high.
//   rll[5:4]: cleared when disabled, when an AC word is read, or at steps[0] of an AC word;
//             loaded from fifo_o[5:4] when an RLL word is read; otherwise counts down to zero.
//   rll[3:0]: cleared when disabled, when a word that is neither AC nor RLL is read,
//             or at steps[0] of an AC word; loaded from fifo_o[3:0] when an RLL word is read;
//             otherwise holds its value.
always @ (negedge xclk2x) if (stuffer_was_rdy) begin
if (!en2x || (read_next && gotAC) || (steps[0] && typeAC)) rll[5:4] <= 2'b0;
else if (read_next && gotRLL) rll[5:4] <= fifo_o[5:4];
else if (rll[5:4]!=2'b00) rll[5:4] <= rll[5:4]-1;
if (!en2x || (read_next && !gotAC && !gotRLL) || (steps[0] && typeAC)) rll[3:0] <= 4'b0;
else if (read_next && gotRLL) rll[3:0] <= fifo_o[3:0];
end
// code_typ0[1:0] - type of the code to emit:
//   bit 1: DC word, or (not EOB and rll[5:4]==0)
//   bit 0: DC word, or (not EOB and rll[5:4]!=0)
// which gives 2'b11 - DC, 2'b10 - normal AC, 2'b01 - skip 16 zeros, 2'b00 - end of block
// (same encoding as used by the haddr_next selection below).
assign code_typ0={typeDC || (!eob && (rll[5:4]==2'b0)),
typeDC || (!eob && (rll[5:4]!=2'b0))};
// Low 8 bits of the Huffman table address, selected by the delayed code type (code_typ2)
assign haddr_next[7:0] = code_typ2[1]?
(code_typ2[0]?{var_dl[3:0],4'hf}: // DC (reusing the spare cells of the AC table)
{rll2[3:0],var_dl[3:0]}): // AC normal code
(code_typ2[0]?8'hf0: //skip 16 zeros code
8'h00); //skip to end of block code
// Registered copy of the address, captured at steps[2] when stuffer_was_rdy is high
always @ (negedge xclk2x) if (stuffer_was_rdy && steps[2]) begin // may be just if (stuffer_was_rdy)
haddr_r[7:0] <= haddr_next[7:0];
end
/*
LD i_haddr_8 (.Q(haddr[8]),.G(xclk2x),.D(stuffer_was_rdy?tbsel_YC2:tbsel_YC3));
LD i_haddr_7 (.Q(haddr[7]),.G(xclk2x),.D((stuffer_was_rdy && steps[2])?haddr_next[7]:haddr_r[7]));
LD i_haddr_6 (.Q(haddr[6]),.G(xclk2x),.D((stuffer_was_rdy && steps[2])?haddr_next[6]:haddr_r[6]));
LD i_haddr_5 (.Q(haddr[5]),.G(xclk2x),.D((stuffer_was_rdy && steps[2])?haddr_next[5]:haddr_r[5]));
LD i_haddr_4 (.Q(haddr[4]),.G(xclk2x),.D((stuffer_was_rdy && steps[2])?haddr_next[4]:haddr_r[4]));
LD i_haddr_3 (.Q(haddr[3]),.G(xclk2x),.D((stuffer_was_rdy && steps[2])?haddr_next[3]:haddr_r[3]));
LD i_haddr_2 (.Q(haddr[2]),.G(xclk2x),.D((stuffer_was_rdy && steps[2])?haddr_next[2]:haddr_r[2]));
LD i_haddr_1 (.Q(haddr[1]),.G(xclk2x),.D((stuffer_was_rdy && steps[2])?haddr_next[1]:haddr_r[1]));
LD i_haddr_0 (.Q(haddr[0]),.G(xclk2x),.D((stuffer_was_rdy && steps[2])?haddr_next[0]:haddr_r[0]));
*/
// wire [ 8:0] haddr = {haddr8_latch,haddr70_latch}; // index in huffman table (after latches)
// Behavioral latches (transparent while xclk2x is high) that take the place of the
// LD primitives in the commented-out code above.
// Address bit 8: table select, taken one pipeline stage later when the stuffer was not ready.
always @* if (xclk2x) begin
if (stuffer_was_rdy) haddr8_latch <= tbsel_YC2;
else haddr8_latch <= tbsel_YC3;
end
// Address bits 7:0: the new address during steps[2] of an enabled cycle, otherwise the registered copy.
always @* if (xclk2x) begin
if (stuffer_was_rdy && steps[2]) haddr70_latch <= haddr_next;
else haddr70_latch <= haddr_r;
end
// Output candidates:
//   steps[4] - Huffman code from the table latches (hcode_latch / hlen_latch), always valid
//   steps[5] - variable-length value bits (var_do / var_dl_late), valid only if the length is non-zero
assign pre_dv = steps[4] || (steps[5] && (var_dl_late[3:0]!=4'b0));
assign pre_bits[15:0] = steps[5]?{5'b0,var_do[10:0]}: hcode_latch[15:0];
assign pre_len [ 3:0] = steps[5]? var_dl_late[ 3:0]: hlen_latch [3:0];
// Holding registers: remember the candidate produced in the last cycle with stuffer_was_rdy high
always @ (negedge xclk2x) if (stuffer_was_rdy) begin
dv0 <= pre_dv;
out_bits[15:0] <= pre_bits[15:0];
out_len [ 3:0] <= pre_len [ 3:0];
end
// Module outputs: updated when disabled or when the stuffer is ready; take the fresh
// candidate if stuffer_was_rdy is high, otherwise the value kept in the holding registers
always @ (negedge xclk2x) if (!en2x || rdy) begin
dv <= stuffer_was_rdy? pre_dv:dv0;
do[15:0] <= stuffer_was_rdy? pre_bits[15:0]:out_bits[15:0];
dl[ 3:0] <= stuffer_was_rdy? pre_len [ 3:0]:out_len [ 3:0];
end
// Delay lines for the code type, the low run-length bits and the table select.
// Every stage advances only on cycles with stuffer_was_rdy high.
// Synthesis note: "Extract shift registers" should be off! FD has lower output delay than SRL16
always @ (negedge xclk2x) begin
    if (stuffer_was_rdy) begin
        // table select: three stages
        tbsel_YC3 <= tbsel_YC2;
        tbsel_YC2 <= tbsel_YC1;
        tbsel_YC1 <= tbsel_YC0;
        // run length (low 4 bits): two stages
        rll2[3:0] <= rll1[3:0];
        rll1[3:0] <= rll[3:0];
        // code type: two full-width stages, then only bit 1 is carried for two more
        code_typ4 <= code_typ3;
        code_typ3 <= code_typ2[1];
        code_typ2[1:0] <= code_typ1[1:0];
        code_typ1[1:0] <= code_typ0[1:0];
    end
end
/*
LD_1 i_hlen3 (.Q( hlen_latch[ 3]),.G(xclk2x),.D(tables_out[19]));
LD_1 i_hlen2 (.Q( hlen_latch[ 2]),.G(xclk2x),.D(tables_out[18]));
LD_1 i_hlen1 (.Q( hlen_latch[ 1]),.G(xclk2x),.D(tables_out[17]));
LD_1 i_hlen0 (.Q( hlen_latch[ 0]),.G(xclk2x),.D(tables_out[16]));
LD_1 i_hcode15(.Q(hcode_latch[15]),.G(xclk2x),.D(tables_out[15]));
LD_1 i_hcode14(.Q(hcode_latch[14]),.G(xclk2x),.D(tables_out[14]));
LD_1 i_hcode13(.Q(hcode_latch[13]),.G(xclk2x),.D(tables_out[13]));
LD_1 i_hcode12(.Q(hcode_latch[12]),.G(xclk2x),.D(tables_out[12]));
LD_1 i_hcode11(.Q(hcode_latch[11]),.G(xclk2x),.D(tables_out[11]));
LD_1 i_hcode10(.Q(hcode_latch[10]),.G(xclk2x),.D(tables_out[10]));
LD_1 i_hcode9 (.Q(hcode_latch[ 9]),.G(xclk2x),.D(tables_out[ 9]));
LD_1 i_hcode8 (.Q(hcode_latch[ 8]),.G(xclk2x),.D(tables_out[ 8]));
LD_1 i_hcode7 (.Q(hcode_latch[ 7]),.G(xclk2x),.D(tables_out[ 7]));
LD_1 i_hcode6 (.Q(hcode_latch[ 6]),.G(xclk2x),.D(tables_out[ 6]));
LD_1 i_hcode5 (.Q(hcode_latch[ 5]),.G(xclk2x),.D(tables_out[ 5]));
LD_1 i_hcode4 (.Q(hcode_latch[ 4]),.G(xclk2x),.D(tables_out[ 4]));
LD_1 i_hcode3 (.Q(hcode_latch[ 3]),.G(xclk2x),.D(tables_out[ 3]));
LD_1 i_hcode2 (.Q(hcode_latch[ 2]),.G(xclk2x),.D(tables_out[ 2]));
LD_1 i_hcode1 (.Q(hcode_latch[ 1]),.G(xclk2x),.D(tables_out[ 1]));
LD_1 i_hcode0 (.Q(hcode_latch[ 0]),.G(xclk2x),.D(tables_out[ 0]));
*/
// Behavioral latches on the table RAM output, transparent while xclk2x is LOW
// (opposite phase to the address latches). They take the place of the LD_1 primitives
// in the commented-out code above: bits [19:16] are the code length, bits [15:0] the code.
always @* if (~xclk2x) hlen_latch <= tables_out[19:16];
always @* if (~xclk2x) hcode_latch <= tables_out[15:0];
// Input FIFO: written with di/ds (sync to xclk), read under control of want_read/want_read_early.
// Its output word fifo_o is what the gotDC/gotAC/gotRLL/gotEOB decoders examine, and
// fifo_or_full qualifies read_next/will_read.
huff_fifo393 i_huff_fifo (
.xclk(xclk), // input
.xclk2x(xclk2x), // input
.en(en), // input
.di(di[15:0]), // input[15:0] data in (sync to xclk)
.ds(ds), // input din valid (sync to xclk)
.want_read(want_read), // input
.want_read_early(want_read_early), // input
.dav(fifo_or_full), // output reg FIFO output register has data
.q_latch(fifo_o[15:0])); // output[15:0] reg data (will add extra buffering if needed)
// Variable-length encoder for the 12-bit signed value sval; started at steps[0] and
// frozen whenever stuffer_was_rdy is low. Its outputs feed the table address (var_dl)
// and the steps[5] output candidate (var_do, var_dl_late).
varlen_encode393 i_varlen_encode(
.clk (xclk2x), // input
.en (stuffer_was_rdy), // input will enable registers. 0 - freeze
.start (steps[0]), // input
.d (sval[11:0]), // input[11:0] 12-bit signed
.l (var_dl[ 3:0]), // output[3:0] reg code length
.l_late (var_dl_late[3:0]), // output[3:0] reg
.q (var_do[10:0])); // output[10:0] reg code
// always @ (negedge xclk2x) twe_d <= twe;
// twe_d: table write enable delayed by one sclk cycle. It is used both as the LSB of the
// write address and, OR-ed with twe, as the write enable of the table RAM below.
always @ (posedge sclk) twe_d <= twe;
/*
RAMB16_S18_S36 i_htab (
.DOA(), // Port A 16-bit Data Output
.DOPA(), // Port A 2-bit Parity Output
.ADDRA({ta[8:0],twe_d}), // Port A 10-bit Address Input
.CLKA(!xclk2x), // Port A Clock
.DIA(tdi[15:0]), // Port A 16-bit Data Input
.DIPA(2'b0), // Port A 2-bit parity Input
.ENA(1'b1), // Port A RAM Enable Input
.SSRA(1'b0), // Port A Synchronous Set/Reset Input
.WEA(twe | twe_d),// Port A Write Enable Input
.DOB({unused[11:0],tables_out[19:0]}), // Port B 32-bit Data Output
.DOPB(), // Port B 4-bit Parity Output
.ADDRB(haddr[8:0]), // Port B 9-bit Address Input
.CLKB(xclk2x), // Port B Clock
.DIB(32'b0), // Port B 32-bit Data Input
.DIPB(4'b0), // Port-B 4-bit parity Input
.ENB(tables_re_latch), // PortB RAM Enable Input
.SSRB(1'b0), // Port B Synchronous Set/Reset Input
.WEB(1'b0) // Port B Write Enable Input
);
*/
// Huffman table RAM (takes the place of the RAMB16_S18_S36 kept in the comment above).
// Read side: clocked by xclk2x, addressed by haddr, enabled by tables_re_latch.
// Write side: clocked by sclk, 16-bit data tdi at address {ta[8:0],twe_d}.
// NOTE(review): presumably LOG2WIDTH_WR=4 / LOG2WIDTH_RD=5 select a 16-bit write port and a
// 32-bit read port, so each twe pulse writes two consecutive 16-bit halves - confirm against
// ram18_var_w_var_r and the table-write timing.
ram18_var_w_var_r #(
.REGISTERS(0),
.LOG2WIDTH_WR(4),
.LOG2WIDTH_RD(5),
.DUMMY(0)
) i_htab (
.rclk(xclk2x), // input
.raddr(haddr[8:0]), // input[8:0]
.ren(tables_re_latch), // input
.regen(1'b1), // input
// .data_out({unused[11:0],tables_out[19:0]}), // output[31:0]
.data_out(tables_out), // output[31:0]
.wclk(sclk), // input
.waddr({ta[8:0],twe_d}), // input[9:0]
.we(twe | twe_d), // input
.web(4'hf), // input[3:0]
.data_in(tdi[15:0]) // input[15:0]
);
endmodule
......@@ -46,7 +46,10 @@ module jp_channel#(
input [63:0] buf_wdata, // input[63:0]
input page_ready_chn, // single mclk (posedge)
output next_page_chn // single mclk (posedge): Done with the page in the buffer, memory controller may read more data
output next_page_chn, // single mclk (posedge): Done with the page in the buffer, memory controller may read more data
// statistics data was not used in late nc353
output statistics_dv,
output [15:0] statistics_do
);
......@@ -70,7 +73,7 @@ module jp_channel#(
wire [ 9:0] m_cb; // [9:0] scale for CB - default 0.564 (10'h90)
wire [ 9:0] m_cr; // [9:0] scale for CB - default 0.713 (10'hb6)
reg [ 1:0] cmprs_fmode_this; // focusing/overlay mode
//TODO: assign next 5 values from converter_type[2:0]
wire [ 5:0] mb_w_m1; // macroblock width minus 1 // 3 LSB not used, SHOULD BE SET to 3'b111
......@@ -130,10 +133,11 @@ module jp_channel#(
wire color_first; // sending first_r MCU (valid @ ds)
wire color_last; // sending last_r MCU (valid @ ds)
// below signals valid at ds ( 1 later than tn, first_r, last_r)
wire [2:0] yc_nodc_component_num; //[2:0] - component number (YCbCr: 0 - Y, 1 - Cb, 2 - Cr, JP4: 0-1-2-3 in sequence (depends on shift) 4 - don't use
wire yc_nodc_component_color; // use color quantization table (YCbCR, jp4diff)
wire color_first; // first_r this component in a frame (DC absolute, otherwise - difference to previous)
wire yc_nodc_component_lastinmb; // last_r component in a macroblock;
wire [2:0] component_num; //[2:0] - component number (YCbCr: 0 - Y, 1 - Cb, 2 - Cr, JP4: 0-1-2-3 in sequence (depends on shift) 4 - don't use
wire component_color; // use color quantization table (YCbCR, jp4diff)
wire component_first; // first this component in a frame (DC absolute, otherwise - difference to previous)
wire component_lastinmb; // last_r component in a macroblock;
......@@ -297,10 +301,10 @@ module jp_channel#(
.tn (color_tn), // output[2:0]
.first (color_first), // output reg
.last (color_last), // output reg
.component_num (yc_nodc_component_num), // output[2:0]
.component_color (yc_nodc_component_color), // output
.component_first (color_first), // output
.component_lastinmb (yc_nodc_component_lastinmb) // output reg
.component_num (component_num), // output[2:0]
.component_color (component_color), // output
.component_first (component_first), // output
.component_lastinmb (component_lastinmb)// output reg
);
// wire [ 9:0] yc_nodc; // [9:0] data out (4:2:0) (signed, average=0)
......@@ -316,7 +320,7 @@ module jp_channel#(
reg first_block_color_after; // after color conversion,
reg first_block_dct; // after DCT
wire first_block_quant; // after quantizer
always @ (posedge clk) begin
always @ (posedge xclk) begin
if (dct_start) first_block_color_after <= first_block_color;
if (dct_last_in) first_block_dct <= first_block_color_after;
end
......@@ -337,10 +341,15 @@ module jp_channel#(
wire quant_start;
dly_16 #(.WIDTH(1)) i_quant_start (.clk(xclk),.rst(1'b0), .dly(0), .din(dct_pre_first_out), .dout(quant_start)); // dly=0+1
// TODO: Change interface
// TODO: Change interface (first are negedge, twhe - @posedge mclk)
wire twqe;
wire twce;
wire [8:0] ta;
wire twfe; // focusing table write enable
wire twhe; // now @posedge mclk
wire [9:0] ta; // some use [8:0]
wire [15:0] tdi;
reg [ 2:0] cmprs_qpage_this;
......@@ -352,10 +361,11 @@ module jp_channel#(
reg dcc_en;
wire dccout;
wire [ 2:0] hfc_sel;
wire dccvld;
wire [15:0] dccdata; // was not used in late nc353
wire dccvld; // was not used in late nc353
always @ (posedge clk) begin
always @ (posedge xclk) begin
if (!dccout) dcc_en <=1'b0;
else if (dct_start && color_first && (color_tn[2:0]==3'b001)) dcc_en <=1'b1; // 3'b001 - closer to the first "start" in quantizator
end
......@@ -366,9 +376,9 @@ module jp_channel#(
.sclk (mclk), // input system clock, twqe, twce, ta,tdi - valid @posedge (ra, tdi - 2 cycles ahead (was negedge)
.twqe (twqe), // input enable write to a quantization table
.twce (twce), // input enable write to a coring table
.ta (ta), // input[8:0] table address
.ta (ta[8:0]), // input[8:0] table address
.tdi (tdi), // input[15:0] data in (8 LSBs - quantization data - obsolete?)
.ctypei (yc_nodc_component_color),// input component type input (Y/C)
.ctypei (component_color), // input component type input (Y/C)
.dci (yc_avr), // input[8:0] - average value in a block - subtracted before DCT. now normal signed number
.first_stb (first_block_color), // input - this is first stb pulse in a frame
.stb (dct_start), // input - strobe that writes ctypei, dci
......@@ -386,12 +396,230 @@ module jp_channel#(
.color_first (color_first), // input - first MCU in a frame
.coring_num (coring_num), // input[2:0] - coring table pair number (0..7)
.dcc_vld (dccvld), // output reg - single cycle when dcc_data is valid
.dcc_data (), // output[15:0] - dc component data out (for reading by software)
.dcc_data (dccdata[15:0]), // output[15:0] - dc component data out (for reading by software)
.n000 (n000), // input[7:0] - number of zero pixels (255 if 256) - to be multiplexed with dcc
.n255 (n255) // input[7:0] - number of 0xff pixels (255 if 256) - to be multiplexed with dcc
);
// focus sharp module calculates amount of high-frequency components and optionally overlays/replaces actual image
// Sits between the quantizer and the DC/AC encoder: it takes quant_ds/quant_do and its
// outputs focus_do/focus_ds are what encoderDCAC393 receives as zdi/zds.
wire [12:0] focus_do; // output[12:0] reg pixel data out, make timing ignore (valid 1.5 clk earlier than Quantizer output)
wire focus_ds; // output reg data out strobe (one ahead of the start of dv)
wire [31:0] hifreq; // output[31:0] reg accumulated high frequency components in a frame sub-window
focus_sharp393 focus_sharp393_i (
.clk (xclk), // input
.en (frame_en), // input
// NOTE(review): this comment says "@negedge", while the quantizer and huffman table-write
// comments in this file say "now @posedge mclk" - confirm which applies here
.sclk (mclk), // input system clock: twe, ta,tdi - valid @negedge (ra, tdi - 2 cycles ahead)
.twe (twfe), // input enable write to a table
.ta (ta[9:0]), // input[9:0] table address
.tdi (tdi), // input[15:0] table data in (8 LSBs - quantization data)
.mode (cmprs_fmode_this[1:0]), // input[1:0] focus mode (combine image with focus info) - 0 - none, 1 - replace, 2 - combine all, 3 - combine woi
.firsti (color_first), // input first macroblock
.lasti (color_last), // input last macroblock
.tni (color_tn[2:0]), // input[2:0] block number in a macroblock - 0..3 - Y, >=4 - color (sync to stb)
.stb (dct_start), // input strobe that writes ctypei, dci
.start (quant_start), // input marks first input pixel (needs 1 cycle delay from previous DCT stage)
.di (dct_out), // input[12:0] pixel data in (signed)
.quant_ds (quant_ds), // input quantizator ds
.quant_d (quant_do[12:0]), // input[12:0] quantizator data output
.quant_dc_tdo (quant_dc_tdo), // input[15:0] MSB aligned coefficient for the DC component (used in focus module)
.do (focus_do[12:0]), // output[12:0] reg pixel data out, make timing ignore (valid 1.5 clk earlier than Quantizer output)
.ds (focus_ds), // output reg data out strobe (one ahead of the start of dv)
.hifreq (hifreq[31:0]) // output[31:0] reg accumulated high frequency components in a frame sub-window
);
// Format DC components to be output as a mini-frame. Was not used in the late NC353 as the dma1 channel was used for IMU instead of dcc
// The module's sclk port is driven by xclk2x here; finish_dcc is stuffer_done
// re-registered to the rising edge of xclk2x.
reg pre_finish_dcc;
reg finish_dcc;
dcc_sync393 dcc_sync393_i (
.sclk (xclk2x), // input
.dcc_en (dcc_en), // input xclk rising, sync with start of the frame
.finish_dcc (finish_dcc), // input @ sclk rising
.dcc_vld (dccvld), // input xclk rising
.dcc_data (dccdata[15:0]), // input[15:0] @clk rising
.statistics_dv (statistics_dv), // output reg
.statistics_do (statistics_do[15:0]) // output[15:0] reg @ sclk
);
// Outputs of the DC/AC encoder; enc_do/enc_dv are the di/ds inputs of the Huffman encoder
wire enc_last;
wire [15:0] enc_do;
wire enc_dv;
// generate DC data/strobe for the direct output (re) using sdram channel3 buffering
// encoderDCAC is updated to handle 13-bit signed data instead of the 12-bit. It will limit the values on its own
encoderDCAC393 encoderDCAC393_i (
.clk (xclk), // input
.en (frame_en), // input
.lasti (color_last), // input - was "last MCU in a frame" (@ stb)
.first_blocki (first_block_color), // input - first block in frame - save fifo write address (@ stb)
.comp_numberi (component_num[2:0]), // input[2:0] - component number 0..2 in color, 0..3 - in jp4diff, >= 4 - don't use (@ stb)
.comp_firsti (component_first), // input - first this component in a frame (reset DC) (@ stb)
.comp_colori (component_color), // input - use color - huffman? (@ stb)
.comp_lastinmbi (component_lastinmb), // input - last component in a macroblock (@ stb) is it needed?
.stb (dct_start), // input - strobe that writes firsti, lasti, tni,average
.zdi (focus_do[12:0]), // input[12:0] - zigzag-reordered data input
.first_blockz (first_block_quant), // input - first block input (@zds)
.zds (focus_ds), // input - strobe - one ahead of the DC component output
.last (enc_last), // output reg
.do (enc_do[15:0]), // output[15:0] reg
.dv (enc_dv) // output reg
);
// Huffman encoder: consumes enc_do/enc_dv, produces huff_do/huff_dl/huff_dv for the bit
// stuffer and is throttled by stuffer_rdy.
wire last_block;
wire test_lbw;
wire stuffer_rdy; // receiver (bit stuffer) is ready to accept data;
wire [15:0] huff_do; // output[15:0] reg
wire [3:0] huff_dl; // output[3:0] reg
wire huff_dv; // output reg
wire flush; // output reg
huffman393 i_huffman (
.xclk (xclk), // input
.xclk2x (xclk2x), // input
.en (frame_en), // input
.sclk (mclk), // input - for writing tables - now @posedge
.twe (twhe), // input - for writing tables - now @posedge mclk
.ta (ta[8:0]), // input[8:0] - table write address @posedge mclk
.tdi (tdi), // input[15:0] - table data in @posedge mclk
.di (enc_do[15:0]), // input[15:0] - specially RLL prepared 16-bit data (to FIFO)
.ds (enc_dv), // input - di valid strobe
.rdy (stuffer_rdy), // input - receiver (bit stuffer) is ready to accept data
.do(huff_do[15:0]), // output[15:0] reg
.dl(huff_dl[3:0]), // output[3:0] reg
.dv(huff_dv), // output reg
.flush(flush), // output reg
.last_block(last_block), // output reg
// NOTE(review): the port named test_lbw is left open and the local wire test_lbw is driven
// by the gotLastBlock port instead. The commented-out legacy instance is wired the same way,
// so this looks intentional - confirm.
.test_lbw(), // output reg ??
.gotLastBlock(test_lbw) // output ??
);
/*
wire last_block, test_lbw;
huffman i_huffman (.pclk(clk), // pixel clock
.clk(clk2x), // twice frequency - uses negedge inside
.en(cmprs_en), // enable (0 resets counter) sync to .pclk(clk)
// .cwr(cwr), // CPU WR global clock
.twe(twhe), // enable write to a table
.ta(ta[8:0]), // [8:0] table address
.tdi(di[15:0]), // [23:0] table data in (8 LSBs - quantization data, [13:9] zigzag address
.di(enc_do[15:0]), // [15:0] specially RLL prepared 16-bit data (to FIFO)
.ds(enc_dv), // di valid strobe
.rdy(stuffer_rdy), // receiver (bit stuffer) is ready to accept data
.do(huff_do), // [15:0] output data
.dl(huff_dl), // [3:0] output width (0==16)
.dv(huff_dv), // output data bvalid
.flush(flush),
.last_block(last_block),
.test_lbw(),
.gotLastBlock(test_lbw)); // last block done - flush the rest bits
*/
// Bit stuffer outputs and time stamp inputs
wire [15:0] stuffer_do;
wire stuffer_dv;
wire stuffer_done;
reg stuffer_done_persist;
wire stuffer_flushing;
wire [23:0] imgptr;
wire [31:0] sec;
wire [19:0] usec;
// Move stuffer_done from the falling to the rising edge of xclk2x in two steps:
// sample it on the falling edge first, then re-register on the rising edge for dcc_sync393.
always @ (negedge xclk2x) pre_finish_dcc <= stuffer_done;
always @ (posedge xclk2x) finish_dcc <= pre_finish_dcc; // stuffer_done - @negedge clk2x
// Bit stuffer: packs the variable-length codes from the Huffman encoder (huff_do, huff_dl,
// huff_dv) into 16-bit output words and returns stuffer_rdy as back-pressure.
stuffer393 stuffer393_i (
.clk (xclk2x), // input clock - uses negedge inside
.en (cmprs_en_2x_n), // input
.reset_data_counters (reset_data_counters[1]), // input reset data transfer counters (only when DMA and compressor are disabled)
.flush (flush || force_flush), // input - flush output data (fill byte with 0, long word with FFs
.stb (huff_dv), // input
.dl (huff_dl), // input[3:0] number of bits to send (0 - 16) (0-16??)
.d (huff_do), // input[15:0] data to shift (only lower huff_dl bits are valid)
// time stamping - will copy time at the end of color_first (later than the first hact after vact in the current frame, but before the next one
// and before the data is needed for output
.color_first(color_first), // input
.sec(sec[31:0]), // input[31:0]
.usec(usec[19:0]), // input[19:0]
.rdy(stuffer_rdy), // output - enable huffman encoder to proceed. Used as CE for many huffman encoder registers
.q(stuffer_do), // output[15:0] reg - output data
.qv(stuffer_dv), // output reg - output data valid
.done(stuffer_done), // output
.imgptr(imgptr[23:0]), // output[23:0] reg - image pointer in 32-byte chunks
.flushing(stuffer_flushing) // output reg
// optional test ports, present only when debug_stuffer is defined
`ifdef debug_stuffer
,.etrax_dma_r(tst_stuf_etrax[3:0]) // [3:0] just for testing
,.test_cntr(test_cntr[3:0])
,.test_cntr1(test_cntr1[7:0])
`endif
);
/*
stuffer i_stuffer (.clk(clk2x), //clock - uses negedge inside
.en(cmprs_en_2x_n), // enable, 0- reset
.reset_data_counters(reset_data_counters[1]), // reset data transfer counters (only when DMA and compressor are disabled)
.flush(flush || force_flush), // flush output data (fill byte with 0, long word with FFs
.stb(huff_dv), // input data strobe
.dl(huff_dl), // [3:0] number of bits to send (0 - 16)
.d(huff_do), // [15:0] input data to shift (only lower bits are valid)
// time stamping - will copy time at the end of color_first (later than the first hact after vact in the current froma, but before the next one
// and before the data is needed for output
.color_first(color_first), //
.sec(sec[31:0]),
.usec(usec[19:0]),
.rdy(stuffer_rdy), // enable huffman encoder to proceed. Used as CE for many huffman encoder registers
.q(stuffer_do), // [15:0] output data
.qv(stuffer_dv), // output data valid
.done(stuffer_done),
.imgptr (imgptr[23:0]), // [23:0]image pointer in 32-byte chunks
.flushing(stuffer_flushing)
`ifdef debug_stuffer
,.etrax_dma_r(tst_stuf_etrax[3:0]) // [3:0] just for testing
,.test_cntr(test_cntr[3:0])
,.test_cntr1(test_cntr1[7:0])
`endif
);
dcc_sync i_dcc_sync(//.clk(clk),
.sclk(clk2x),
.dcc_en(dcc_en), // clk rising, sync with start of the frame
.finish_dcc(finish_dcc), // sclk rising
.dcc_vld(dccvld), // clk rising
.dcc_data(dccdata[15:0]), //[15:0] clk risimg
.statistics_dv(statistics_dv), //sclk
.statistics_do(statistics_do[15:0])//[15:0] sclk
);
//TODO: compact table
focus_sharp i_focus_sharp(.clk(clk), // pixel clock
.en(cmprs_en), // enable (0 resets counter)
.sclk(clk2x), // system clock, twe, ta,tdi - valid @negedge (ra, tdi - 2 cycles ahead
.twe(twfe), // enable write to a table
.ta(ta[9:0]), // [9:0] table address
.tdi(di[15:0]), // [15:0] table data in (8 LSBs - quantization data)
.mode(cmprs_fmode_this[1:0]), // focus mode (combine image with focus info) - 0 - none, 1 - replace, 2 - combine all, 3 - combine woi
// .stren(focus_strength),
.firsti(color_first), // first macroblock
.lasti(color_last), // last macroblock
.tni(color_tn[2:0]), // block number in a macronblock - 0..3 - Y, >=4 - color (sync to stb)
.stb(dct_start), // strobe that writes ctypei, dci
.start(quant_start),// marks first input pixel (needs 1 cycle delay from previous DCT stage)
.di(dct_out[12:0]), // [11:0] pixel data in (signed)
.quant_ds(quant_ds), // quantizator data strobe (1 before DC)
.quant_d(quant_do[12:0]), // quantizator data output
.quant_dc_tdo(quant_dc_tdo[15:0]), //[15:0], MSB aligned coefficient for the DC component (used in focus module)
// .quant_dc_tdo_stb(quant_dc_tdo_stb),
.do(focus_do[12:0]), // [11:0] pixel data out (AC is only 9 bits long?) - changed to 10
.ds(focus_ds), // data out strobe (one ahead of the start of dv)
.hifreq(hifreq[31:0]) //[31:0]) // accumulated high frequency components in a frame sub-window
);
xdct i_xdct ( .clk(clk), // top level module
.en(cmprs_en), // if zero will reset transpose memory page numbers
.start(dct_start), // single-cycle start pulse that goes with the first pixel data. Other 63 should follow
......
......@@ -258,7 +258,7 @@ module quantizer393(
.rclk (clk), // input
.raddr ({tba[9:6],tba[2:0],tba[5:3]}), // input[8:0]
.ren (1'b1), // input
.regen (1'b0), // input
.regen (1'b1), // input
.data_out (tdo[15:0]), // output[15:0]
.wclk (sclk), // input
.waddr ({ta[8:0],twqe_d}), // input[8:0]
......@@ -276,7 +276,7 @@ module quantizer393(
.rclk (clk), // input
.raddr ({tbac[3:0],qmulr[11:4]}), // input[10:0]
.ren (1'b1), // input
.regen (1'b0), // input
.regen (1'b1), // input
.data_out (tdco[3:0]), // output[3:0]
.wclk (sclk), // input
.waddr ({ta[8:0],twce_d}), // input[9:0]
......@@ -294,7 +294,7 @@ module quantizer393(
.rclk (clk), // input
.raddr ({3'b0,rpage,zra[5:0]}), // input[8:0]
.ren (next_dv), // input
.regen (1'b0), // input
.regen (1'b1), // input
.data_out (zigzag_q[15:0]), // output[31:0]
.wclk (clk), // input
.waddr ({3'b0,wpage,zwa[5:0]}), // input[8:0]
......
/*
** -----------------------------------------------------------------------------**
** stuffer393.v
**etrax_dma
** Bit stuffer for JPEG encoder
**
** Copyright (C) 2002-2015 Elphel, Inc
**
** -----------------------------------------------------------------------------**
** stuffer393.v is free software - hardware description language (HDL) code.
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
**
*/
`define debug_compressor
// 08.27.2005 - modified "rdy" - moved register to make it faster.
// 01.22.2004 - fixed bug if flush comes with !rdy (required mod of huffman.v to extend "flush" until ready)
// 02.05.2004 - modified data length output. It is 24 bits now, in bytes, and is output as the last 4 bytes in the
// data block that is 32-byte DMA page aligned
// running on v8.2i - does not meet constraints with enabled global USE_SYNC_SET yes/auto because set input is slower. Trying to selectively disable it
// s ynthesis attribute use_sync_set of stuffer is no;
// s ynthesis attribute use_sync_reset of stuffer is no;
// s ynthesis attribute use_clock_enable of stuffer is no;
module stuffer393 (
input clk, // 2x pixel clock
input en, // enable, 0- reset
input reset_data_counters, // reset data transfer counters (only when DMA and compressor are disabled)
input flush, // flush output data (fill byte with 0, long word with 0
input stb, // input data strobe
input [3:0] dl, // [3:0] number of bits to send (0 - 16) ??
input [15:0] d, // [15:0] input data to shift (only lower bits are valid)
// time stamping - will copy time at the end of color_first (later than the first hact after vact in the current frame, but before the next one
// and before the data is needed for output
input color_first, //
input [31:0] sec, // [31:0] number of seconds
input [19:0] usec, // [19:0] number of microseconds
output rdy, // enable huffman encoder to proceed. Used as CE for many huffman encoder registers
output reg [15:0] q, // [15:0] output data
output reg qv, // output data valid
output done,// reset by !en, goes high after some delay after flushing
output reg [23:0] imgptr, // [23:0]image pointer in 32-byte chunks
output reg flushing
`ifdef debug_stuffer
, output reg [3:0] etrax_dma_r, // [3:0] just for testing
output reg [3:0] test_cntr,
output reg [7:0] test_cntr1
`endif
);
`ifdef debug_stuffer
reg en_d;
`endif
reg [23:1] stage1; // stage 1 register (after right-shifting input data by 0..7 - actually left by 7..0)
wire [2:0] shift1; // shift amount for stage 1
reg [4:0] stage1_bits; // number of topmost invalid bits in stage1 register - 2 MSBs, use lower 3 stage2_bits
reg [4:0] stage1_length; // number of bits (1..16) in stage 1 register
wire flush_end;
reg stage1_full;
wire [7:0] byteMask;
wire [31:1] longMask;
wire [31:1] dflt_stage2;
wire [ 2:0] sel;
wire [ 1:0] st2m;
wire [31:1] st2_d;
reg [31:1] stage2;
reg [ 4:0] stage2_bits;
wire send8h;
wire send8l;
wire send8;
reg flush_end_delayed; // update: fixed delay some delay after flush_end to ensure combining with output FIFO empty
wire pre_flush_end_delayed; // some delay after flush_end to ensure combining with output FIFO empty
reg [23:0] size_count; //(now will be byte count)
// to make it faster - split in parts
reg inc_size_count2316;
reg [ 2:0] size_out;
reg size_out_over;// only needed with extra 32 bytes of zeroes added.
reg busy_eob; // flushing and sending length
reg trailer; // sending out data length and 32 bytes for ETRAX
reg was_trailer; // sending out data length and 32 bytes for ETRAX
reg [ 3:0] etrax_dma; // count words to make total size multiple of 32 bytes.
// Last 4 bytes of data will have actual length in bytes
// There will always be at least 4 more bytes (0-es) before length - needed for software
reg will_flush; // next dv will be flushing byte/word
wire flush_now;
wire start_sizeout; //delay by 2 cycles
reg send8h_r;
reg send8l_r;
wire pre_stage2_bits_3; // what will be registered to stage2_bits[3];
wire [4:3] willbe_stage1_bits;
wire [3:0] sum_lengths;
reg [1:0] st2m_r;
reg [2:0] stb_time;
reg [31:0] sec_r;
reg [19:0] usec_r;
reg time_out;
reg time_size_out;
wire start_time_out;
// stb_time[2] - single-cycle pulse after color_first goes low
reg [19:0] imgsz32; // current image size in multiples of 32-bytes
reg inc_imgsz32;
// flushing: set by (flush && rdy), then held until flush_end or until en goes low
always @ (negedge clk) flushing <= en && !flush_end && ((flush && rdy) || flushing);
wire [4:0] pre_stage1_bits;
// low 3 bits of stage1_bits plus the length of the new code, where dl==0 is taken as 16
assign pre_stage1_bits[4:0]={2'b00,stage1_bits[2:0]} + {(dl[3:0]==4'b0),dl[3:0]};
// stage1_bits: cleared when disabled or at the end of a flush, updated for each accepted
// code (stb && rdy): the low 3 bits carry the running sum, the 2 MSBs are 2 minus the sum's upper bits
always @ (negedge clk) begin
if (!en || flush_end) stage1_bits[4:0] <= 5'b0;
else if (stb && rdy) stage1_bits <= {(2'b10-pre_stage1_bits[4:3]),pre_stage1_bits[2:0]};
end
// shift1: (stage1_bits[2:0] + dl[2:0]) modulo 8 - number of zero bits placed above d in stage1
assign shift1[2:0]= stage1_bits[2:0] + dl[2:0];
// For each accepted code (stb && rdy) place the 16-bit input d into the 23-bit stage1
// register with shift1 leading zeros, and record its length (dl==0 is stored as 16).
always @ (negedge clk) if (stb && rdy) begin
case (shift1[2:0])
0: stage1[23:1] <= { d[15:0],7'b0};
1: stage1[23:1] <= {1'b0,d[15:0],6'b0};
2: stage1[23:1] <= {2'b0,d[15:0],5'b0};
3: stage1[23:1] <= {3'b0,d[15:0],4'b0};
4: stage1[23:1] <= {4'b0,d[15:0],3'b0};
5: stage1[23:1] <= {5'b0,d[15:0],2'b0};
6: stage1[23:1] <= {6'b0,d[15:0],1'b0};
7: stage1[23:1] <= {7'b0,d[15:0] };
endcase
stage1_length[4:0] <= {(dl[3:0]==4'b0),dl[3:0]};
end
//*****************************
// stage2_bits update, in priority order:
//   1. disabled                              -> 0
//   2. send8 (a byte is being sent)          -> minus 8
//   3. flushing, stage 1 empty, bit 4 clear and low 4 bits non-zero -> 16 (pad to a full word)
//   4. otherwise keep the low 4 bits (bit 4 cleared) and, when rdy && stage1_full,
//      add the length of the data held in stage 1
always @ (negedge clk) begin
if (!en) stage2_bits <= 5'b0;
else if (send8) stage2_bits[4:0] <= stage2_bits[4:0] - 8;
else if (flushing && !stage1_full && !stage2_bits[4] && (stage2_bits[3:0]!=4'b0)) stage2_bits[4:0]<=5'h10; // actual flushing to word size
else stage2_bits[4:0] <= (rdy && stage1_full)? {1'b0,stage2_bits[3:0]}+stage1_length[4:0]:{1'b0,stage2_bits[3:0]};
end
// 4-bit sum of the low stage2_bits and the stage 1 length (carry discarded)
assign sum_lengths=stage2_bits[3:0]+stage1_length[3:0];
// Combinatorial look-ahead of the value that will be registered into stage2_bits[3];
// it follows the same priority cases as the stage2_bits register (disable, send8,
// flush-to-word, add/hold).
assign pre_stage2_bits_3= en &&
(send8? (~stage2_bits[3]): (
!(flushing && !stage1_full && !stage2_bits[4] && (stage2_bits[3:0]!=4'b0)) && // not flushing
((rdy && stage1_full)?sum_lengths[3]: stage2_bits[3] )
));
// Combinatorial look-ahead of the value that will be registered into stage1_bits[4:3]
assign willbe_stage1_bits[4:3]={2{en && !flush_end}} & ((stb && rdy)?(2'b10-pre_stage1_bits[4:3]):stage1_bits[4:3]);
// accelerating rdy calculation - making it a register
// The wires below compute, one cycle ahead, the terms that rdy is built from.
// next value of busy_eob: set by (flush && rdy), held until flush_end_delayed or !en
wire pre_busy_eob=en && !flush_end_delayed && (busy_eob || (flush && rdy));
wire [4:3] pre_stage2_bits_4_interm1=stage2_bits[4:3]-2'h1;
wire [4:0] pre_stage2_bits_4_interm2={1'b0,stage2_bits[3:0]}+stage1_length[4:0];
// next value of stage2_bits[4], using the same priority cases as the stage2_bits register
wire pre_stage2_bits_4=en && (send8?
(pre_stage2_bits_4_interm1[4]):
((flushing && !stage1_full && !stage2_bits[4] && (stage2_bits[3:0]!=4'b0))?
(1'b1):
(((rdy && stage1_full))?
(pre_stage2_bits_4_interm2[4]):
(1'b0)
)
)
);
// next values of send8h_r / send8l_r: AND-reduction (all bits set, i.e. 8'hff) of the byte
// that will occupy stage2[31:24] / stage2[23:16] after the next clock.
// NOTE(review): presumably this detects 0xFF bytes for JPEG byte stuffing - confirm.
wire pre_send8h_r= (( send8h_r && stage2_bits[4])?
(&stage2[23:16]):
((!send8l_r || !stage2_bits[4])?
(&((longMask[31:24] & st2_d[31:24]) | (~longMask[31:24] & dflt_stage2[31:24]))):
(send8h_r)
)
);
wire pre_send8l_r= ((( send8h_r || send8l_r) && stage2_bits[4] )?
(&stage2[15:8]):
(&((longMask[23:16] & st2_d[23:16]) | (~longMask[23:16] & dflt_stage2[23:16])))
);
//Trying to delay rdy to make more room before it
// rdy_rega..rdy_regd hold the inverted look-ahead terms, so that rdy itself is only a
// small combinatorial function of registers.
reg rdy_rega;
reg rdy_regb;
reg rdy_regc;
reg rdy_regd;
// s ynthesis attribute use_sync_set of {module_name|signal_name|instance_name} [is] no;
always @ (negedge clk) begin
rdy_rega <= !pre_stage2_bits_4;
rdy_regb <= !pre_send8h_r;
rdy_regc <= !pre_send8l_r;
rdy_regd <= !pre_busy_eob;
busy_eob <= pre_busy_eob;
//**********************************
send8h_r<=pre_send8h_r;
send8l_r<=pre_send8l_r;
end
// ready when not busy with end-of-block processing AND
// (stage2_bits[4] will be clear OR neither send8h_r nor send8l_r will be set)
assign rdy = (rdy_rega || (rdy_regb && rdy_regc)) && rdy_regd;
// byte-send strobes are active only while stage2_bits[4] is set
assign send8h= send8h_r && stage2_bits[4];
assign send8l= send8l_r && stage2_bits[4];
assign send8=stage2_bits[4] && (send8h_r || send8l_r);
// stage1_full: follows stb on every cycle with rdy high; forced low while the module
// is disabled or while flushing.
always @ (negedge clk) begin
    // Disabled or flushing: drop the flag. A forced flush does not turn stb off, so
    // flushing has to take priority over stb here (in normal operation flushing comes
    // after the last stb).
    // TODO (carried over, 05/12/2010): make sure it is OK !!
    if (!en || flushing) stage1_full <= 1'b0;
    // Otherwise track the input strobe, but only when the stuffer is ready
    else if (rdy) stage1_full <= stb;
end
assign sel[2:0]=stage2_bits[2:0];
// byteMask = 8'hff >> sel: the sel most significant bits are cleared, the rest are set
assign byteMask[7:0]= {!sel[2] && !sel[1] && !sel[0],
!sel[2] && !sel[1],
!sel[2] && (!sel[1] || !sel[0]),
!sel[2],
!sel[2] || (!sel[1] && !sel[0]),
!sel[2] || !sel[1],
!sel[2] || !sel[1] || !sel[0],
1'b1
};
//TODO: Try to move stage1_full up here, this is the time-limiting path 05.26.2010
// longMask selects, bit by bit, st2_d (mask bit 1) or dflt_stage2 (mask bit 0) for the next stage2:
//   [31:24] - byteMask if stage2_bits[3] is clear, else 0   (only when flushing or stage1_full)
//   [23:16] - all ones if stage2_bits[3] is clear, else byteMask (only when flushing or stage1_full)
//   [15: 1] - all ones when stage1_full
assign longMask[31:1]={{8{(flushing || stage1_full) && !stage2_bits[3]}} & byteMask[7:0],
{8{flushing || stage1_full}} & ({8{!stage2_bits[3]}} | byteMask[7:0]),
{8{stage1_full}},
{7{stage1_full}}};
always @ (negedge clk) st2m_r[1:0]<=willbe_stage1_bits[4:3]-{1'b0,pre_stage2_bits_3};
assign st2m[1:0]=st2m_r[1:0];
assign st2_d[31:1]= {{8{!flushing || stage1_full}} & (st2m[1]?{stage1[7:1],1'b0}:(st2m[0]? stage1[15:8]: stage1[23:16])),
{8{!flushing || stage1_full}} & (st2m[1]? stage1[23:16]: (st2m[0]?{stage1[7:1],1'b0}:stage1[15: 8])),
st2m[1]? stage1[15: 8]: {stage1[7:1],1'b0},
{stage1[7:1]}};
assign dflt_stage2=stage2_bits[4]?{stage2[15:1],16'b0}:{stage2[31:1]};
always @ (negedge clk) begin
if (send8h) stage2[31:24] <= stage2[23:16];
else if (send8l) stage2[31:24] <= 8'h00;
else stage2[31:24] <= (longMask[31:24] & st2_d[31:24]) | (~longMask[31:24] & dflt_stage2[31:24]);
if (send8) stage2[23:16] <= stage2[15:8];
else stage2[23:16] <= (longMask[23:16] & st2_d[23:16]) | (~longMask[23:16] & dflt_stage2[23:16]);
if (send8) stage2[15: 8] <= {stage2[7:1],1'b0};
else stage2[15: 8] <= (longMask[15: 8] & st2_d[15: 8]) | (~longMask[15: 8] & dflt_stage2[15: 8]);
if (send8) stage2[7: 1] <= 7'b0;
else stage2[7: 1] <= (longMask[7: 1] & st2_d[7: 1]) | (~longMask[7: 1] & dflt_stage2[7: 1]);
end
// output stage
assign flush_end= !stage2_bits[4] && flushing && !stage1_full && (stage2_bits[3:0]==4'b0);
assign flush_now= en && (!send8) && (flushing && !stage1_full && !stage2_bits[4]) && !will_flush;
`ifdef debug_stuffer
reg [3:0] tst_done_dly;
`endif
always @ (negedge clk) begin
stb_time[2:0] <= {stb_time[1] & ~stb_time[0], stb_time[0],color_first};
if (stb_time[2]) sec_r[31:0] <= sec[31:0];
else if (start_sizeout) sec_r[31:0] <= {8'hff, size_count[23:0]};
else if (time_size_out) sec_r[31:0] <= {usec_r[15:0],sec_r[31:16]};
if (stb_time[2]) usec_r[19:0] <= usec[19:0];
else if (time_out) usec_r[19:0] <= {16'h0,usec_r[19:16]};
//reset_data_counters; // reset data transfer counters (only when DMA and compressor are disabled)
if (reset_data_counters ) etrax_dma[3:0] <= 0; // not needed to be reset after frame, and that was wrong (to early)
else if (qv) etrax_dma[3:0] <= etrax_dma[3:0] + 1;
// just for testing
`ifdef debug_stuffer
en_d<= en;
if (en) etrax_dma_r[3:0] <= etrax_dma[3:0];
if (done) test_cntr1[7:0] <= 0;
else if (qv) test_cntr1[7:0] <= test_cntr1[7:0] +1 ; // normally should be one (done 1 ahead of end of qv)
tst_done_dly[3:0] <= {tst_done_dly[2:0],done};
if (tst_done_dly[1]) test_cntr[3:0] <= 0;
else if (qv) test_cntr[3:0] <= test_cntr[3:0] +1 ;
`endif
size_out_over <= en && (size_out_over?(!done):size_out[0]);
size_out[2:0]<={size_out[1:0],start_sizeout};
time_out <= en && (start_time_out || (time_out && !(etrax_dma[3:2]== 2'h3)));
time_size_out <= en && (start_time_out || (time_size_out && !(etrax_dma[3:1]== 3'h7)));
trailer <= en && (trailer?(!flush_end_delayed):(flush_end));
was_trailer<=trailer;
will_flush <= en && (will_flush?(!qv):(flush_now && (stage2_bits[3:0]!=4'b0)));
if (flush_now) size_count[0] <= stage2_bits[3] ^ (|stage2_bits[2:0]); // odd number of bytes
if (!en || size_out[2]) size_count[15:1] <= 0;
else if (!trailer && !was_trailer && qv && (!will_flush || !size_count[0])) size_count[15:1] <= size_count[15:1]+1;
inc_size_count2316 <= (!trailer && !was_trailer && qv && (!will_flush || !size_count[0])) && (&size_count[15:1]);
//reset_data_counters instead of !en here?
if (!en || size_out[2]) size_count[23:16] <= 0;
else if (inc_size_count2316) size_count[23:16] <= size_count[23:16]+1;
qv <= en && (stage2_bits[4] || trailer);
// to make it faster (if needed) use a single register as a source for q[15:0] in two following lines
if (time_size_out) q[15:0] <= {sec_r[7:0],sec_r[15:8]};
else q[15:0] <= {(stage2_bits[4]?stage2[31:24]:8'b0),
((stage2_bits[4] && !send8h)? stage2[23:16]:8'b0)};
inc_imgsz32 <= (etrax_dma[3:0]== 4'h0) && qv;
//reset_data_counters instead of !en here?
if (reset_data_counters || done) imgsz32[19:0] <= 0;
else if (inc_imgsz32) imgsz32[19:0]<=imgsz32[19:0]+1;
if (reset_data_counters) imgptr[23:0] <= 0;
else if (done) imgptr[23:0] <= imgptr[23:0]+ imgsz32[19:0];
flush_end_delayed <= en & pre_flush_end_delayed; // en just to prevent optimizing pre_flush_end_delayed+flush_end_delayed into a single SRL16
end
//start_sizeout
assign start_time_out= qv && trailer && (etrax_dma[3:0]== 4'h8) && !size_out_over;
assign start_sizeout= time_out && (etrax_dma[3:0]== 4'hc);
// SRL16_1 i_pre_flush_end_delayed (.D(size_out[1]),.Q(pre_flush_end_delayed), .A0(1'b0), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk)); // dly=3+1 // rather arbitrary?
dly_16 #(.WIDTH(1)) i_pre_flush_end_delayed(.clk(~clk),.rst(1'b0), .dly(14), .din(size_out[1]), .dout(pre_flush_end_delayed)); // dly=14+1 // rather arbitrary?
assign done = flush_end_delayed;
endmodule
/*
** -----------------------------------------------------------------------------**
** varlen_encode393.v
**
** Part of the Huffman encoder for JPEG compressor - variable length encoder
**
** Copyright (C) 2002-2015 Elphel, Inc
**
** -----------------------------------------------------------------------------**
** varlen_encode393.v is free software - hardware description language (HDL) code.
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
**
*/
// Uses the other (falling) edge of clk2x.
// The encoder would need 2 cycles per "normal" word and 1 cycle for codes "00" and "f0".
// Only the magnitude output is needed ASAP (2 cycles); the value output should be
// valid on the 5-th cycle, i.e. it has a latency of 4 cycles and runs every other cycle.
// Shortcut taken here: all codes are processed in 2 cycles.
module varlen_encode393 (
    input             clk,    // twice frequency - all registers are clocked on negedge
    input             en,     // register enable; 0 freezes every register at once
    input             start,  // strobe: capture d (not faster than every other cycle)
    input      [11:0] d,      // 12-bit signed input value
    output reg  [3:0] l,      // code length: number of significant bits of |d| (0 for zero magnitude)
    output reg  [3:0] l_late, // copy of l registered together with q
    output reg [10:0] q);     // 11-bit code word
/*
  Instantiation template:
    varlen_encode393 i_varlen_encode(.clk(clk),
                                     .en(stuffer_was_rdy),      // will enable registers. 0 - freeze
                                     .start(steps[0]),
                                     .d(sval[11:0]),            // 12-bit signed
                                     .l(var_dl[ 3:0]),          // [3:0] code length
                                     .l_late(var_dl_late[3:0]),
                                     .q(var_do[10:0]));         // [10:0] code
*/
    // Pipeline (all on negedge clk, only while en is high):
    //   edge with start      : sign_mag <= {sign, two's-complement magnitude of d[10:0]}
    //   edge with phase[0]   : l        <= bit length of the magnitude,
    //                          code_pre <= magnitude (positive) or its bitwise inverse (negative)
    //   edge with phase[2]   : q <= code_pre, l_late <= l
    reg  [11:0] sign_mag;  // {d[11], d[11] ? -d[10:0] : d[10:0]}
    reg  [10:0] code_pre;  // code word one stage ahead of q
    reg   [2:0] phase;     // shift register of delayed start strobes

    wire        neg = sign_mag[11];
    wire [10:0] mag = sign_mag[10:0];

    // Number of significant bits in an 11-bit magnitude: position of the
    // most significant '1' plus one, or 0 when no bit is set.
    function [3:0] sig_bits;
        input [10:0] m;
        begin
            casez (m)
                11'b1??????????: sig_bits = 4'd11;
                11'b01?????????: sig_bits = 4'd10;
                11'b001????????: sig_bits = 4'd9;
                11'b0001???????: sig_bits = 4'd8;
                11'b00001??????: sig_bits = 4'd7;
                11'b000001?????: sig_bits = 4'd6;
                11'b0000001????: sig_bits = 4'd5;
                11'b00000001???: sig_bits = 4'd4;
                11'b000000001??: sig_bits = 4'd3;
                11'b0000000001?: sig_bits = 4'd2;
                11'b00000000001: sig_bits = 4'd1;
                default:         sig_bits = 4'd0;
            endcase
        end
    endfunction

    always @ (negedge clk) if (en) begin
        // delay line for the start strobe
        phase <= {phase[1:0], start};

        // stage 0: keep the sign, replace the low 11 bits by their negation when d is negative
        // (invert all bits and add 1 only when d[11] is set)
        if (start) begin
            sign_mag <= {d[11], ({11{d[11]}} ^ d[10:0]) + {10'b0, d[11]}};
        end

        // stage 1: length and pre-registered code word
        if (phase[0]) begin
            code_pre <= mag ^ {11{neg}}; // inverted magnitude for negative values
            l        <= sig_bits(mag);   // needed ASAP, valid 2 cycles after start
        end

        // stage 2: outputs synchronous to each other
        if (phase[2]) begin
            q      <= code_pre;
            l_late <= l;
        end
    end
endmodule
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment