1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
/*
** -----------------------------------------------------------------------------**
** focus_sharp393.v
**
** Module to determine focus sharpness on by integrating
** DCT coefficient, multiplied my 8x8 array and squared
**
** Copyright (C) 2008-2015 Elphel, Inc
**
** -----------------------------------------------------------------------------**
** ocus_sharp393.v is free software - hardware description language (HDL) code.
**
** This program is free software: you can redistribute it and/or modify
** it under the terms of the GNU General Public License as published by
** the Free Software Foundation, either version 3 of the License, or
** (at your option) any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU General Public License for more details.
**
** You should have received a copy of the GNU General Public License
** along with this program. If not, see <http://www.gnu.org/licenses/>.
** -----------------------------------------------------------------------------**
**
*/
`include "system_defines.vh"
`timescale 1ns/1ps
//TODO: Modify to work with other modes (now only on color)
module focus_sharp393(
input clk, // pixel clock, posedge
input clk2x, // 2x pixel clock
input en, // enable (0 resets)
input mclk, // system clock to write tables
input tser_we, // enable write to a table
input tser_a_not_d, // address/not data distributed to submodules
input [ 7:0] tser_d, // byte-wide serialized tables address/data to submodules
input [ 1:0] mode, // focus mode (combine image with focus info) - 0 - none, 1 - replace, 2 - combine all, 3 - combine woi
input firsti, // first macroblock
input lasti, // last macroblock
input [ 2:0] tni, // block number in a macronblock - 0..3 - Y, >=4 - color (sync to stb)
input stb, // strobe that writes ctypei, dci
input start, // marks first input pixel (needs 1 cycle delay from previous DCT stage)
input [12:0] di, // [11:0] pixel data in (signed)
input quant_ds, // quantizator ds
input [12:0] quant_d, // [11:0]quantizator data output
input [15:0] quant_dc_tdo, // [15:0], MSB aligned coefficient for the DC component (used in focus module)
output reg [12:0] do, // [11:0] pixel data out, make timing ignore (valid 1.5 clk earlier that Quantizer output)
output reg ds, // data out strobe (one ahead of the start of dv)
output reg [31:0] hifreq); //[31:0]) // accumulated high frequency components in a frame sub-window
wire [15:0] tdo;
reg [ 5:0] tba;
reg [11:0] wnd_reg; // intermediate register
reg wnd_wr; // writing window
reg [ 2:0] wnd_a; // window register address
// next measured in 8x8 blocks, totalwidth - write one less than needed (i.e. 511 fro the 512-wide window)
// blocks on the border are included
reg [ 8:0] wnd_left;
reg [ 8:0] wnd_right;
reg [ 8:0] wnd_top;
reg [ 8:0] wnd_bottom;
reg [ 8:1] wnd_totalwidth;
reg [ 3:0] filt_sel0; // select filter number, 0..14 (15 used for window parameters)
reg [ 3:0] filt_sel; // select filter number, 0..14 (15 used for window parameters)
reg stren; // strength (visualization)
reg [ 2:0] ic;
reg [ 2:0] oc;
wire first,last; //valid at start (with first di word), switches immediately after
wire [ 2:0] tn;
reg [39:0] acc_frame;
reg [12:0] pre_do;
reg pre_ds;
reg need_corr_max; // limit output by quant_dc_tdo
reg [11:0] fdo; // focus data output
reg start_d; //start delayed by 1
reg [ 2:0] tn_d; //tn delayed by 1
wire out_mono;
wire out_window;
wire [12:0] combined_qf;
wire [12:0] next_do;
wire [12:0] fdo_minus_max;
reg [11:0] di_d;
reg [11:0] d1;
reg [8:0] start2;
// reg [7:0] finish2;
reg [6:0] finish2; // bit[7] never used
reg [5:0] use_k_dly;
reg [23:0] acc_blk; // accumulator for the sum ((a[i]*d[i])^2)
reg [22:0] sum_blk; // accumulator for the sum ((a[i]*d[i])^2), copied at block end
reg acc_ldval; // value to load to acc_blk: 0 - 24'h0, 1 - 24'h7fffff
wire acc_clear=start2[8];
wire acc_add=use_k_dly[4];
wire acc_corr=use_k_dly[5];
wire acc_to_out=finish2[6];
wire [17:0] mult_a;
wire [17:0] mult_b;
wire [35:0] mult_p;
reg [17:0] mult_s; //truncated and saturated (always positive) multiplier result (before calculating squared)
reg next_ac; // next will be AC component
reg use_coef; // use multiplier for the first operation - DCT coeff. by table elements
reg started_luma;// started Luma block
reg luma_dc_out; // 1 cycle ahead of the luma DC component out (optionally combined with the WOI (mode=3))
reg luma_dc_acc; // 1 cycle ahead of the luma DC component out (always combined with the WOI)
reg was_last_luma;
reg copy_acc_frame;
wire twe;
wire [15:0] tdi;
wire [22:0] ta;
assign fdo_minus_max[12:0]= {1'b0,fdo[11:0]}-{1'b0,quant_dc_tdo[15:5]};
assign combined_qf[12:0]=stren?({quant_d[12:0]}+{1'b0,fdo[11:0]}): //original image plus positive
({quant_d[12],quant_d[12:1]}+ // half original
{fdo_minus_max[12],fdo_minus_max[12:1]}); // plus half signed
assign next_do[12:0] = (mode[1:0]==2'h1)?(luma_dc_out?fdo_minus_max[12:0]:13'h0):
((mode[1] && luma_dc_out )? combined_qf[12:0]: {quant_d[12:0]} );
always @ (posedge clk) begin
if (!en) ic[2:0] <= 3'b0;
else if (stb) ic[2:0] <= ic[2:0]+1;
if (!en) oc[2:0] <= 3'b0;
else if (start) oc[2:0] <= oc[2:0]+1;
end
// writing window parameters in the last bank of a table
always @ (posedge mclk) begin
if (twe) begin
wnd_reg[11:0] <= tdi[11:0] ;
wnd_a <= ta[2:0];
end
wnd_wr <= twe && (ta[9:3]==7'h78) ; // first 8 location in the last 64-word bank
if (wnd_wr) begin
case (wnd_a[2:0])
3'h0: wnd_left[8:0] <= wnd_reg[11:3] ;
3'h1: wnd_right[8:0] <= wnd_reg[11:3] ;
3'h2: wnd_top[8:0] <= wnd_reg[11:3] ;
3'h3: wnd_bottom[8:0] <= wnd_reg[11:3] ;
3'h4: wnd_totalwidth[8:1] <= wnd_reg[11:4] ;
3'h5: filt_sel0[3:0] <= wnd_reg[3:0] ;
3'h6: stren <= wnd_reg[0] ;
default: begin end
endcase
end
end
// determine if this block needs to be processed (Y, inside WOI)
reg [ 7:0] mblk_hor; //horizontal macroblock (2x2 blocks) counter
reg [ 7:0] mblk_vert; //vertical macroblock (2x2 blocks) counter
wire start_of_line= (first || (mblk_hor[7:0] == wnd_totalwidth[8:1]));
wire first_in_macro= (tn[2:0]==3'h0);
reg in_woi; // maybe specified as slow
always @(posedge clk) begin
if (first_in_macro && start) mblk_hor[7:0] <= start_of_line? 8'h0:(mblk_hor[7:0]+1);
if (first_in_macro && start && start_of_line) mblk_vert[7:0] <= first? 8'h0:(mblk_vert[7:0]+1);
start_d <= start;
tn_d[2:0] <= tn[2:0];
if (start_d) in_woi <= !tn_d[2] &&
({mblk_hor [7:0],tn_d[0]} >= wnd_left[8:0]) &&
({mblk_hor [7:0],tn_d[0]} <= wnd_right[8:0]) &&
({mblk_vert[7:0],tn_d[1]} >= wnd_top[8:0]) &&
({mblk_vert[7:0],tn_d[1]} <= wnd_bottom[8:0]);
end
//Will use posedge sclk to balance huffman and system
// wire clkdiv2;
// FD i_clkdiv2(.C(clk), .D(!clkdiv2), .Q(clkdiv2));
reg clkdiv2=0;
always @ (posedge clk) begin
clkdiv2 <= ~clkdiv2;
end
reg [2:0] clksync;
wire csync=clksync[2];
always @ (posedge clk2x) begin
clksync[2:0] <= {(clksync[1]==clksync[0]),clksync[0],clkdiv2};
end
always @ (posedge clk) begin
if (di[11]==di[12]) di_d[11:0] <=di[11:0];
else di_d[11:0] <= {~di[11],{11{di[11]}}}; //saturate
end
assign mult_a[17:0] = use_coef ? {1'b0,tdo[15:0],1'b0}: mult_s[17:0];
assign mult_b[17:0] = use_coef ? {d1[10:0],{7{d1[0]}}}: mult_s[17:0];
always @ (posedge clk2x) begin
filt_sel[3:0] <= filt_sel0[3:0];
if (clksync[2]) d1[11:0]<=di_d[11:0];
start2[8:0] <= {start2[7:0], start && csync};
// finish2[7:0]<= {finish2[6:0],use_coef && !next_ac};
finish2[6:0]<= {finish2[5:0],use_coef && !next_ac}; // finish2[7] was never used
if (!en || start2[0]) tba[5:0] <= 6'h0;
else if (!csync && (tba[5:0] != 6'h3f)) tba[5:0] <= tba[5:0] + 1;
// mult_s[17:0] <= (&mult_p[35:31] || !(&mult_p[35:31]))?mult_p[31:14]:18'h1ffff;
mult_s[17:0] <= (&mult_p[35:31] || !(|mult_p[35:31]))?mult_p[31:14]:18'h1ffff;
next_ac <= en && (start2[3] || (next_ac && ((tba[5:0] != 6'h3f) || csync )));
use_coef <= next_ac && !csync;
use_k_dly[5:0] <= {use_k_dly[4:0],use_coef};
acc_ldval <= !(|start2[7:6]);
if (acc_clear || (acc_corr && acc_blk[23])) acc_blk[23:0] <= {1'b0,{23{acc_ldval}}};
else if (acc_add) acc_blk[23:0] <= acc_blk[23:0] + mult_p[31:8]; // mult_p[35:8];
if (acc_to_out) fdo[11:0] <= (|acc_blk[23:20])?12'hfff:acc_blk[19:8]; // positive, 0..0xfff
if (acc_to_out) sum_blk[22:0] <= acc_blk[22:0]; // accumulator for the sum ((a[i]*d[i])^2), copied at block end
end
// acc_blk will (after corr) be always with MSB=0 - max 24'h7fffff
// for image output - max 24'h0fffff->12 bit signed, shifted
// combining output
//assign combined_qf[12:0]={quant_d[11],quant_d[11:0]}+{fdo[11],fdo[11:0]};
// SRL16 i_out_mono (.Q(out_mono), .A0(1'b1), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk), .D(started_luma)); // timing not critical
// SRL16 i_out_window (.Q(out_window), .A0(1'b1), .A1(1'b1), .A2(1'b1), .A3(1'b1), .CLK(clk), .D(in_woi)); // timing not critical
dly_16 #(.WIDTH(1)) i_out_mono(.clk(clk), .rst(1'b0), .dly(4'd15), .din(started_luma), .dout(out_mono)); // timing not critical
dly_16 #(.WIDTH(1)) i_out_window(.clk(clk),.rst(1'b0), .dly(4'd15), .din(in_woi), .dout(out_window)); // timing not critical
always @ (posedge clk) begin
if (start) started_luma <= !tn[2];
luma_dc_out <= quant_ds && out_mono && ((mode[1:0]!=3) || out_window);
luma_dc_acc <= quant_ds && out_mono && out_window;
was_last_luma <= en && last && out_mono;
copy_acc_frame <= was_last_luma && !out_mono;
if (first && first_in_macro) acc_frame[39:0] <= 40'h0;
else if (luma_dc_acc) acc_frame[39:0] <= acc_frame[39:0] + sum_blk[22:0];
if (copy_acc_frame) hifreq[31:0] <= acc_frame[39:8];
pre_ds <= quant_ds;
ds <= pre_ds;
pre_do[12:0] <= next_do[12:0];
need_corr_max <=luma_dc_out && (mode[1:0]!=2'h0);
do[12:0] <= (need_corr_max && !pre_do[12] && (pre_do[11] || (pre_do[10:0]>quant_dc_tdo[15:5])) )?
{2'b0,quant_dc_tdo[15:5]} :
pre_do[12:0];
end
table_ad_receive #(
.MODE_16_BITS (1),
.NUM_CHN (1)
) table_ad_receive_i (
.clk (mclk), // input
.a_not_d (tser_a_not_d), // input
.ser_d (tser_d), // input[7:0]
.dv (tser_we), // input
.ta (ta), // output[22:0]
.td (tdi), // output[15:0]
.twe (twe) // output
);
/*
MULT18X18SIO #(
.AREG(1), // Enable the input registers on the A port (1=on, 0=off)
.BREG(1), // Enable the input registers on the B port (1=on, 0=off)
.B_INPUT("DIRECT"), // B cascade input "DIRECT" or "CASCADE"
.PREG(1) // Enable the input registers on the P port (1=on, 0=off)
) i_focus_mult (
.BCOUT(), // 18-bit cascade output
.P(mult_p), // 36-bit multiplier output
.A(mult_a), // 18-bit multiplier input
.B(mult_b), // 18-bit multiplier input
.BCIN(18'h0), // 18-bit cascade input
.CEA(en), // Clock enable input for the A port
.CEB(en), // Clock enable input for the B port
.CEP(en), // Clock enable input for the P port
.CLK(sclk), // Clock input
.RSTA(1'b0), // Synchronous reset input for the A port
.RSTB(1'b0), // Synchronous reset input for the B port
.RSTP(1'b0) // Synchronous reset input for the P port
);
*/
reg [35:0] mult_p_r;
reg [17:0] mult_a_r;
reg [17:0] mult_b_r;
assign mult_p = mult_p_r;
always @(posedge clk2x) begin
mult_a_r <= mult_a;
mult_b_r <= mult_b;
mult_p_r <= mult_a_r * mult_b_r;
end
/*
RAM16X1D i_tn0 (.D(tni[0]),.DPO(tn[0]),.A0(ic[0]),.A1(ic[1]),.A2(1'b0),.A3(1'b0),.DPRA0(oc[0]),.DPRA1(oc[1]),.DPRA2(1'b0),.DPRA3(1'b0),.WCLK(clk),.WE(stb));
RAM16X1D i_tn1 (.D(tni[1]),.DPO(tn[1]),.A0(ic[0]),.A1(ic[1]),.A2(1'b0),.A3(1'b0),.DPRA0(oc[0]),.DPRA1(oc[1]),.DPRA2(1'b0),.DPRA3(1'b0),.WCLK(clk),.WE(stb));
RAM16X1D i_tn2 (.D(tni[2]),.DPO(tn[2]),.A0(ic[0]),.A1(ic[1]),.A2(1'b0),.A3(1'b0),.DPRA0(oc[0]),.DPRA1(oc[1]),.DPRA2(1'b0),.DPRA3(1'b0),.WCLK(clk),.WE(stb));
RAM16X1D i_first (.D(firsti),.DPO(first),.A0(ic[0]),.A1(ic[1]),.A2(1'b0),.A3(1'b0),.DPRA0(oc[0]),.DPRA1(oc[1]),.DPRA2(1'b0),.DPRA3(1'b0),.WCLK(clk),.WE(stb));
RAM16X1D i_last (.D(lasti), .DPO(last), .A0(ic[0]),.A1(ic[1]),.A2(1'b0),.A3(1'b0),.DPRA0(oc[0]),.DPRA1(oc[1]),.DPRA2(1'b0),.DPRA3(1'b0),.WCLK(clk),.WE(stb));
*/
reg [ 4:0] ram4[0:3];
always @ (posedge clk) begin
ram4[ic[1:0]] <= {lasti,firsti,tni[2:0]};
end
assign {last,first,tn[2:0]} = ram4[oc[1:0]];
// is it correct posedge sclk on rd, negedge on wr and no xclk?
/*
RAMB16_S18_S18 i_focus_dct_tab (
.DOA(tdo[15:0]), // Port A 16-bit Data Output
.DOPA(), // Port A 2-bit Parity Output
.ADDRA({filt_sel[3:0],tba[2:0],tba[5:3]}), // Port A 10-bit Address Input
.CLKA(sclk), // Port A Clock
.DIA(16'b0), // Port A 16-bit Data Input
.DIPA(2'b0), // Port A 2-bit parity Input
.ENA(1'b1), // Port A RAM Enable Input
.SSRA(1'b0), // Port A Synchronous Set/Reset Input
.WEA(1'b0), // Port A Write Enable Input
.DOB(), // Port B 16-bit Data Output
.DOPB(), // Port B 4-bit Parity Output
.ADDRB({ta[9:0]}), // Port B 2-bit Address Input
.CLKB(!sclk), // Port B Clock
.DIB(tdi[15:0]), // Port B 16-bit Data Input
.DIPB(2'b0), // Port-B 2-bit parity Input
.ENB(1'b1), // PortB RAM Enable Input
.SSRB(1'b0), // Port B Synchronous Set/Reset Input
.WEB(twe) // Port B Write Enable Input
);
*/
ram18_var_w_var_r #(
.REGISTERS (0),
.LOG2WIDTH_WR (4),
.LOG2WIDTH_RD (4),
.DUMMY (0)
`ifdef PRELOAD_BRAMS
`include "includes/focus_filt.dat.vh"
`endif
) i_focus_dct_tab (
.rclk (clk), // input
.raddr ({filt_sel[3:0],tba[2:0],tba[5:3]}), // input[9:0]
.ren (1'b1), // input
.regen (1'b1), // input
.data_out (tdo[15:0]), // output[31:0]
.wclk (mclk), // input
.waddr (ta[9:0]), // input[8:0]
.we (twe), // input
.web (4'hf), // input[3:0]
.data_in (tdi[15:0]) // input[31:0]
);
endmodule