/******************************************************************************* * Module: lens_flat393 * Date:2015-08-27 * Author: Andrey Filippov * Description: Correction of lens+sensor vignetting. Initially it is just * a quadratic function that can be improved later by a piece-linear table * function T() of the calculated f(x,y)=p*(x-x0)^2 + q(y-yo)^2 + c. * T(f(x,y)) can be used to approximate cos^4). or other vignetting functions * * This function - f(x,y) or T(f(x,y)) here deal with full sensor data before * gamma-tables are applied and the data is compressed to 8 bits * * Copyright (c) 2008-2015 Elphel, Inc. * lens_flat393.v is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * lens_flat393.v is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . *******************************************************************************/ `timescale 1ns / 1ps /* F2(x,y)=p*(x-x0)^2 + q(y-yo)^2 + c= p*x^2 - (2*p*x0) * x + p* (x0*x0) + q*y^2 - (2*q*y0) * y + q* (y0*y0) + c= p* x^2 - (2*p*x0) * x + q* y^2 -(2*q)* y + (p* (x0*x0)+q* (y0*y0) + c) Final: F2(X,Y)=p* x^2 - (2*p*x0) * x + q* y^2 -(2*q)* y + (p* (x0*x0)+q* (y0*y0) + c): Ax(Y)= p Bx(Y)=-(2*p) F(0,Y)= q*y^2 - (2*q*y0) * y + (q* (y0*y0) + c + p* (x0*x0)) C= (q* (y0*y0) + c + p* (x0*x0)); BY= - (2*q*y0) AY= q AX= p BX= -2*p*x0 */ module lens_flat393 #( // Vignetting correction / pixel value scaling - controlled via single data word (same as in 252), some of bits [23:16] // are used to select register, bits 25:24 - select sub-frame parameter SENS_LENS_ADDR = 'h43c, parameter SENS_LENS_ADDR_MASK = 'h7fc, // parameter SENS_LENS_HEIGHTS = 'h0, // .. 'h2 set frame heights (all that is not SENS_LENS_COEFF) parameter SENS_LENS_COEFF = 'h3, // set vignetting/scale coefficients ( parameter SENS_LENS_AX = 'h00, // 00000... parameter SENS_LENS_AX_MASK = 'hf8, parameter SENS_LENS_AY = 'h08, // 00001... parameter SENS_LENS_AY_MASK = 'hf8, parameter SENS_LENS_C = 'h10, // 00010... parameter SENS_LENS_C_MASK = 'hf8, parameter SENS_LENS_BX = 'h20, // 001..... parameter SENS_LENS_BX_MASK = 'he0, parameter SENS_LENS_BY = 'h40, // 010..... parameter SENS_LENS_BY_MASK = 'he0, parameter SENS_LENS_SCALES = 'h60, // 01100... parameter SENS_LENS_SCALES_MASK = 'hf8, parameter SENS_LENS_FAT0_IN = 'h68, // 01101000 parameter SENS_LENS_FAT0_IN_MASK = 'hff, parameter SENS_LENS_FAT0_OUT = 'h69, // 01101001 parameter SENS_LENS_FAT0_OUT_MASK = 'hff, parameter SENS_LENS_POST_SCALE = 'h6a, // 01101010 parameter SENS_LENS_POST_SCALE_MASK = 'hff, parameter SENS_NUM_SUBCHN = 3, // number of subchannels on the same sensor port (<=4) parameter SENS_LENS_F_WIDTH = 19, // AF2015 18, // number of bits in the output result parameter SENS_LENS_F_SHIFT = 22, // shift ~2*log2(width/2), for 4K width parameter SENS_LENS_B_SHIFT = 12, //(<=F_SHIFT) shift b- coeff (12 is 2^12 - good for lines <4096, 1 output count per width) parameter SENS_LENS_A_WIDTH = 19, // AF2015 18, // number of bits in a-coefficient (unsigned). Just to match the caller - MSBs will be anyway discarded parameter SENS_LENS_B_WIDTH = 21 // number of bits in b-coefficient (signed). ) ( input prst, // @pclk sync reset input pclk, // global clock input, pixel rate (96MHz for MT9P006) // programming interface input mrst, // @mclk sync reset input mclk, // global clock, half DDR3 clock, synchronizes all I/O through the command port input [7:0] cmd_ad, // byte-serial command address/data (up to 6 bytes: AL-AH-D0-D1-D2-D3 input cmd_stb, // strobe (with first byte) for the command a/d input [15:0] pxd_in, // @(posedge pclk) input hact_in, input sof_in, // start of frame, single pclk, input input eof_in, // end of frame, single pclk, input output reg [15:0] pxd_out, // pixel data out, 16 bit unsigned output hact_out, // output sof_out, // latency 8 from pxd_in; output eof_out, // input [1:0] bayer, output [1:0] subchannel, // for gamma correction (valid before/at start of line, may be invalid at the end) output last_in_sub // last line in subchannel (valid before/at start of line, may be invalid at the end) ); // AF2015 new signals wire [ 1:0] cmd_a; wire [31:0] cmd_data; reg [31:0] cmd_data_r; // holds data to cross clock boundary wire cmd_we; reg [15:0] heights_m1_ram[0:3]; // set @ posedge mclk, used at pclk, but should be OK (change before first hact) reg [15:0] line_cntr; // count image lines to switch to next subchannels reg [1:0] sub_frame_early; // valid before/at newline to provide coefficients to lens_flat393_line reg [1:0] sub_frame; reg [1:0] sub_frame_late; // valid @ hact_d[2] reg [3:0] sub_frame_late_d; // add extra stages if needed reg pre_first_line; reg inc_sub_frame; reg [13:0] hact_d; // lens_corr_out; /// lens correction out valid (first clock from column0 ) wire [15:0] pxd_d; // pxd_in delayed buy 4 clocks reg [ 2:0] newline; reg sosf; // start of subframe reg we_AX,we_BX,we_AY,we_BY,we_C; reg we_scales;/// write additional individual per-color scales (17 bits each) reg we_fatzero_in,we_fatzero_out; /// reg we_post_scale; //F(x,y)=Ax*x^2+Bx*x+Ay*y^2+By*y+C // small rams to store per-subframe parameters, they will be registered at each subframe start reg [18:0] AX_ram[0:3]; /// Ax reg [18:0] AY_ram[0:3]; /// Ax reg [20:0] BX_ram[0:3]; /// Bx reg [20:0] BY_ram[0:3]; /// By reg [18:0] C_ram[0:3]; /// C reg [16:0] scales_ram[0:15]; // per-color coefficients (parallel-combined fro all colors) // reg [16:0] scales_r; reg [15:0] fatzero_in_ram[0:3]; /// zero level to subtract before multiplication reg [15:0] fatzero_out_ram[0:3]; /// zero level to add after multiplication reg [ 3:0] post_scale_ram[0:3]; /// shift product after first multiplier - maybe needed when using decimation wire [18:0] FY; /// F(0,y) wire [23:0] ERR_Y; /// running error for the first column wire [18:0] FXY; /// F(x,y) // reg [18:0] FXY_sat; // Not used, add extra cycle in calculations? /// copied form sensorpix353.v reg bayer_nset; reg bayer0_latched; reg [1:0] color; wire [35:0] mult_first_res; reg [17:0] mult_first_scaled; /// scaled multiplication result (to use with decimation to make parabola 'sharper') wire [35:0] mult_second_res; // Use sub_frame_late? wire [20:0] pre_pixdo_with_zero= mult_second_res[35:15] + {{5{fatzero_out_ram[sub_frame][15]}},fatzero_out_ram[sub_frame][15:0]}; // wire sync_bayer=linerun && ~lens_corr_out[0]; // wire sync_bayer=hact_d[2] && ~hact_d[3]; wire sync_bayer=hact_d[6] && ~hact_d[7]; // sub_frame_late_d[3:2] sets 1 cycle ahead of needed, OK to ease timing (there is always >=1 hact gap) wire [17:0] pix_zero = {2'b0,pxd_d[15:0]}-{{2{fatzero_in_ram[sub_frame_late_d[3:2]][15]}},fatzero_in_ram[sub_frame_late_d[3:2]][15:0]}; // Writing to register files @mclk (4 per-subframe registers for coefficients, 4x4 - for per-subframe per-color scales) // these registers will be read out at other clock (pclk) wire set_lens_w = cmd_we && (cmd_a == SENS_LENS_COEFF ); wire set_heights_w = cmd_we && (cmd_a != SENS_LENS_COEFF ); assign subchannel = sub_frame ; assign last_in_sub = inc_sub_frame; assign hact_out = hact_d[13]; always @(posedge mclk) begin cmd_data_r <= cmd_data; if (set_heights_w) heights_m1_ram[cmd_a] <= cmd_data[15:0]; we_AX <= set_lens_w && func_cmd_we (cmd_data, SENS_LENS_AX, SENS_LENS_AX_MASK); we_AY <= set_lens_w && func_cmd_we (cmd_data, SENS_LENS_AY, SENS_LENS_AY_MASK); we_C <= set_lens_w && func_cmd_we (cmd_data, SENS_LENS_C, SENS_LENS_C_MASK); we_BX <= set_lens_w && func_cmd_we (cmd_data, SENS_LENS_BX, SENS_LENS_BX_MASK); we_BY <= set_lens_w && func_cmd_we (cmd_data, SENS_LENS_BY, SENS_LENS_BY_MASK); we_scales <= set_lens_w && func_cmd_we (cmd_data, SENS_LENS_SCALES, SENS_LENS_SCALES_MASK); we_fatzero_in <= set_lens_w && func_cmd_we (cmd_data, SENS_LENS_FAT0_IN, SENS_LENS_FAT0_IN_MASK); we_fatzero_out <= set_lens_w && func_cmd_we (cmd_data, SENS_LENS_FAT0_OUT, SENS_LENS_FAT0_OUT_MASK); we_post_scale <= set_lens_w && func_cmd_we (cmd_data, SENS_LENS_POST_SCALE, SENS_LENS_POST_SCALE_MASK); // Write to RAM if (we_AX) AX_ram [func_chn(cmd_data_r)] <= cmd_data_r[18:0]; if (we_AY) AY_ram [func_chn(cmd_data_r)] <= cmd_data_r[18:0]; if (we_BX) BX_ram [func_chn(cmd_data_r)] <= cmd_data_r[20:0]; if (we_BY) BY_ram [func_chn(cmd_data_r)] <= cmd_data_r[20:0]; if (we_C) C_ram [func_chn(cmd_data_r)] <= cmd_data_r[18:0]; if (we_scales) scales_ram [{func_chn(cmd_data_r), cmd_data_r[18:17]}] <= cmd_data_r[16:0]; if (we_fatzero_in) fatzero_in_ram [func_chn(cmd_data_r)] <= cmd_data_r[15:0]; if (we_fatzero_out) fatzero_out_ram[func_chn(cmd_data_r)] <= cmd_data_r[15:0]; if (we_post_scale) post_scale_ram [func_chn(cmd_data_r)] <= cmd_data_r[ 3:0]; end always @ (posedge pclk) begin hact_d <= {hact_d[12:0],hact_in}; // newline <= {newline[1:0], hact_in && !hact_d[0]}; newline <= {newline[1:0], hact_d[3] && !hact_d[4]}; // line_start <= newline; // make it SR? if (sof_in) pre_first_line <= 1; else if (newline[0]) pre_first_line <= 0; if (pre_first_line || newline[0]) inc_sub_frame <= (sub_frame != (SENS_NUM_SUBCHN - 1)) && (line_cntr == 0); sub_frame_early <= sub_frame + inc_sub_frame; if (pre_first_line) sub_frame <= 0; else if (newline[0]) sub_frame <= sub_frame_early; // adjust when to switch? if (pre_first_line || (newline[1] && inc_sub_frame)) line_cntr <= heights_m1_ram[sub_frame]; else if (newline[1] ) line_cntr <= line_cntr - 1; // if (newline[2]) sub_frame_late <= sub_frame; if (newline[1]) sub_frame_late <= sub_frame; sub_frame_late_d <= {sub_frame_late_d[1:0],sub_frame_late}; // valid @ hact_d[3], use @hact_d[4] as there is always >= 1 clock HACT gap sosf <= (hact_in && ~hact_d[0]) && (pre_first_line || inc_sub_frame); end //reg color[1:0] always @ (posedge pclk) begin // bayer_nset <= !sof_in && (bayer_nset || hact_d[1]); bayer_nset <= !sof_in && (bayer_nset || hact_d[5]); bayer0_latched<= bayer_nset? bayer0_latched:bayer[0]; color[1:0] <= { bayer_nset? (sync_bayer ^ color[1]):bayer[1] , (bayer_nset &&(~sync_bayer))?~color[0]:bayer0_latched }; /// now scale the result (normally post_scale[2:0] ==1) case (post_scale_ram[sub_frame][2:0]) 3'h0:mult_first_scaled[17:0]<= (~mult_first_res[35] & |mult_first_res[34:33]) ? 18'h1ffff:mult_first_res[33:16]; /// only limit positive overflow 3'h1:mult_first_scaled[17:0]<= (~mult_first_res[35] & |mult_first_res[34:32]) ? 18'h1ffff:mult_first_res[32:15]; 3'h2:mult_first_scaled[17:0]<= (~mult_first_res[35] & |mult_first_res[34:31]) ? 18'h1ffff:mult_first_res[31:14]; 3'h3:mult_first_scaled[17:0]<= (~mult_first_res[35] & |mult_first_res[34:30]) ? 18'h1ffff:mult_first_res[30:13]; 3'h4:mult_first_scaled[17:0]<= (~mult_first_res[35] & |mult_first_res[34:29]) ? 18'h1ffff:mult_first_res[29:12]; 3'h5:mult_first_scaled[17:0]<= (~mult_first_res[35] & |mult_first_res[34:28]) ? 18'h1ffff:mult_first_res[28:11]; 3'h6:mult_first_scaled[17:0]<= (~mult_first_res[35] & |mult_first_res[34:27]) ? 18'h1ffff:mult_first_res[27:10]; 3'h7:mult_first_scaled[17:0]<= (~mult_first_res[35] & |mult_first_res[34:26]) ? 18'h1ffff:mult_first_res[26: 9]; endcase if (hact_d[12]) pxd_out[15:0] <= pre_pixdo_with_zero[20]? 16'h0: /// negative - use 0 ((|pre_pixdo_with_zero[19:16])?16'hffff: ///>0xffff - limit by 0xffff pre_pixdo_with_zero[15:0]); end // Replacing MULT18X18SIO of x353, registers on both inputs, outputs reg signed [17:0] mul1_a; reg signed [17:0] mul1_b; reg signed [35:0] mul1_p; reg signed [17:0] mul2_a; reg signed [17:0] mul2_b; // wire [17:0] mul2_b = mult_first_scaled[17:0]; // TODO - delay to have a register! reg signed [35:0] mul2_p; always @ (posedge pclk) begin if (hact_d[7]) mul1_a <= (FXY[18]==FXY[17])?FXY[17:0]:(FXY[18]?18'h20000:18'h1ffff); if (hact_d[7]) mul1_b <= {1'b0,scales_ram[{sub_frame_late,~color[1:0]}]}; if (hact_d[8]) mul1_p <= mul1_a * mul1_b; if (hact_d[10]) mul2_a <= pix_zero[17:0]; // adjust sub_frame delay if (hact_d[10]) mul2_b <= mult_first_scaled[17:0]; // 18-bit multiplier input - always positive if (hact_d[11]) mul2_p <= mul2_a * mul2_b; end assign mult_first_res = mul1_p; assign mult_second_res = mul2_p; cmd_deser #( .ADDR (SENS_LENS_ADDR), .ADDR_MASK (SENS_LENS_ADDR_MASK), .NUM_CYCLES (6), .ADDR_WIDTH (2), .DATA_WIDTH (32) ) cmd_deser_lens_i ( .rst (mrst), // rst), // input .clk (mclk), // input .srst (mrst), // input .ad (cmd_ad), // input[7:0] .stb (cmd_stb), // input .addr (cmd_a), // output[15:0] .data (cmd_data), // output[31:0] .we (cmd_we) // output ); dly_16 #( .WIDTH(2) ) dly_16_sof_eof_i ( .clk (pclk), // input .rst (prst), // input .dly (4'd12), // input[3:0] .din ({sof_in,eof_in}), // input[0:0] .dout ({sof_out,eof_out}) // output[0:0] ); dly_16 #( .WIDTH(16) ) dly_16_pxd_i ( .clk (pclk), // input .rst (prst), // input .dly (4'd10), // input[3:0] .din (pxd_in), // input[0:0] .dout (pxd_d) // output[0:0] ); /* dly_16 #( .WIDTH(1) ) dly_16_sof_d_i ( .clk (pclk), // input .rst (prst), // input .dly (4'd8), // input[3:0] .din (sof_in), // input[0:0] .dout (sosf) // output[0:0] ); */ lens_flat393_line #( .F_WIDTH (SENS_LENS_F_WIDTH), // number of bits in the output result (signed) .F_SHIFT (SENS_LENS_F_SHIFT), // shift ~2*log2(width/2), for 4K width .B_SHIFT (SENS_LENS_B_SHIFT), //(<=F_SHIFT) shift b- coeff (12 is 2^12 - good for lines <4096, 1 output count per width) .A_WIDTH (SENS_LENS_A_WIDTH), // number of bits in a-coefficient (signed). Just to match the caller - MSBs will be anyway discarded .B_WIDTH (SENS_LENS_B_WIDTH)) // number of bits in b-coefficient (signed). i_fy( .pclk (pclk), // pixel clock // wrong - need to restart for each sub-frame .first (sosf), // initialize running parameters from the inputs (first column). Should be at least 1-cycle gap between "first" and first "next" .next (newline[0]), // calcualte next pixel .F0 (C_ram[sub_frame_early]), // value of the output in the first column (before saturation), 18 bit, unsigned .ERR0 (24'b0), // initial value of the running error (-2.0inc: /// 100 -> 11 /// 101 -> 11 /// 110 -> 11 /// 111 -> 00 /// 000 -> 00 /// 001 -> 01 /// 010 -> 01 /// 011 -> 01 wire [1:0] inc= {preERR[F_SHIFT+1] & (~preERR[F_SHIFT] | ~preERR[F_SHIFT-1]), (preERR[F_SHIFT+1:F_SHIFT-1] != 3'h0) & (preERR[F_SHIFT+1:F_SHIFT-1] != 3'h7)}; always @(posedge pclk) begin first_d <=first; next_d <=next; if (first) begin F1 [F_WIDTH-1:0] <= F0[ F_WIDTH-1:0]; dF[(DF_WIDTH)-1:0] <= B0[B_WIDTH-1: (F_SHIFT-B_SHIFT)]; ERR[F_SHIFT+1:0] <= ERR0[F_SHIFT+1:0]; ApB[F_SHIFT+1:0] <= {{F_SHIFT + 2 - A_WIDTH{A0[A_WIDTH-1]}},A0[A_WIDTH-1:0]} + // width correct //AF2015 {B0[B_WIDTH-1:0],{F_SHIFT-B_SHIFT{1'b0}}}; /// high bits from B will be discarded {B0[B_SHIFT-1:0],{F_SHIFT-B_SHIFT{1'b0}}}; /// high bits from B are discarded A [A_WIDTH-1:0] <= A0[A_WIDTH-1:0]; end else if (next) begin //AF2015 dF[(DF_WIDTH)-1:0] <= dF[(DF_WIDTH)-1:0]+{{((DF_WIDTH)-1){inc[1]}},inc[1:0]}; dF[(DF_WIDTH)-1:0] <= dF[(DF_WIDTH)-1:0] + {{((DF_WIDTH)-2){inc[1]}},inc[1:0]}; ERR[F_SHIFT-1:0]<= preERR[F_SHIFT-1:0]; ERR[F_SHIFT+1:F_SHIFT]<= preERR[F_SHIFT+1:F_SHIFT]-inc[1:0]; end if (first_d) F_r[F_WIDTH-1:0] <= F1[ F_WIDTH-1:0]; else if (next_d) F_r[F_WIDTH-1:0] <= F_r[F_WIDTH-1:0]+{{(F_WIDTH-(DF_WIDTH)){dF[(DF_WIDTH)-1]}},dF[(DF_WIDTH)-1:0]}; //AF2015 if (first_d) A2X[F_SHIFT+1:1] <= {{F_SHIFT+2-A_WIDTH{A[A_WIDTH-1]}},A[A_WIDTH-1:0]}; //AF2015 else if (next) A2X[F_SHIFT+1:1] <= A2X[F_SHIFT+1:1] + {{F_SHIFT+2-A_WIDTH{A[A_WIDTH-1]}},A[A_WIDTH-1:0]}; if (first_d) A2X[F_SHIFT+1:1] <= {{F_SHIFT+1-A_WIDTH{A[A_WIDTH-1]}},A[A_WIDTH-1:0]}; else if (next) A2X[F_SHIFT+1:1] <= A2X[F_SHIFT+1:1] + {{F_SHIFT+1-A_WIDTH{A[A_WIDTH-1]}},A[A_WIDTH-1:0]}; end endmodule