Commit 5ce0a999 authored by Mikhail Karpenko's avatar Mikhail Karpenko

Add timing helper function, invalidate cache from tasklet

Timing helper can be used to profile code execution from user space.
Cache invalidation is organized in tasklet and will be removed from mem
driver after some testing is performed.
parent 04cf69a7
......@@ -14,6 +14,7 @@ obj-$(CONFIG_ELPHEL393) += clock10359.o
#fpgajtag-y := fpgajtag353.o x393.o
#obj-$(CONFIG_ELPHEL393_EXTERNAL) += fpgajtag.o
obj-$(CONFIG_ELPHEL393) += x393_helpers.o
obj-$(CONFIG_ELPHEL393) += framepars.o
obj-$(CONFIG_ELPHEL393) += sensor_common.o x393.o
obj-$(CONFIG_ELPHEL393) += quantization_tables.o
......
......@@ -69,7 +69,8 @@
#include "circbuf.h"
#include "exif.h"
#include "x393_macro.h"
#include "x393.h"
//#include "x393.h"
#include "x393_helpers.h"
#define CIRCBUF_DRIVER_NAME "circbuf driver"
......@@ -416,6 +417,8 @@ inline int get_image_start(int last_chunk_offset, unsigned int len32)
* free memory may be less by a whole frame if compressor is running.
* LSEEK_CIRC_USED - returns memory used in the in circbuf from the current file pointer,
* or -EINVAL if the pointer is invalid
* The following command is used for profiling from user space applications. It does not change file pointer:
* LSEEK_CIRC_UTIME return current value of microsecond counter.
* @param[in] file pointer to \e file structure
* @param[in] offset offset inside buffer in bytes
* @param[in] orig origin
......@@ -656,6 +659,9 @@ loff_t circbuf_lseek(struct file *file, loff_t offset, int orig)
}
if (fvld < 0) return -ESPIPE; // invalid seek - have better code?
return file->f_pos ; // data already available, return file pointer
case LSEEK_CIRC_UTIME:
return get_rtc_usec();
break;
default:
if ((offset & ~0x1f)==LSEEK_DAEMON_CIRCBUF) {
wait_event_interruptible(circbuf_wait_queue, get_imageParamsThis(P_DAEMON_EN) & (1<<(offset & 0x1f)));
......@@ -804,6 +810,17 @@ int circbuf_mmap(struct file *file, struct vm_area_struct *vma)
vma->vm_end - vma->vm_start,
vma->vm_page_prot);
// ret = dma_common_mmap(g_dev_ptr, vma,
// circbuf_priv[chn].buf_ptr,
// circbuf_priv[chn].phys_addr,
// pElphel_buf->size * PAGE_SIZE);
//
// ret = arm_dma_mmap(g_dev_ptr, vma,
// circbuf_priv[chn].buf_ptr,
// circbuf_priv[chn].phys_addr,
// pElphel_buf->size * PAGE_SIZE,
// NULL);
dev_dbg(g_dev_ptr, "remap_pfn_range returned 0x%x\n", ret);
if (ret) return -EAGAIN;
......
......@@ -37,6 +37,8 @@
#include <asm/outercache.h>
#include <asm/cacheflush.h>
#include <elphel/elphel393-mem.h>
#include "x393_helpers.h"
#define SYSFS_PERMISSIONS 0644 /* default permissions for sysfs files */
#define SYSFS_READONLY 0444
#define SYSFS_WRITEONLY 0222
......@@ -310,6 +312,7 @@ static ssize_t sync_for_device_bidir(struct device *dev, struct device_attribute
return count;
}
static ssize_t flush_cpu_cache(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
{
const int buff_size = 0x1000000;
......@@ -318,8 +321,10 @@ static ssize_t flush_cpu_cache(struct device *dev, struct device_attribute *attr
int start_offset, end_offset;
int num_items;
dma_addr_t phys_addr_start, phys_addr_end;
u32 start_time, end_time;
num_items = sscanf(buf, "%u:%d:%d", &chn, &start_offset, &end_offset);
start_time = get_rtc_usec();
if (num_items == 3) {
// invalidate L2 caches
if (end_offset > start_offset) {
......@@ -340,6 +345,12 @@ static ssize_t flush_cpu_cache(struct device *dev, struct device_attribute *attr
outer_inv_range(phys_addr_start, phys_addr_end);
}
}
end_time = get_rtc_usec();
if (start_time == 0 && end_time == 0) {
pr_info("Unable to get usec values\n");
} else {
pr_info("Cache invalidate time: %lu\n", end_time - start_time);
}
return count;
}
......
......@@ -75,12 +75,14 @@
//#include "gamma_tables.h"
#include "quantization_tables.h"
#include "x393_macro.h"
#include "x393.h"
//#include "x393.h"
#include "x393_helpers.h"
/**
* @brief driver name to display in log messages
*/
/** @brief Driver name to display in log messages. */
#define IMAGEACQ_DRIVER_NAME "Elphel (R) Model 393 Image Acquisition device driver"
/** @brief The size in bytes of L2 cache invalidation area. This size must be aligned to cache line size.
* 16 kbytes seems to be good starting point. */
#define L2_INVAL_SIZE (16 * 1024)
/**@struct jpeg_ptr_t
* @brief \e jpeg_ptr_t structure contains read and write pointers along with
......@@ -91,7 +93,7 @@
* JPEG read pointer in 32 bit words
* @var jpeg_ptr_t::fpga_cntr_prev
* This field contains previous value of the FPGA transfer counter which is used
* to find out if it has changed
* to find out if it has changed. This pointer is in 32 bit words.
* @var jpeg_ptr_t::irq_num_comp
* IRQ number associated with compressor
* @var jpeg_ptr_t::irq_num_sens
......@@ -227,8 +229,12 @@ struct sensorproc_t * copy_sensorproc (struct sensorproc_t * copy)
///
int init_acq_sensor(void);
DECLARE_TASKLET(tasklet_fpga, tasklet_fpga_function, 0); /// 0 - no arguments for now
//DECLARE_TASKLET(tasklet_fpga, tasklet_fpga_function, 0); /// 0 - no arguments for now
DECLARE_TASKLET(tasklet_fpga_0, tasklet_fpga_function, 0); /// 0 - no arguments for now
DECLARE_TASKLET(tasklet_fpga_1, tasklet_fpga_function, 1); /// 0 - no arguments for now
DECLARE_TASKLET(tasklet_fpga_2, tasklet_fpga_function, 2); /// 0 - no arguments for now
DECLARE_TASKLET(tasklet_fpga_3, tasklet_fpga_function, 3); /// 0 - no arguments for now
static struct tasklet_struct *tasklets[IMAGE_CHN_NUM] = {&tasklet_fpga_0, &tasklet_fpga_1, &tasklet_fpga_2, &tasklet_fpga_3};
/**
......@@ -244,16 +250,16 @@ static inline int updateIRQJPEG_wp(struct jpeg_ptr_t *jptr)
{
phys_addr_t phys_addr;
void *virt_addr;
int prev_dword;
// int prev_dword;
int xferred; /// number of 32-byte chunks transferred since compressor was reset
x393_afimux_status_t stat = x393_afimux0_status(jptr->chn_num);
xferred = stat.offset256 - jptr->fpga_cntr_prev;
xferred = stat.offset256 - (jptr->fpga_cntr_prev >> 3);
if (xferred == 0)
return 0; /// no advance (compressor was off?)
jptr->flags |= SENS_FLAG_IRQ;
jptr->fpga_cntr_prev = stat.offset256;
jptr->fpga_cntr_prev = jptr->jpeg_wp;
jptr->jpeg_wp = (stat.offset256 << 3);
/* debug code follows */
......@@ -268,53 +274,53 @@ static inline int updateIRQJPEG_wp(struct jpeg_ptr_t *jptr)
/* Correct frame length and re-adjust interframe params.
* This operations was scheduled on previous interrupt.
*/
if (jptr->flags & SENS_FLAG_HW_OFF) {
int len32;
int len32_ptr = jptr->jpeg_wp - INTERFRAME_PARAMS_SZ - 1;
phys_addr = circbuf_priv_ptr[jptr->chn_num].phys_addr + DW2BYTE(jptr->jpeg_wp) - CHUNK_SIZE;
virt_addr = circbuf_priv_ptr[jptr->chn_num].buf_ptr + jptr->jpeg_wp - INTERFRAME_PARAMS_SZ;
outer_inv_range(phys_addr, phys_addr + (CHUNK_SIZE - 1));
__cpuc_flush_dcache_area(virt_addr, CHUNK_SIZE);
len32 = circbuf_priv_ptr[jptr->chn_num].buf_ptr[len32_ptr];
len32 -= INTERFRAME_PARAMS_SZ;
circbuf_priv_ptr[jptr->chn_num].buf_ptr[len32_ptr] = len32;
__cpuc_flush_dcache_area(virt_addr, CHUNK_SIZE);
outer_inv_range(phys_addr, phys_addr + (CHUNK_SIZE - 1));
jptr->flags &= ~SENS_FLAG_HW_OFF;
}
// if (jptr->flags & SENS_FLAG_HW_OFF) {
// int len32;
// int len32_ptr = jptr->jpeg_wp - INTERFRAME_PARAMS_SZ - 1;
// phys_addr = circbuf_priv_ptr[jptr->chn_num].phys_addr + DW2BYTE(jptr->jpeg_wp) - CHUNK_SIZE;
// virt_addr = circbuf_priv_ptr[jptr->chn_num].buf_ptr + jptr->jpeg_wp - INTERFRAME_PARAMS_SZ;
// outer_inv_range(phys_addr, phys_addr + (CHUNK_SIZE - 1));
// __cpuc_flush_dcache_area(virt_addr, CHUNK_SIZE);
// len32 = circbuf_priv_ptr[jptr->chn_num].buf_ptr[len32_ptr];
// len32 -= INTERFRAME_PARAMS_SZ;
// circbuf_priv_ptr[jptr->chn_num].buf_ptr[len32_ptr] = len32;
// __cpuc_flush_dcache_area(virt_addr, CHUNK_SIZE);
// outer_inv_range(phys_addr, phys_addr + (CHUNK_SIZE - 1));
// jptr->flags &= ~SENS_FLAG_HW_OFF;
// }
/* Looks like compressor is not writing 32 zero bytes when last frame ends precisely at the
* end of buffer. Try to detect this situation and set a flag so that we can overwrite first
* 32 bytes of the buffer on next interrupt. These bytes will be used as interframe parameters and current frame length
* will be decreased by these 32 bytes. Such a measure will corrupt the frame but preserve the structure.
*/
if (jptr->jpeg_wp == 0) {
// we need to invalidate two cache lines in order to
// estimate the situation correctly: one line after the pointer, which should be the line of
// 32 bytes of newly compressed frame(or zero bytes?), and one line before the pointer, which should be the last line of the frame. If this is not done
// then the data read from memory can be incorrect and error detection will give false result. Barrier is set to
// prevent compiler from reordering operations.
phys_addr = circbuf_priv_ptr[jptr->chn_num].phys_addr;
virt_addr = circbuf_priv_ptr[jptr->chn_num].buf_ptr;
dev_dbg(NULL, "from updateIRQJPEG_wp: phys_addr = 0x%x, virt_addr = 0x%p\n", phys_addr, virt_addr);
outer_inv_range(phys_addr, phys_addr + (CHUNK_SIZE - 1));
__cpuc_flush_dcache_area(virt_addr, CHUNK_SIZE);
phys_addr = circbuf_priv_ptr[jptr->chn_num].phys_addr + CCAM_DMA_SIZE - CHUNK_SIZE;
virt_addr = circbuf_priv_ptr[jptr->chn_num].buf_ptr + BYTE2DW(CCAM_DMA_SIZE - CHUNK_SIZE);
outer_inv_range(phys_addr, phys_addr + (CHUNK_SIZE - 1));
__cpuc_flush_dcache_area(virt_addr, CHUNK_SIZE);
dev_dbg(NULL, "from updateIRQJPEG_wp: phys_addr = 0x%x, virt_addr = 0x%p\n", phys_addr, virt_addr);
barrier();
prev_dword = X393_BUFFSUB(DW2BYTE(jptr->jpeg_wp), 4);
dev_dbg(NULL, "circbuf_priv_ptr[jptr->chn_num].buf_ptr[jptr->jpeg_wp] = 0x%x\n", circbuf_priv_ptr[jptr->chn_num].buf_ptr[jptr->jpeg_wp]);
dev_dbg(NULL, "circbuf_priv_ptr[jptr->chn_num].buf_ptr[BYTE2DW(prev_dword)] = 0x%x\n", circbuf_priv_ptr[jptr->chn_num].buf_ptr[BYTE2DW(prev_dword)]);
// if (circbuf_priv_ptr[jptr->chn_num].buf_ptr[jptr->jpeg_wp] == 0x00 &&
if ((circbuf_priv_ptr[jptr->chn_num].buf_ptr[BYTE2DW(prev_dword)] & MARKER_FF) == MARKER_FF) {
// jptr->jpeg_wp += INTERFRAME_PARAMS_SZ;
jptr->flags |= SENS_FLAG_HW_OFF;
corrected_offset[jptr->chn_num] += 1;
}
}
// if (jptr->jpeg_wp == 0) {
// // we need to invalidate two cache lines in order to
// // estimate the situation correctly: one line after the pointer, which should be the line of
// // 32 bytes of newly compressed frame(or zero bytes?), and one line before the pointer, which should be the last line of the frame. If this is not done
// // then the data read from memory can be incorrect and error detection will give false result. Barrier is set to
// // prevent compiler from reordering operations.
// phys_addr = circbuf_priv_ptr[jptr->chn_num].phys_addr;
// virt_addr = circbuf_priv_ptr[jptr->chn_num].buf_ptr;
// dev_dbg(NULL, "from updateIRQJPEG_wp: phys_addr = 0x%x, virt_addr = 0x%p\n", phys_addr, virt_addr);
// outer_inv_range(phys_addr, phys_addr + (CHUNK_SIZE - 1));
// __cpuc_flush_dcache_area(virt_addr, CHUNK_SIZE);
// phys_addr = circbuf_priv_ptr[jptr->chn_num].phys_addr + CCAM_DMA_SIZE - CHUNK_SIZE;
// virt_addr = circbuf_priv_ptr[jptr->chn_num].buf_ptr + BYTE2DW(CCAM_DMA_SIZE - CHUNK_SIZE);
// outer_inv_range(phys_addr, phys_addr + (CHUNK_SIZE - 1));
// __cpuc_flush_dcache_area(virt_addr, CHUNK_SIZE);
// dev_dbg(NULL, "from updateIRQJPEG_wp: phys_addr = 0x%x, virt_addr = 0x%p\n", phys_addr, virt_addr);
// barrier();
// prev_dword = X393_BUFFSUB(DW2BYTE(jptr->jpeg_wp), 4);
// dev_dbg(NULL, "circbuf_priv_ptr[jptr->chn_num].buf_ptr[jptr->jpeg_wp] = 0x%x\n", circbuf_priv_ptr[jptr->chn_num].buf_ptr[jptr->jpeg_wp]);
// dev_dbg(NULL, "circbuf_priv_ptr[jptr->chn_num].buf_ptr[BYTE2DW(prev_dword)] = 0x%x\n", circbuf_priv_ptr[jptr->chn_num].buf_ptr[BYTE2DW(prev_dword)]);
//// if (circbuf_priv_ptr[jptr->chn_num].buf_ptr[jptr->jpeg_wp] == 0x00 &&
// if ((circbuf_priv_ptr[jptr->chn_num].buf_ptr[BYTE2DW(prev_dword)] & MARKER_FF) == MARKER_FF) {
//// jptr->jpeg_wp += INTERFRAME_PARAMS_SZ;
// jptr->flags |= SENS_FLAG_HW_OFF;
// corrected_offset[jptr->chn_num] += 1;
// }
// }
// invalidate CPU L1 and L2 caches
// the code below was used to find cache coherence issues
......@@ -522,11 +528,12 @@ inline void updateIRQ_Exif(struct interframe_params_t* interframe) {
*/
static irqreturn_t frame_sync_irq_handler(int irq, void *dev_id)
{
struct jpeg_ptr_t *priv = dev_id;
struct jpeg_ptr_t *jptr = dev_id;
update_frame_pars();
wake_up_interruptible(&framepars_wait_queue);
tasklet_schedule(&tasklet_fpga);
// tasklet_schedule(&tasklet_fpga);
tasklet_schedule(tasklets[jptr->chn_num]);
return IRQ_HANDLED;
}
......@@ -541,24 +548,25 @@ static irqreturn_t frame_sync_irq_handler(int irq, void *dev_id)
*/
static irqreturn_t compressor_irq_handler(int irq, void *dev_id)
{
struct jpeg_ptr_t *priv = dev_id;
struct jpeg_ptr_t *jptr = dev_id;
struct interframe_params_t *interframe = NULL;
x393_cmprs_interrupts_t irq_ctrl;
unsigned long flag;
local_irq_save(flag);
if (updateIRQJPEG_wp(priv)) {
if (updateIRQJPEG_wp(jptr)) {
// update_irq_circbuf(priv);
// updateIRQFocus(priv);
interframe = updateIRQ_interframe(priv);
interframe = updateIRQ_interframe(jptr);
//updateIRQ_Exif(interframe);
wake_up_interruptible(&circbuf_wait_queue);
}
//wake_up_interruptible(&framepars_wait_queue);
// tasklet_schedule(&tasklet_fpga);
tasklet_schedule(tasklets[jptr->chn_num]);
irq_ctrl.interrupt_cmd = IRQ_CLEAR;
x393_cmprs_interrupts(irq_ctrl, priv->chn_num);
x393_cmprs_interrupts(irq_ctrl, jptr->chn_num);
local_irq_restore(flag);
return IRQ_HANDLED;
......@@ -596,6 +604,25 @@ void tasklet_fpga_function(unsigned long arg) {
unsigned long prevFrameNumber=thisFrameNumber-1;
unsigned long * hash32p=&(framepars[(thisFrameNumber-1) & PARS_FRAMES_MASK].pars[P_GTAB_R]);
unsigned long * framep= &(framepars[(thisFrameNumber-1) & PARS_FRAMES_MASK].pars[P_FRAME]);
const struct jpeg_ptr_t *jptr = &image_acq_priv.jpeg_ptr[arg];
dma_addr_t phys_addr_start, phys_addr_end;
unsigned int sz;
/* invalidate L2 cache lines in the beginning of current frame */
phys_addr_start = circbuf_priv_ptr[jptr->chn_num].phys_addr + DW2BYTE(jptr->fpga_cntr_prev);
sz = DW2BYTE(jptr->fpga_cntr_prev) + L2_INVAL_SIZE;
if (sz < CCAM_DMA_SIZE) {
phys_addr_end = phys_addr_start + L2_INVAL_SIZE - 1;
outer_inv_range(phys_addr_start, phys_addr_end);
} else {
phys_addr_end = phys_addr_start + (CCAM_DMA_SIZE - DW2BYTE(jptr->fpga_cntr_prev) - 1);
outer_inv_range(phys_addr_start, phys_addr_end);
phys_addr_start = circbuf_priv_ptr[jptr->chn_num].phys_addr;
phys_addr_end = phys_addr_start + (sz - CCAM_DMA_SIZE - 1);
outer_inv_range(phys_addr_start, phys_addr_end);
}
#ifdef TEST_DISABLE_CODE
/// Time is out?
......
......@@ -53,8 +53,6 @@ int image_acq_init(struct platform_device *pdev);
// indicate that this channel need attention; set in interrupt handler, reset in bottom half
#define SENS_FLAG_IRQ 0x01
// hw pointer is set incorrectly (32 zero bytes are not recorded), try to fix it
#define SENS_FLAG_HW_OFF 0x20
// got 0x20 more than start of the new image
#define OFFSET_X40 0x40
......
/** @file x393_helpers.c
*
* @brief Helper functions for various routines form x393.h which require several actions to get
* reliable result.
*/
/* Copyright (C) 2016 Elphel, Inc
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <stddef.h>
#include "x393_helpers.h"
/**
* @brief Read RTC microsecond counter.
* @return Current value of microsecond counter or 0 in case read sequence was
* not successful.
*/
u32 get_rtc_usec(void)
{
x393_rtc_status_t stat;
x393_status_ctrl_t stat_ctrl;
x393_rtc_usec_t usec;
unsigned int i;
stat = x393_rtc_status();
stat_ctrl.d32 = 0;
stat_ctrl.mode = 1;
stat_ctrl.seq_num = stat.seq_num + 1;
set_x393_rtc_set_status(stat_ctrl);
for (i = 0; i < REPEAT_READ; i++) {
stat = x393_rtc_status();
if (stat.seq_num == stat_ctrl.seq_num) {
usec = x393_rtc_status_usec();
return usec.usec;
}
}
return 0;
}
/** @file x393_helpers.h
*
* @brief Helper functions for various routines form x393.h which require several actions to get
* reliable result.
*/
/* Copyright (C) 2016 Elphel, Inc
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _X393_HELPERS_H
#define _X393_HELPERS_H
#include <asm/types.h>
#include "x393.h"
/** @brief Number of times to repeat register read sequence while waiting for
* sequence number specified.
*/
#define REPEAT_READ 10
u32 get_rtc_usec(void);
#endif /* _X393_HELPERS_H */
......@@ -1362,6 +1362,7 @@ struct p_names_t {
#define LSEEK_CIRC_FREE 12
#define LSEEK_CIRC_USED 13
#define LSEEK_CIRC_STOP_COMPRESSOR 14
#define LSEEK_CIRC_UTIME 15
#define LSEEK_HUFFMAN_DC0 1
#define LSEEK_HUFFMAN_AC0 2
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment