/*
 * Elphel AHCI SATA platform driver for elphel393 camera
 *
 * Based on the AHCI SATA platform driver by Jeff Garzik and Anton Vorontsov
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 */

/* this one is required for printk_ratelimited */
#define CONFIG_PRINTK

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/ahci_platform.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_address.h>
#include <linux/slab.h>
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
#include <linux/sysfs.h>
#include <elphel/exifa.h>
#include <elphel/elphel393-mem.h>

#include "ahci.h"
#include "ahci_elphel.h"
#include "../elphel/exif393.h"
#include "../elphel/jpeghead.h"

#define DRV_NAME "elphel-ahci"
/*
 * FPGA bitstream control address and bit mask. These are used to check whether
 * bitstream is loaded or not.
 */
#define BITSTREAM_CTRL_ADDR	0xf800700c
#define BITSTREAM_CTRL_BIT	0x4

/* Property names from device tree, these are specific for the controller */
#define PROP_NAME_CLB_OFFS "clb_offs"
#define PROP_NAME_FB_OFFS "fb_offs"

static struct ata_port_operations ahci_elphel_ops;
static const struct ata_port_info ahci_elphel_port_info;
static struct scsi_host_template ahci_platform_sht;
static const struct of_device_id ahci_elphel_of_match[];
static const struct attribute_group dev_attr_root_group;

static bool load_driver = false;
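/* APP15 (0xff 0xef) JPEG application marker used as a stuffing segment; align_frame()
 * below patches its length field (app15[3]) so that the JPEG header area ends on an
 * ALIGNMENT_SIZE boundary */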
static unsigned char app15[ALIGNMENT_SIZE] = {0xff, 0xef};

static void elphel_cmd_issue(struct ata_port *ap, uint64_t start, uint16_t count, struct fvec *sgl, unsigned int elem, uint8_t cmd);
static int init_buffers(struct device *dev, struct frame_buffers *buffs);
static void init_vectors(struct frame_buffers *buffs, struct fvec *chunks);
static void deinit_buffers(struct device *dev, struct frame_buffers *buffs);
static inline struct elphel_ahci_priv *dev_get_dpriv(struct device *dev);
static void finish_cmd(struct elphel_ahci_priv *dpriv);
static void finish_rec(struct elphel_ahci_priv *dpriv);
static int process_cmd(struct elphel_ahci_priv *dpriv);
static inline size_t get_size_from(const struct fvec *vects, int index, size_t offset, int all);
static inline void vectmov(struct fvec *vec, size_t len);
static inline void vectsplit(struct fvec *vect, struct fvec *parts, size_t *n_elem);
static int move_tail(struct elphel_ahci_priv *dpriv);
static int move_head(struct elphel_ahci_priv *dpriv);
static size_t get_prev_slot(const struct elphel_ahci_priv *dpriv);
static int is_cmdq_empty(const struct elphel_ahci_priv *dpriv);
void process_queue(unsigned long data);
static void set_flag(struct elphel_ahci_priv *dpriv, uint32_t flag);
static void reset_flag(struct elphel_ahci_priv *dpriv, uint32_t flag);
/* debug functions */
static int check_chunks(struct fvec *vects);
static void dump_sg_list(const struct device *dev, const struct fvec *sgl, size_t elems);

static ssize_t set_load_flag(struct device *dev, struct device_attribute *attr,
		const char *buff, size_t buff_sz)
{
	load_driver = true;

	return buff_sz;
}

static int bitstream_loaded(u32 *ptr)
{
	u32 val = ioread32(ptr);

	if (val & BITSTREAM_CTRL_BIT)
		return 1;
	else
		return 0;
}

static int elphel_check_load(struct device *dev)
{
	int ret = 0;
	u32 *ctrl_ptr = ioremap_nocache(BITSTREAM_CTRL_ADDR, 4);

	if (!bitstream_loaded(ctrl_ptr)) {
		ret = -1;
		dev_err(dev, "FPGA bitstream is not loaded or bitstream "
				"does not contain AHCI controller. Remove driver, load bitstream and try again\n");
	}
	iounmap(ctrl_ptr);

	return ret;
}

static irqreturn_t elphel_irq_handler(int irq, void * dev_instance)
{
	unsigned long irq_flags;
	irqreturn_t handled;
	struct ata_host *host = dev_instance;
	struct ahci_host_priv *hpriv = host->private_data;
	struct ata_port *port = host->ports[DEFAULT_PORT_NUM];
	void __iomem *port_mmio = ahci_port_base(port);
	struct elphel_ahci_priv *dpriv = hpriv->plat_data;
	uint32_t irq_stat, host_irq_stat;


	if (dpriv->flags & IRQ_SIMPLE) {
		/* handle interrupt from internal command */
		host_irq_stat = readl(hpriv->mmio + HOST_IRQ_STAT);
		if (!host_irq_stat)
			return IRQ_NONE;
		dpriv->flags &= ~IRQ_SIMPLE;
		irq_stat = readl(port_mmio + PORT_IRQ_STAT);

		dev_dbg(host->dev, "irq_stat = 0x%x, host irq_stat = 0x%x\n", irq_stat, host_irq_stat);

		writel(irq_stat, port_mmio + PORT_IRQ_STAT);
		writel(host_irq_stat, hpriv->mmio + HOST_IRQ_STAT);
		handled = IRQ_HANDLED;
		tasklet_schedule(&dpriv->bh);
	} else {
		/* pass handling to AHCI level and then decide if the resource should be freed */
		handled = ahci_single_irq_intr(irq, dev_instance);
		spin_lock_irqsave(&dpriv->flags_lock, irq_flags);
		if (is_cmdq_empty(dpriv)) {
			dpriv->flags &= ~DISK_BUSY;
		} else {
			tasklet_schedule(&dpriv->bh);
		}
		spin_unlock_irqrestore(&dpriv->flags_lock, irq_flags);
	}

	return handled;
}
/** Command queue processing tasklet */
void process_queue(unsigned long data)
{
	unsigned long irq_flags;
	struct elphel_ahci_priv *dpriv = (struct elphel_ahci_priv *)data;

	if (process_cmd(dpriv) == 0) {
		finish_cmd(dpriv);
		if (move_head(dpriv) != -1) {
			process_cmd(dpriv);
		} else {
			if (dpriv->flags & DELAYED_FINISH) {
				dpriv->flags &= ~DELAYED_FINISH;
				finish_rec(dpriv);
			} else {
				/* all commands have been processed */
				spin_lock_irqsave(&dpriv->flags_lock, irq_flags);
				dpriv->flags &= ~DISK_BUSY;
				spin_unlock_irqrestore(&dpriv->flags_lock, irq_flags);
			}
		}
	}
}

// What about port_stop and freeing/unmapping ?
// Or at least check if it is re-started and memory is already allocated/mapped
static int elphel_port_start(struct ata_port *ap)
{
	void *mem;
	dma_addr_t mem_dma;
	struct device *dev = ap->host->dev;
	struct ahci_port_priv *pp;
	struct ahci_host_priv *hpriv = ap->host->private_data;
	const struct elphel_ahci_priv *dpriv = hpriv->plat_data;

	dev_dbg(dev, "starting port %d", ap->port_no);
	pp = devm_kzalloc(dev, sizeof(struct ahci_port_priv), GFP_KERNEL);
	if (!pp)
		return -ENOMEM;

	mem = devm_kmalloc(dev, 0x100000, GFP_KERNEL); // AHCI_CMD_TBL_AR_SZ = 0x16000
	if (!mem)
		return -ENOMEM;
	mem_dma = dma_map_single(dev, mem, AHCI_CMD_TBL_AR_SZ, DMA_TO_DEVICE); // maybe DMA_BIDIRECTIONAL, but currently we do not use DMA for received FISes

	pp->cmd_tbl = mem;
	pp->cmd_tbl_dma = mem_dma;

	/*
	 * Set predefined addresses
	 */
	pp->cmd_slot = hpriv->mmio + dpriv->clb_offs;
	pp->cmd_slot_dma = dpriv->base_addr + dpriv->clb_offs;

	pp->rx_fis = hpriv->mmio + dpriv->fb_offs;
	pp->rx_fis_dma = dpriv->base_addr + dpriv->fb_offs;

	/*
	 * Save off initial list of interrupts to be enabled.
	 * This could be changed later
	 */
	pp->intr_mask = DEF_PORT_IRQ;

	ap->private_data = pp;

	return ahci_port_resume(ap);
}

static int elphel_parse_prop(const struct device_node *devn,
		struct device *dev,
		struct elphel_ahci_priv *dpriv)
{
	int rc = 0;
	const __be32 *val;
	struct resource res;

	if (!devn) {
		dev_err(dev, "elphel-ahci device tree node is not found");
		return -EINVAL;
	}

	val = of_get_property(devn, PROP_NAME_CLB_OFFS, NULL);
	if (!val) {
		dev_err(dev, "can not find clb_offs in device tree");
		return -EINVAL;
	}
	dpriv->clb_offs = be32_to_cpup(val);

	val = of_get_property(devn, PROP_NAME_FB_OFFS, NULL);
	if (!val) {
		dev_err(dev, "can not find fb_offs in device tree");
		return -EINVAL;
	}
	dpriv->fb_offs = be32_to_cpup(val);

	rc = of_address_to_resource((struct device_node *)devn, 0, &res);
	if (rc < 0) {
		dev_err(dev, "can not find address in device tree");
		return -EINVAL;
	}
	dpriv->base_addr = (u32)res.start;

	return 0;
}

static int elphel_drv_probe(struct platform_device *pdev)
{
	int ret, i, irq_num;
	struct ahci_host_priv *hpriv;
	struct elphel_ahci_priv *dpriv;
	struct device *dev = &pdev->dev;
	const struct of_device_id *match;
	struct ata_host *host;

	ret = elphel_check_load(dev);
	if (ret < 0) {
		return ret;
	}

	if (&dev->kobj) {
		ret = sysfs_create_group(&dev->kobj, &dev_attr_root_group);
		if (ret < 0)
			return ret;
	}

	dev_info(&pdev->dev, "probing Elphel AHCI driver");

	dpriv = devm_kzalloc(dev, sizeof(struct elphel_ahci_priv), GFP_KERNEL);
	if (!dpriv)
		return -ENOMEM;

	dpriv->dev = dev;
	spin_lock_init(&dpriv->flags_lock);
	tasklet_init(&dpriv->bh, process_queue, (unsigned long)dpriv);

	for (i = 0; i < MAX_CMD_SLOTS; i++) {
		ret = init_buffers(dev, &dpriv->fbuffs[i]);
		if (ret != 0)
			return ret;
		init_vectors(&dpriv->fbuffs[i], dpriv->data_chunks[i]);
	}

	match = of_match_device(ahci_elphel_of_match, &pdev->dev);
	if (!match)
		return -EINVAL;

	ret = elphel_parse_prop(dev->of_node, dev, dpriv);
	if (ret != 0)
		return ret;

	hpriv = ahci_platform_get_resources(pdev);
	if (IS_ERR(hpriv))
		return PTR_ERR(hpriv);

	hpriv->plat_data = dpriv;

	ret = ahci_platform_init_host(pdev, hpriv, &ahci_elphel_port_info,
			&ahci_platform_sht);
	if (ret) {
		dev_err(dev, "can not initialize platform host");
		ahci_platform_disable_resources(hpriv);
		return ret;
	}

	/* reassign automatically assigned interrupt handler */
	irq_num = platform_get_irq(pdev, 0);
	host = platform_get_drvdata(pdev);
	devm_free_irq(dev, irq_num, host);
	ret = devm_request_irq(dev, irq_num, elphel_irq_handler, IRQF_SHARED, dev_name(dev), host);
	if (ret) {
		dev_err(dev, "failed to reassign default IRQ handler to Elphel handler\n");
		return ret;
	}

	return 0;
}

static int elphel_drv_remove(struct platform_device *pdev)
{
	int i;
	struct elphel_ahci_priv *dpriv = dev_get_dpriv(&pdev->dev);

	dev_info(&pdev->dev, "removing Elphel AHCI driver");
	tasklet_kill(&dpriv->bh);
	for (i = 0; i < MAX_CMD_SLOTS; i++)
		deinit_buffers(&pdev->dev, &dpriv->fbuffs[i]);
	sysfs_remove_group(&pdev->dev.kobj, &dev_attr_root_group);
	ata_platform_remove_one(pdev);

	return 0;
}

static void elphel_qc_prep(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct ahci_port_priv *pp = ap->private_data;
	int is_atapi = ata_is_atapi(qc->tf.protocol);
	void *cmd_tbl;
	u32 opts;
	const u32 cmd_fis_len = 5; /* five dwords */
	unsigned int n_elem;
	struct scatterlist *sg;
	struct ahci_sg *ahci_sg;

	/* There is only one slot in the controller, thus we need to change the tag */
	qc->tag = 0;

	/*
	 * Fill in command table information.  First, the header,
	 * a SATA Register - Host to Device command FIS.
	 */
	dma_sync_single_for_cpu(&qc->dev->tdev, pp->cmd_tbl_dma,
			AHCI_CMD_TBL_AR_SZ, DMA_TO_DEVICE);
	cmd_tbl = pp->cmd_tbl + qc->tag * AHCI_CMD_TBL_SZ;

	ata_tf_to_fis(&qc->tf, qc->dev->link->pmp, 1, cmd_tbl);

	if (is_atapi) {
		memset(cmd_tbl + AHCI_CMD_TBL_CDB, 0, 32);
		memcpy(cmd_tbl + AHCI_CMD_TBL_CDB, qc->cdb, qc->dev->cdb_len);
	}

	/*
	 * Next, the S/G list.
	 */
	n_elem = 0;
	ahci_sg = cmd_tbl + AHCI_CMD_TBL_HDR_SZ;
	if (qc->flags & ATA_QCFLAG_DMAMAP) {
		for_each_sg(qc->sg, sg, qc->n_elem, n_elem) {
			dma_addr_t addr = sg_dma_address(sg);
			u32 sg_len = sg_dma_len(sg);

			ahci_sg[n_elem].addr = cpu_to_le32(addr & 0xffffffff);
			ahci_sg[n_elem].addr_hi = cpu_to_le32((addr >> 16) >> 16);
			ahci_sg[n_elem].flags_size = cpu_to_le32(sg_len - 1);
		}
	}

	/*
	 * Fill in command slot information.
	 */
	opts = cmd_fis_len | n_elem << 16 | (qc->dev->link->pmp << 12);
	if (qc->tf.flags & ATA_TFLAG_WRITE)
		opts |= AHCI_CMD_WRITE;
	if (is_atapi)
		opts |= AHCI_CMD_ATAPI | AHCI_CMD_PREFETCH;

	ahci_fill_cmd_slot(pp, qc->tag, opts);
	dma_sync_single_for_device(&qc->dev->tdev, pp->cmd_tbl_dma,
			AHCI_CMD_TBL_AR_SZ, DMA_TO_DEVICE);
}

/** Set flag @e flag in driver private structure. This function uses spin lock to access the flags variable. */
static void set_flag(struct elphel_ahci_priv *dpriv, uint32_t flag)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&dpriv->flags_lock, irq_flags);
	dpriv->flags |= flag;
	spin_unlock_irqrestore(&dpriv->flags_lock, irq_flags);
}

/** Reset flag @e flag in driver private structure. This function uses spin lock to access the flags variable. */
static void reset_flag(struct elphel_ahci_priv *dpriv, uint32_t flag)
{
	unsigned long irq_flags;

	spin_lock_irqsave(&dpriv->flags_lock, irq_flags);
	dpriv->flags &= ~flag;
	spin_unlock_irqrestore(&dpriv->flags_lock, irq_flags);
}

/** Map buffer vectors to S/G list and return the number of vectors mapped */
static int map_vectors(struct elphel_ahci_priv *dpriv)
{
	int i;
	int index = 0;
	int finish = 0;
	size_t total_sz = 0;
	size_t tail;
	struct fvec *chunks;
	struct fvec vect;

	chunks = dpriv->data_chunks[dpriv->head_ptr];
	for (i = dpriv->curr_data_chunk; i < MAX_DATA_CHUNKS; i++) {
		if (i == CHUNK_REM)
			/* remainder should never be processed */
			continue;
		if (i == dpriv->curr_data_chunk) {
			total_sz = chunks[i].iov_len - dpriv->curr_data_offset;
			vect.iov_base = (unsigned char *)chunks[i].iov_base + dpriv->curr_data_offset;
			vect.iov_dma = chunks[i].iov_dma + dpriv->curr_data_offset;
			vect.iov_len = chunks[i].iov_len - dpriv->curr_data_offset;
		} else {
			total_sz += chunks[i].iov_len;
			vect = chunks[i];
		}
		if (total_sz > dpriv->max_data_sz) {
			/* truncate current buffer and finish mapping */
			tail = total_sz - dpriv->max_data_sz;
			vect.iov_len -= tail;
			dpriv->curr_data_chunk = i;
			dpriv->curr_data_offset = chunks[i].iov_len - tail;
			finish = 1;
		} else if (unlikely(total_sz == dpriv->max_data_sz)) {
			dpriv->curr_data_chunk = i;
			dpriv->curr_data_offset = chunks[i].iov_len;
			finish = 1;
		}
		if (vect.iov_len != 0) {
			if (vect.iov_len < MAX_PRDT_LEN) {
				dpriv->sgl[index++] = vect;
			} else {
				/* current vector is too long and can not be mapped to a single PRDT entry, split it */
				vectsplit(&vect, dpriv->sgl, &index);
				if (vect.iov_len < MAX_PRDT_LEN) {
					dpriv->sgl[index++] = vect;
				} else {
					/* free slots in PRDT table have ended */
					dpriv->curr_data_chunk = i;
					dpriv->curr_data_offset = (unsigned char *)vect.iov_base - (unsigned char *)chunks[i].iov_base;
					finish = 1;
				}
			}
			if (index == (MAX_SGL_LEN - 1))
				finish = 1;
		}
		if (finish)
			break;
	}
	if (finish == 0) {
		/* frame vectors have been fully processed, stop calling me */
		dpriv->curr_data_chunk = MAX_DATA_CHUNKS;
		dpriv->curr_data_offset = 0;
	}

	return index;
}
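
/* Note: map_vectors() can be called several times for a single frame. Each call resumes
 * from curr_data_chunk/curr_data_offset, so a frame that exceeds max_data_sz or needs
 * more than MAX_SGL_LEN entries is written with several consecutive commands issued
 * from process_cmd()/process_queue(). */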

/** Split the buffer pointed to by vector @e vect into several smaller buffers. Each part will be less than #MAX_PRDT_LEN bytes */
static inline void vectsplit(struct fvec *vect, struct fvec *parts, size_t *n_elem)
{
	size_t len;
	struct fvec split;

	while (vect->iov_len > MAX_PRDT_LEN && *n_elem < MAX_SGL_LEN) {
		len = MAX_PRDT_LEN - MAX_PRDT_LEN % PHY_BLOCK_SIZE;
		split.iov_base = vect->iov_base;
		split.iov_dma = vect->iov_dma;
		split.iov_len = len;
		vectmov(vect, len);
		parts[*n_elem] = split;
		*n_elem = *n_elem + 1;
	}
}
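
/* Each part produced above is trimmed to a whole multiple of PHY_BLOCK_SIZE
 * (len = MAX_PRDT_LEN - MAX_PRDT_LEN % PHY_BLOCK_SIZE), so every emitted entry except
 * possibly the last covers an integer number of disk sectors. */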

/** Copy @e len bytes from the buffer pointed to by the @e src vector to the buffer pointed to by the @e dest vector */
static inline void vectcpy(struct fvec *dest, void *src, size_t len)
{
	unsigned char *d = (unsigned char *)dest->iov_base;

	memcpy(d + dest->iov_len, src, len);
	dest->iov_len += len;
}

/** Move vector forward by @e len bytes decreasing its length */
static inline void vectmov(struct fvec *vec, size_t len)
{
	if (vec->iov_len >= len) {
		vec->iov_base = (unsigned char *)vec->iov_base + len;
		vec->iov_dma += len;
		vec->iov_len -= len;
	}
}

/** Shrink vector length by @e len bytes */
static inline void vectshrink(struct fvec *vec, size_t len)
{
	if (vec->iov_len >= len) {
		vec->iov_len -= len;
	}
}

/** Return the number of bytes needed to align @e data_len to @e align_len boundary */
static inline size_t align_bytes_num(size_t data_len, size_t align_len)
{
	size_t rem = data_len % align_len;
	if (rem == 0)
		return 0;
	else
		return align_len - rem;
}
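
/* Worked example: align_bytes_num(1000, 512) == 24 because 1000 % 512 == 488 and
 * 512 - 488 == 24; align_bytes_num(1024, 512) == 0 since the data is already aligned. */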

/** This helper function is used to position a pointer @e offset bytes from the end
 * of a buffer. DMA handle is not updated intentionally as it is not needed during copying */
static inline unsigned char *vectrpos(struct fvec *vec, size_t offset)
{
	return (unsigned char *)vec->iov_base + (vec->iov_len - offset);
}

/** Align current frame to disk sector boundary and each individual buffer to #ALIGNMENT_SIZE boundary */
static void align_frame(struct elphel_ahci_priv *dpriv)
{
	unsigned char *src;
	size_t len, total_sz, data_len;
	size_t cmd_slot = dpriv->tail_ptr;
	size_t prev_slot = get_prev_slot(dpriv);
	size_t max_len = dpriv->fbuffs[cmd_slot].common_buff.iov_len;
	struct device *dev = dpriv->dev;
	struct frame_buffers *fbuffs = &dpriv->fbuffs[cmd_slot];
	struct fvec *chunks = dpriv->data_chunks[cmd_slot];
	struct fvec *cbuff = &chunks[CHUNK_COMMON];
	struct fvec *rbuff = &dpriv->data_chunks[prev_slot][CHUNK_REM];

	total_sz = get_size_from(chunks, 0, 0, INCLUDE_REM) + rbuff->iov_len;
	if (total_sz < PHY_BLOCK_SIZE) {
		/* the frame length is less than sector size, delay this frame */
		if (prev_slot != cmd_slot) {
			/* some data may be left from previous frame */
			vectcpy(&chunks[CHUNK_REM], rbuff->iov_base, rbuff->iov_len);
			vectshrink(rbuff, rbuff->iov_len);
		}
		dev_dbg(dev, "frame size is less than sector size: %u bytes; delay recording\n", total_sz);
		vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_LEADER].iov_base, chunks[CHUNK_LEADER].iov_len);
		vectshrink(&chunks[CHUNK_LEADER], chunks[CHUNK_LEADER].iov_len);
		vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_EXIF].iov_base, chunks[CHUNK_EXIF].iov_len);
		vectshrink(&chunks[CHUNK_EXIF], chunks[CHUNK_EXIF].iov_len);
		vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_HEADER].iov_base, chunks[CHUNK_HEADER].iov_len);
		vectshrink(&chunks[CHUNK_HEADER], chunks[CHUNK_HEADER].iov_len);
		vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_DATA_0].iov_base, chunks[CHUNK_DATA_0].iov_len);
		vectshrink(&chunks[CHUNK_DATA_0], chunks[CHUNK_DATA_0].iov_len);
		vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_DATA_1].iov_base, chunks[CHUNK_DATA_1].iov_len);
		vectshrink(&chunks[CHUNK_DATA_1], chunks[CHUNK_DATA_1].iov_len);
		vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_TRAILER].iov_base, chunks[CHUNK_TRAILER].iov_len);
		vectshrink(&chunks[CHUNK_TRAILER], chunks[CHUNK_TRAILER].iov_len);
		return;
	}

	dma_sync_single_for_cpu(dev, fbuffs->common_buff.iov_dma, fbuffs->common_buff.iov_len, DMA_TO_DEVICE);

	/* copy remainder of previous frame to the beginning of common buffer */
	if (likely(rbuff->iov_len != 0)) {
		len = rbuff->iov_len;
		dev_dbg(dev, "copy %u bytes from REM #%u to common buffer\n", len, prev_slot);
		vectcpy(cbuff, rbuff->iov_base, len);
		vectshrink(rbuff, rbuff->iov_len);
	}

	/* copy JPEG marker */
	len = chunks[CHUNK_LEADER].iov_len;
	vectcpy(cbuff, chunks[CHUNK_LEADER].iov_base, len);
	vectshrink(&chunks[CHUNK_LEADER], chunks[CHUNK_LEADER].iov_len);

	/* copy Exif if present */
	if (chunks[CHUNK_EXIF].iov_len != 0) {
		len = chunks[CHUNK_EXIF].iov_len;
		dev_dbg(dev, "copy %u bytes from EXIF to common buffer\n", len);
		vectcpy(cbuff, chunks[CHUNK_EXIF].iov_base, len);
		vectshrink(&chunks[CHUNK_EXIF], chunks[CHUNK_EXIF].iov_len);
	}

	/* align common buffer to ALIGNMENT boundary, APP15 marker should be placed before header data */
	data_len = cbuff->iov_len + chunks[CHUNK_HEADER].iov_len;
	len = align_bytes_num(data_len, ALIGNMENT_SIZE);
	if (len < JPEG_MARKER_LEN + JPEG_SIZE_LEN && len != 0) {
		/* the number of bytes needed for alignment is less than the length of the marker itself, increase the number of stuffing bytes */
		len += ALIGNMENT_SIZE;
	}
	dev_dbg(dev, "total number of stuffing bytes in APP15 marker: %u\n", len);
	app15[3] = len - JPEG_MARKER_LEN;
	vectcpy(cbuff, app15, len);

	/* copy JPEG header */
	len = chunks[CHUNK_HEADER].iov_len;
	dev_dbg(dev, "copy %u bytes from HEADER to common buffer\n", len);
	vectcpy(cbuff, chunks[CHUNK_HEADER].iov_base, len);
	vectshrink(&chunks[CHUNK_HEADER], chunks[CHUNK_HEADER].iov_len);

	/* check if there is enough data to continue - JPEG data length can be too short */
	len = get_size_from(chunks, CHUNK_DATA_0, 0, EXCLUDE_REM);
	if (len < PHY_BLOCK_SIZE) {
		size_t num = align_bytes_num(cbuff->iov_len, PHY_BLOCK_SIZE);
		dev_dbg(dev, "jpeg data is too short, delay this frame\n");
		if (len >= num) {
			/* there is enough data to align common buffer to sector boundary */
			if (num >= chunks[CHUNK_DATA_0].iov_len) {
				vectcpy(cbuff, chunks[CHUNK_DATA_0].iov_base, chunks[CHUNK_DATA_0].iov_len);
				num -= chunks[CHUNK_DATA_0].iov_len;
				vectshrink(&chunks[CHUNK_DATA_0], chunks[CHUNK_DATA_0].iov_len);
			} else {
				src = vectrpos(&chunks[CHUNK_DATA_0], num);
				vectcpy(cbuff, chunks[CHUNK_DATA_0].iov_base, num);
				vectshrink(&chunks[CHUNK_DATA_0], num);
				num = 0;
			}
			if (num >= chunks[CHUNK_DATA_1].iov_len) {
				vectcpy(cbuff, chunks[CHUNK_DATA_1].iov_base, chunks[CHUNK_DATA_1].iov_len);
				num -= chunks[CHUNK_DATA_1].iov_len;
				vectshrink(&chunks[CHUNK_DATA_1], chunks[CHUNK_DATA_1].iov_len);
			} else {
				src = vectrpos(&chunks[CHUNK_DATA_1], num);
				vectcpy(cbuff, chunks[CHUNK_DATA_1].iov_base, num);
				vectshrink(&chunks[CHUNK_DATA_1], num);
				num = 0;
			}
			if (num >= chunks[CHUNK_TRAILER].iov_len) {
				vectcpy(cbuff, chunks[CHUNK_TRAILER].iov_base, chunks[CHUNK_TRAILER].iov_len);
				num -= chunks[CHUNK_TRAILER].iov_len;
				vectshrink(&chunks[CHUNK_TRAILER], chunks[CHUNK_TRAILER].iov_len);
			} else {
				src = vectrpos(&chunks[CHUNK_TRAILER], num);
				vectcpy(cbuff, chunks[CHUNK_TRAILER].iov_base, num);
				vectshrink(&chunks[CHUNK_TRAILER], num);
				num = 0;
			}
		} else {
			/* there is not enough data to align common buffer to sector boundary, truncate common buffer */
			data_len = cbuff->iov_len % PHY_BLOCK_SIZE;
			src = vectrpos(cbuff, data_len);
			vectcpy(&chunks[CHUNK_REM], src, data_len);
			vectshrink(cbuff, data_len);
		}
		vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_DATA_0].iov_base, chunks[CHUNK_DATA_0].iov_len);
		vectshrink(&chunks[CHUNK_DATA_0], chunks[CHUNK_DATA_0].iov_len);
		vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_DATA_1].iov_base, chunks[CHUNK_DATA_1].iov_len);
		vectshrink(&chunks[CHUNK_DATA_1], chunks[CHUNK_DATA_1].iov_len);
		vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_TRAILER].iov_base, chunks[CHUNK_TRAILER].iov_len);
		vectshrink(&chunks[CHUNK_TRAILER], chunks[CHUNK_TRAILER].iov_len);

		return;
	}

	/* align frame to sector size boundary; total size could have changed by the moment - recalculate */
	total_sz = get_size_from(chunks, 0, 0, INCLUDE_REM);
	len = total_sz % PHY_BLOCK_SIZE;
	dev_dbg(dev, "number of bytes crossing sector boundary: %u\n", len);
	if (len != 0) {
		if (len >= (chunks[CHUNK_DATA_1].iov_len + chunks[CHUNK_TRAILER].iov_len)) {
			/* current frame is not split or the second part of JPEG data is too short */
			data_len = len - chunks[CHUNK_DATA_1].iov_len - chunks[CHUNK_TRAILER].iov_len;
			src = vectrpos(&chunks[CHUNK_DATA_0], data_len);
			vectcpy(&chunks[CHUNK_REM], src, data_len);
			vectshrink(&chunks[CHUNK_DATA_0], data_len);
			vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_DATA_1].iov_base, chunks[CHUNK_DATA_1].iov_len);
			vectshrink(&chunks[CHUNK_DATA_1], chunks[CHUNK_DATA_1].iov_len);
			vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_TRAILER].iov_base, chunks[CHUNK_TRAILER].iov_len);
			vectshrink(&chunks[CHUNK_TRAILER], chunks[CHUNK_TRAILER].iov_len);
		} else if (len >= chunks[CHUNK_TRAILER].iov_len) {
			/* there is enough data in second part to align the frame */
			data_len = len - chunks[CHUNK_TRAILER].iov_len;
			src = vectrpos(&chunks[CHUNK_DATA_1], data_len);
			vectcpy(&chunks[CHUNK_REM], src, data_len);
			vectshrink(&chunks[CHUNK_DATA_1], data_len);
			vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_TRAILER].iov_base, chunks[CHUNK_TRAILER].iov_len);
			vectshrink(&chunks[CHUNK_TRAILER], chunks[CHUNK_TRAILER].iov_len);
		} else {
			/* the trailing marker is split by sector boundary, copy (PHY_BLOCK_SIZE - 1) bytes from
			 * JPEG data block(s) to remainder buffer and then add trailing marker */
			data_len = PHY_BLOCK_SIZE - (chunks[CHUNK_TRAILER].iov_len - len);
			if (data_len >= chunks[CHUNK_DATA_1].iov_len) {
				size_t cut_len = data_len - chunks[CHUNK_DATA_1].iov_len;
				src = vectrpos(&chunks[CHUNK_DATA_0], cut_len);
				vectcpy(&chunks[CHUNK_REM], src, cut_len);
				vectshrink(&chunks[CHUNK_DATA_0], cut_len);
				vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_DATA_1].iov_base, chunks[CHUNK_DATA_1].iov_len);
				vectshrink(&chunks[CHUNK_DATA_1], chunks[CHUNK_DATA_1].iov_len);
				vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_TRAILER].iov_base, chunks[CHUNK_TRAILER].iov_len);
				vectshrink(&chunks[CHUNK_TRAILER], chunks[CHUNK_TRAILER].iov_len);
			} else {
				src = vectrpos(&chunks[CHUNK_DATA_1], data_len);
				vectcpy(&chunks[CHUNK_REM], src, data_len);
				vectshrink(&chunks[CHUNK_DATA_1], data_len);
				vectcpy(&chunks[CHUNK_REM], chunks[CHUNK_TRAILER].iov_base, chunks[CHUNK_TRAILER].iov_len);
				vectshrink(&chunks[CHUNK_TRAILER], chunks[CHUNK_TRAILER].iov_len);
			}
		}
	} else {
		/* the frame is aligned to sector boundary but some buffers may be not */
		chunks[CHUNK_ALIGN].iov_base = vectrpos(cbuff, 0);
		chunks[CHUNK_ALIGN].iov_dma = cbuff->iov_dma + cbuff->iov_len;
		chunks[CHUNK_ALIGN].iov_len = 0;
		if (chunks[CHUNK_DATA_1].iov_len == 0) {
			data_len = chunks[CHUNK_DATA_0].iov_len % ALIGNMENT_SIZE;
			src = vectrpos(&chunks[CHUNK_DATA_0], data_len);
			vectcpy(&chunks[CHUNK_ALIGN], src, data_len);
			vectshrink(&chunks[CHUNK_DATA_0], data_len);
		} else {
			data_len = chunks[CHUNK_DATA_1].iov_len % ALIGNMENT_SIZE;
			src = vectrpos(&chunks[CHUNK_DATA_1], data_len);
			vectcpy(&chunks[CHUNK_ALIGN], src, data_len);
			vectshrink(&chunks[CHUNK_DATA_1], data_len);
		}
		vectcpy(&chunks[CHUNK_ALIGN], chunks[CHUNK_TRAILER].iov_base, chunks[CHUNK_TRAILER].iov_len);
		vectshrink(&chunks[CHUNK_TRAILER], chunks[CHUNK_TRAILER].iov_len);
	}

	/* debug sanity check, should not happen */
	if (cbuff->iov_len >= max_len) {
		dev_err(dev, "ERROR: the number of bytes copied to common buffer exceeds its size\n");
	}
}

/** Calculate the number of blocks this frame will occupy. The frame must be aligned to block size */
static inline size_t get_blocks_num(struct fvec *sgl, size_t n_elem)
{
	int num;
	size_t total = 0;

	for (num = 0; num < n_elem; num++) {
		total += sgl[num].iov_len;
	}

	return total / PHY_BLOCK_SIZE;
}
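
/* Worked example (assuming the usual 512-byte PHY_BLOCK_SIZE): an S/G list of three
 * 2048-byte entries totals 6144 bytes and therefore occupies 6144 / 512 == 12 blocks. */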

/** Calculate the size of current frame in bytes starting from vector and offset given */
static inline size_t get_size_from(const struct fvec *vects, int index, size_t offset, int all)
{
	int i;
	size_t total = 0;

	if (index >= MAX_DATA_CHUNKS || offset > vects[index].iov_len) {
		return 0;
	}

	for (i = index; i < MAX_DATA_CHUNKS; i++) {
		if (i == CHUNK_REM && all == EXCLUDE_REM)
			/* remainder should not be processed */
			continue;
		if (i == index)
			total += vects[i].iov_len - offset;
		else
			total += vects[i].iov_len;
	}

	return total;
}

/** Set vectors pointing to data buffers except for JPEG data - those are set in circbuf driver */
static void init_vectors(struct frame_buffers *buffs, struct fvec *chunks)
{
	chunks[CHUNK_EXIF].iov_base = buffs->exif_buff.iov_base;
	chunks[CHUNK_EXIF].iov_len = 0;

	chunks[CHUNK_LEADER].iov_base = buffs->jpheader_buff.iov_base;
	chunks[CHUNK_LEADER].iov_len = 0;
	chunks[CHUNK_HEADER].iov_base = (unsigned char *)chunks[CHUNK_LEADER].iov_base + JPEG_MARKER_LEN;
	chunks[CHUNK_HEADER].iov_len = 0;

	chunks[CHUNK_TRAILER].iov_base = buffs->trailer_buff.iov_base;
	chunks[CHUNK_TRAILER].iov_len = 0;

	chunks[CHUNK_REM].iov_base = buffs->rem_buff.iov_base;
	chunks[CHUNK_REM].iov_len = 0;

	/* this is the only DMA mapped buffer and its DMA address should be set */
	chunks[CHUNK_COMMON].iov_base = buffs->common_buff.iov_base;
	chunks[CHUNK_COMMON].iov_dma = buffs->common_buff.iov_dma;
	chunks[CHUNK_COMMON].iov_len = 0;
}

/** Allocate memory for frame buffers */
static int init_buffers(struct device *dev, struct frame_buffers *buffs)
{
	int mult;
	int total_sz;
	unsigned char *ptr;

	buffs->exif_buff.iov_base = kmalloc(MAX_EXIF_SIZE, GFP_KERNEL);
	if (!buffs->exif_buff.iov_base)
		return -ENOMEM;
	buffs->exif_buff.iov_len = MAX_EXIF_SIZE;

	buffs->jpheader_buff.iov_base = kmalloc(JPEG_HEADER_MAXSIZE, GFP_KERNEL);
	if (!buffs->jpheader_buff.iov_base)
		goto err_header;
	buffs->jpheader_buff.iov_len = JPEG_HEADER_MAXSIZE;

	buffs->trailer_buff.iov_base = kmalloc(JPEG_MARKER_LEN, GFP_KERNEL);
	if (!buffs->trailer_buff.iov_base)
		goto err_trailer;
	buffs->trailer_buff.iov_len = JPEG_MARKER_LEN;
	ptr = buffs->trailer_buff.iov_base;
	ptr[0] = 0xff;
	ptr[1] = 0xd9;

	/* common buffer should be large enough to contain JPEG header, Exif, some alignment bytes and
	 * remainder from previous frame */
	total_sz = MAX_EXIF_SIZE + JPEG_HEADER_MAXSIZE + ALIGNMENT_SIZE + 2 * PHY_BLOCK_SIZE;
	if (total_sz > PAGE_SIZE) {
		mult = total_sz / PAGE_SIZE + 1;
		total_sz = mult * PAGE_SIZE;
	} else {
		total_sz = PAGE_SIZE;
	}
	buffs->common_buff.iov_base = kmalloc(total_sz, GFP_KERNEL);
	if (!buffs->common_buff.iov_base)
		goto err_common;
	buffs->common_buff.iov_len = total_sz;
	/* this is the only buffer which needs DMA mapping as all other data will be collected in it */
	buffs->common_buff.iov_dma = dma_map_single(dev, buffs->common_buff.iov_base, buffs->common_buff.iov_len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, buffs->common_buff.iov_dma))
		goto err_common_dma;

	buffs->rem_buff.iov_base = kmalloc(2 * PHY_BLOCK_SIZE, GFP_KERNEL);
	if (!buffs->rem_buff.iov_base)
		goto err_remainder;
	buffs->rem_buff.iov_len = 2 * PHY_BLOCK_SIZE;

	return 0;

err_remainder:
	dma_unmap_single(dev, buffs->common_buff.iov_dma, buffs->common_buff.iov_len, DMA_TO_DEVICE);
err_common_dma:
	kfree(buffs->common_buff.iov_base);
err_common:
	kfree(buffs->trailer_buff.iov_base);
err_trailer:
	kfree(buffs->jpheader_buff.iov_base);
err_header:
	kfree(buffs->exif_buff.iov_base);
	return -ENOMEM;
}

/** Free allocated frame buffers */
static void deinit_buffers(struct device *dev, struct frame_buffers *buffs)
{
	kfree(buffs->jpheader_buff.iov_base);
	kfree(buffs->exif_buff.iov_base);
	kfree(buffs->trailer_buff.iov_base);
	dma_unmap_single(dev, buffs->common_buff.iov_dma, buffs->common_buff.iov_len, DMA_TO_DEVICE);
	kfree(buffs->common_buff.iov_base);
	kfree(buffs->rem_buff.iov_base);
}

/** Discard buffer pointers, which marks the command slot as empty */
static inline void reset_chunks(struct fvec *vects, int all)
{
	int i;

	for (i = 0; i < MAX_DATA_CHUNKS; i++) {
		if (i != CHUNK_REM)
			vects[i].iov_len = 0;
	}
	if (all) {
		vects[CHUNK_REM].iov_len = 0;
	}
}

/** Get driver private structure from pointer to device structure */
static inline struct elphel_ahci_priv *dev_get_dpriv(struct device *dev)
{
	struct ata_host *host = dev_get_drvdata(dev);
	struct ahci_host_priv *hpriv = host->private_data;
	struct elphel_ahci_priv *dpriv = hpriv->plat_data;

	return dpriv;
}

/** Process command and return the number of S/G entries mapped */
static int process_cmd(struct elphel_ahci_priv *dpriv)
{
	struct fvec *cbuff;
	struct ata_host *host = dev_get_drvdata(dpriv->dev);
	struct ata_port *port = host->ports[DEFAULT_PORT_NUM];
	size_t max_sz = (MAX_LBA_COUNT + 1) * PHY_BLOCK_SIZE;
	size_t rem_sz = get_size_from(dpriv->data_chunks[dpriv->head_ptr], dpriv->curr_data_chunk, dpriv->curr_data_offset, EXCLUDE_REM);

	if (dpriv->flags & PROC_CMD)
		dpriv->lba_ptr.lba_write += dpriv->lba_ptr.wr_count;
	dpriv->flags |= PROC_CMD;

	/* define ATA command to use for current transaction */
	if ((dpriv->lba_ptr.lba_write & ~ADDR_MASK_28_BIT) || rem_sz > max_sz) {
		dpriv->curr_cmd = ATA_CMD_WRITE_EXT;
		dpriv->max_data_sz = (MAX_LBA_COUNT_EXT + 1) * PHY_BLOCK_SIZE;
	} else {
		dpriv->curr_cmd = ATA_CMD_WRITE;
		dpriv->max_data_sz = (MAX_LBA_COUNT + 1) * PHY_BLOCK_SIZE;
	}

	dpriv->sg_elems = map_vectors(dpriv);
	if (dpriv->sg_elems != 0) {
		dump_sg_list(dpriv->dev, dpriv->sgl, dpriv->sg_elems);

		dpriv->lba_ptr.wr_count = get_blocks_num(dpriv->sgl, dpriv->sg_elems);
		if (dpriv->lba_ptr.lba_write + dpriv->lba_ptr.wr_count > dpriv->lba_ptr.lba_end) {
			/* the frame rolls over the buffer boundary, don't split it and start writing from the beginning */
			dpriv->lba_ptr.lba_write = dpriv->lba_ptr.lba_start;
		}
		cbuff = &dpriv->fbuffs[dpriv->head_ptr].common_buff;
		dma_sync_single_for_device(dpriv->dev, cbuff->iov_dma, cbuff->iov_len, DMA_TO_DEVICE);
		elphel_cmd_issue(port, dpriv->lba_ptr.lba_write, dpriv->lba_ptr.wr_count, dpriv->sgl, dpriv->sg_elems, dpriv->curr_cmd);
	}
	return dpriv->sg_elems;
}

/** Finish currently running command */
static void finish_cmd(struct elphel_ahci_priv *dpriv)
{
	int all;

	dpriv->lba_ptr.wr_count = 0;
	if ((dpriv->flags & LAST_BLOCK) == 0) {
		all = 0;
	} else {
		all = 1;
		dpriv->flags &= ~LAST_BLOCK;
	}
	reset_chunks(dpriv->data_chunks[dpriv->head_ptr], all);
	dpriv->curr_cmd = 0;
	dpriv->max_data_sz = 0;
	dpriv->curr_data_chunk = 0;
	dpriv->curr_data_offset = 0;
	dpriv->flags &= ~PROC_CMD;
}

/** Fill free space in REM buffer with 0 and save the remaining data chunk */
static void finish_rec(struct elphel_ahci_priv *dpriv)
{
	size_t stuff_len;
	unsigned char *src;
	struct fvec *cvect = &dpriv->data_chunks[dpriv->head_ptr][CHUNK_COMMON];
	struct fvec *rvect = &dpriv->data_chunks[dpriv->head_ptr][CHUNK_REM];

	if (rvect->iov_len == 0)
		return;

	dev_dbg(dpriv->dev, "write last chunk of data from slot %u, size: %u\n", dpriv->head_ptr, rvect->iov_len);
	stuff_len = PHY_BLOCK_SIZE - rvect->iov_len;
	src = vectrpos(rvect, 0);
	memset(src, 0, stuff_len);
	rvect->iov_len += stuff_len;
	dma_sync_single_for_cpu(dpriv->dev, dpriv->fbuffs[dpriv->head_ptr].common_buff.iov_dma, dpriv->fbuffs[dpriv->head_ptr].common_buff.iov_len, DMA_TO_DEVICE);
	vectcpy(cvect, rvect->iov_base, rvect->iov_len);
	vectshrink(rvect, rvect->iov_len);

	dpriv->flags |= LAST_BLOCK;
	process_cmd(dpriv);
}

/** Move a pointer to free command slot one step forward. This function holds spin lock #elphel_ahci_priv::flags_lock */
static int move_tail(struct elphel_ahci_priv *dpriv)
{
	size_t slot = (dpriv->tail_ptr + 1) % MAX_CMD_SLOTS;

	if (slot != dpriv->head_ptr) {
		set_flag(dpriv, LOCK_TAIL);
		dpriv->tail_ptr = slot;
		dev_dbg(dpriv->dev, "move tail pointer to slot: %u\n", slot);
		return 0;
	} else {
		/* no more free command slots */
		return -1;
	}
}

/** Move a pointer to the next ready command. This function holds spin lock #elphel_ahci_priv::flags_lock */
static int move_head(struct elphel_ahci_priv *dpriv)
{
	size_t use_tail;
	unsigned long irq_flags;
	size_t slot = (dpriv->head_ptr + 1) % MAX_CMD_SLOTS;

	spin_lock_irqsave(&dpriv->flags_lock, irq_flags);
	if (dpriv->flags & LOCK_TAIL) {
		/* current command slot is not ready yet, use previous */
		use_tail = get_prev_slot(dpriv);
	} else {
		use_tail = dpriv->tail_ptr;
	}
	spin_unlock_irqrestore(&dpriv->flags_lock, irq_flags);

	if (dpriv->head_ptr != use_tail) {
		dpriv->head_ptr = slot;
		dev_dbg(dpriv->dev, "move head pointer to slot: %u\n", slot);
		return 0;
	} else {
		/* no more commands in queue */
		return -1;
	}

}

/** Check if command queue is empty */
static int is_cmdq_empty(const struct elphel_ahci_priv *dpriv)
{
	size_t use_tail;

	if (dpriv->flags & LOCK_TAIL) {
		/* current command slot is not ready yet, use previous */
		use_tail = get_prev_slot(dpriv);
	} else {
		use_tail = dpriv->tail_ptr;
	}
	if (dpriv->head_ptr != use_tail)
		return 0;
	else
		return 1;
}

/** Get command slot before the last one filled in */
static size_t get_prev_slot(const struct elphel_ahci_priv *dpriv)
{
	size_t slot;

	if (dpriv->tail_ptr == dpriv->head_ptr)
		return dpriv->tail_ptr;

	if (dpriv->tail_ptr != 0) {
		slot = dpriv->tail_ptr - 1;
	} else {
		slot = MAX_CMD_SLOTS - 1;
	}
	return slot;
}
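
/* Illustration (assuming MAX_CMD_SLOTS == 4 just for the example): with tail_ptr == 0
 * and head_ptr == 2 the previous slot is 3, i.e. the index wraps around the circular
 * queue; with an empty queue (tail_ptr == head_ptr) the tail slot itself is returned. */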

/** Get and enqueue new command */
static ssize_t rawdev_write(struct device *dev,  ///< device structure associated with the driver
		struct device_attribute *attr,           ///< interface for device attributes
		const char *buff,                        ///< buffer containing new command
		size_t buff_sz)                          ///< the size of the command buffer
{
	ssize_t rcvd = 0;
	bool proceed = false;
	unsigned long irq_flags;
	struct elphel_ahci_priv *dpriv = dev_get_dpriv(dev);
	struct frame_data fdata;
	struct frame_buffers *buffs;
	struct fvec *chunks;

	/* simple check if we've got the right command */
	if (buff_sz != sizeof(struct frame_data)) {
		dev_err(dev, "the size of the data buffer is incorrect, should be equal to sizeof(struct frame_data)\n");
		return -EINVAL;
	}
	memcpy(&fdata, buff, sizeof(struct frame_data));

	/* lock disk resource as soon as possible */
	spin_lock_irqsave(&dpriv->flags_lock, irq_flags);
	if ((dpriv->flags & DISK_BUSY) == 0) {
		dpriv->flags |= DISK_BUSY;
		proceed = true;
	}
	spin_unlock_irqrestore(&dpriv->flags_lock, irq_flags);

	if (fdata.cmd & DRV_CMD_FINISH) {
		if ((dpriv->flags & PROC_CMD) == 0 && proceed) {
			finish_rec(dpriv);
		} else {
			dpriv->flags |= DELAYED_FINISH;
		}
		return buff_sz;
	}

	if (move_tail(dpriv) == -1) {
		/* we are not ready yet because command queue is full */
		printk_ratelimited(KERN_DEBUG "command queue is full, flags = %u, proceed = %d\n", dpriv->flags, proceed);
		return -EAGAIN;
	}
	chunks = dpriv->data_chunks[dpriv->tail_ptr];
	buffs = &dpriv->fbuffs[dpriv->tail_ptr];

	dev_dbg(dev, "process frame from sensor port: %u, command = %d, flags = %u\n", fdata.sensor_port, fdata.cmd, dpriv->flags);
	if (fdata.cmd & DRV_CMD_EXIF) {
		rcvd = exif_get_data(fdata.sensor_port, fdata.meta_index, buffs->exif_buff.iov_base, buffs->exif_buff.iov_len);
		chunks[CHUNK_EXIF].iov_len = rcvd;
	}

	rcvd = jpeghead_get_data(fdata.sensor_port, buffs->jpheader_buff.iov_base, buffs->jpheader_buff.iov_len, 0);
	if (rcvd < 0) {
		/* free resource lock and current command slot */
		if (proceed) {
			spin_lock_irqsave(&dpriv->flags_lock, irq_flags);
			dpriv->flags &= ~DISK_BUSY;
			spin_unlock_irqrestore(&dpriv->flags_lock, irq_flags);
		}
		reset_chunks(chunks, 0);
		dpriv->tail_ptr = get_prev_slot(dpriv);
		dpriv->flags &= ~LOCK_TAIL;
		dev_err(dev, "could not get JPEG header, error %d\n", rcvd);
		return -EINVAL;
	}
	chunks[CHUNK_LEADER].iov_len = JPEG_MARKER_LEN;
	chunks[CHUNK_TRAILER].iov_len = JPEG_MARKER_LEN;
	chunks[CHUNK_HEADER].iov_len = rcvd - chunks[CHUNK_LEADER].iov_len;

	rcvd = circbuf_get_ptr(fdata.sensor_port, fdata.cirbuf_ptr, fdata.jpeg_len, &chunks[CHUNK_DATA_0], &chunks[CHUNK_DATA_1]);
	if (rcvd < 0) {
		/* free resource lock and current command slot */
		if (proceed) {
			spin_lock_irqsave(&dpriv->flags_lock, irq_flags);
			dpriv->flags &= ~DISK_BUSY;
			spin_unlock_irqrestore(&dpriv->flags_lock, irq_flags);
		}
		reset_chunks(chunks, 0);
		dpriv->tail_ptr = get_prev_slot(dpriv);
		dpriv->flags &= ~LOCK_TAIL;
		dev_err(dev, "could not get JPEG data, error %d\n", rcvd);
		return -EINVAL;
	}
	align_frame(dpriv);
	/* new command slot is ready now and can be unlocked */
	reset_flag(dpriv, LOCK_TAIL);

	if (!proceed) {
		/* disk may be free by the moment, try to grab it */
		spin_lock_irqsave(&dpriv->flags_lock, irq_flags);
		if ((dpriv->flags & DISK_BUSY) == 0) {
			dpriv->flags |= DISK_BUSY;
			proceed = true;
		}
		spin_unlock_irqrestore(&dpriv->flags_lock, irq_flags);
	}
	if ((dpriv->flags & PROC_CMD) == 0 && proceed) {
		if (get_size_from(dpriv->data_chunks[dpriv->head_ptr], 0, 0, EXCLUDE_REM) == 0)
			move_head(dpriv);
		process_cmd(dpriv);
	}

	return buff_sz;
}

/** Prepare software constructed command FIS in command table area. The structure of the
 * command FIS is described in Transport Layer chapter of Serial ATA revision 3.1 documentation.
 */
static inline void prep_cfis(uint8_t *cmd_tbl,   ///< pointer to the beginning of command table
		uint8_t cmd,                             ///< ATA command as described in ATA/ATAPI command set
		uint64_t start_addr,                     ///< LBA start address
		uint16_t count)                          ///< sector count, the number of 512 byte sectors to read or write
		                                         ///< @return None
{
	uint8_t device, ctrl;

	/* select the content of Device and Control registers based on command, read the description of
	 * a command in ATA/ATAPI command set documentation
	 */
	switch (cmd) {
	case ATA_CMD_WRITE:
	case ATA_CMD_READ:
		device = 0xe0 | ((start_addr >> 24) & 0x0f);
		ctrl = 0x08;
		/* this is 28-bit command; 4 bits of the address have already been
		 * placed to Device register, invalidate the remaining (if any) upper
		 * bits of the address and leave only 24 significant bits (just in case)
		 */
		start_addr &= 0xffffff;
		count &= 0xff;
		break;
	case ATA_CMD_WRITE_EXT:
	case ATA_CMD_READ_EXT:
		device = 0xe0;
		ctrl = 0x08;
		break;
	default:
		device = 0xe0;
		ctrl = 0x08;
	}

	cmd_tbl[0] = 0x27;                       // H2D register FIS
	cmd_tbl[1] = 0x80;                       // set C = 1
	cmd_tbl[2] = cmd;                        // ATA READ or WRITE DMA command as described in ATA/ATAPI command set
	cmd_tbl[3] = 0;                          // features(7:0)
	cmd_tbl[4] = start_addr & 0xff;          // LBA(7:0)
	cmd_tbl[5] = (start_addr >> 8)  & 0xff;  // LBA(15:8)
	cmd_tbl[6] = (start_addr >> 16) & 0xff;  // LBA(23:16)
	cmd_tbl[7] = device;                     // device
	cmd_tbl[8] = (start_addr >> 24)  & 0xff; // LBA(31:24)
	cmd_tbl[9] = (start_addr >> 32)  & 0xff; // LBA(39:32)
	cmd_tbl[10] = (start_addr >> 40) & 0xff; // LBA(47:40)
	cmd_tbl[11] = 0;                         // features(15:8)
	cmd_tbl[12] = count & 0xff;              // count(7:0)
	cmd_tbl[13] = (count >> 8) & 0xff;       // count(15:8)
	cmd_tbl[14] = 0;                         // ICC (isochronous command completion)
	cmd_tbl[15] = ctrl;                      // control
}

/** Map S/G list to physical region descriptor table in AHCI controller command table */
static inline void prep_prdt(struct fvec *sgl,   ///< pointer to S/G list which should be mapped to physical
		                                         ///< region description table
		unsigned int n_elem,                     ///< the number of elements in @e sgl
		struct ahci_sg *ahci_sgl)                ///< pointer to physical region description table
		                                         ///< @return None
{
	unsigned int num = 0;

1240 1241 1242 1243
	for (num = 0; num < n_elem; num++) {
		ahci_sgl[num].addr = cpu_to_le32(sgl[num].iov_dma & 0xffffffff);
		ahci_sgl[num].addr_hi = cpu_to_le32((sgl[num].iov_dma >> 16) >> 16);
		ahci_sgl[num].flags_size = cpu_to_le32(sgl[num].iov_len - 1);
1244 1245 1246
	}
}

/** Prepare and issue read or write command */
static void elphel_cmd_issue(struct ata_port *ap,///< device port for which the command should be issued
		uint64_t start,                          ///< LBA start address
		uint16_t count,                          ///< the number of sectors to read or write
		struct fvec *sgl,                        ///< S/G list pointing to data buffers
		unsigned int elem,                       ///< the number of elements in @e sgl
		uint8_t cmd)                             ///< the command to be issued; should be ATA_CMD_READ, ATA_CMD_READ_EXT,
		                                         ///< ATA_CMD_WRITE or ATA_CMD_WRITE_EXT, other commands are not tested
		                                         ///< @return None
{
	uint32_t opts;
	uint8_t *cmd_tbl;
	unsigned int slot_num = 0;
	struct ahci_port_priv *pp = ap->private_data;
	struct ahci_host_priv *hpriv = ap->host->private_data;
	struct elphel_ahci_priv *dpriv = hpriv->plat_data;
	struct ahci_sg *ahci_sg;
	void __iomem *port_mmio = ahci_port_base(ap);

	dpriv->flags |= IRQ_SIMPLE;

	/* prepare command FIS */
	dma_sync_single_for_cpu(ap->dev, pp->cmd_tbl_dma, AHCI_CMD_TBL_AR_SZ, DMA_TO_DEVICE);
	cmd_tbl = pp->cmd_tbl + slot_num * AHCI_CMD_TBL_SZ;
	prep_cfis(cmd_tbl, cmd, start, count);

	/* prepare physical region descriptor table */
	ahci_sg = pp->cmd_tbl + slot_num * AHCI_CMD_TBL_SZ + AHCI_CMD_TBL_HDR_SZ;
	prep_prdt(sgl, elem, ahci_sg);

	/* prepare command header */
	opts = CMD_FIS_LEN | (elem << 16) | AHCI_CMD_PREFETCH | AHCI_CMD_CLR_BUSY;
	if (cmd == ATA_CMD_WRITE || cmd == ATA_CMD_WRITE_EXT)
		opts |= AHCI_CMD_WRITE;
	ahci_fill_cmd_slot(pp, slot_num, opts);

	dev_dbg(ap->dev, "dump command table content, first %d bytes, phys addr = 0x%x:\n", 16, pp->cmd_tbl_dma);
	print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, pp->cmd_tbl, 16);

	dma_sync_single_for_device(ap->dev, pp->cmd_tbl_dma, AHCI_CMD_TBL_AR_SZ, DMA_TO_DEVICE);

	/* issue command */
	writel(0x11, port_mmio + PORT_CMD);
	writel(1 << slot_num, port_mmio + PORT_CMD_ISSUE);
}
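
/* The command issued above completes asynchronously: IRQ_SIMPLE is set before issuing,
 * so the completion interrupt is consumed by elphel_irq_handler() instead of the generic
 * AHCI path, which in turn schedules the process_queue() tasklet. */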

/** Defer system command if internal command queue is not empty */
static int elphel_qc_defer(struct ata_queued_cmd *qc)
{
	int ret;
	unsigned long irq_flags;
	struct elphel_ahci_priv *dpriv = dev_get_dpriv(qc->ap->dev);

	/* First apply the usual rules */
	ret = ata_std_qc_defer(qc);
	if (ret != 0)
		return ret;

	/* And now check if internal command is in progress */
	spin_lock_irqsave(&dpriv->flags_lock, irq_flags);
	if ((dpriv->flags & DISK_BUSY) || is_cmdq_empty(dpriv) == 0) {
		ret = ATA_DEFER_LINK;
	} else {
		dpriv->flags |= DISK_BUSY;
	}
	spin_unlock_irqrestore(&dpriv->flags_lock, irq_flags);

	return ret;
}

/** Return the starting position of disk buffer (in LBA) */
static ssize_t lba_start_read(struct device *dev, struct device_attribute *attr, char *buff)
{
	struct ata_host *host = dev_get_drvdata(dev);
	struct ahci_host_priv *hpriv = host->private_data;
	struct elphel_ahci_priv *dpriv = hpriv->plat_data;

	return snprintf(buff, 20, "%llu\n", dpriv->lba_ptr.lba_start);
}

/** Set the starting position of disk buffer (in LBA) */
static ssize_t lba_start_write(struct device *dev, struct device_attribute *attr, const char *buff, size_t buff_sz)
{
	struct ata_host *host = dev_get_drvdata(dev);
	struct ahci_host_priv *hpriv = host->private_data;
	struct elphel_ahci_priv *dpriv = hpriv->plat_data;

	if (kstrtoull(buff, 10, &dpriv->lba_ptr.lba_start) != 0)
		return -EINVAL;

	if (dpriv->lba_ptr.lba_write < dpriv->lba_ptr.lba_start)
		dpriv->lba_ptr.lba_write = dpriv->lba_ptr.lba_start;

	return buff_sz;
}
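
/* Hypothetical usage sketch (the actual sysfs attribute name and path depend on how the
 * attribute is registered, which is not shown here):
 *   echo 2048 > <sysfs path of the controller>/lba_start
 * moves the start of the disk buffer and, if necessary, the current write pointer. */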

/** Return the ending position of disk buffer (in LBA) */
static ssize_t lba_end_read(struct device *dev, struct device_attribute *attr, char *buff)
{
	struct ata_host *host = dev_get_drvdata(dev);
	struct ahci_host_priv *hpriv = host->private_data;
	struct elphel_ahci_priv *dpriv = hpriv->plat_data;

	return snprintf(buff, 20, "%llu\n", dpriv->lba_ptr.lba_end);
}

/** Set the ending position of disk buffer (in LBA) */
static ssize_t lba_end_write(struct device *dev, struct device_attribute *attr, const char *buff, size_t buff_sz)
{
	struct ata_host *host = dev_get_drvdata(dev);
	struct ahci_host_priv *hpriv = host->private_data;
	struct elphel_ahci_priv *dpriv = hpriv->plat_data;

	if (kstrtoull(buff, 10, &dpriv->lba_ptr.lba_end) != 0)
		return -EINVAL;

	if (dpriv->lba_ptr.lba_write > dpriv->lba_ptr.lba_end)
		dpriv->lba_ptr.lba_write = dpriv->lba_ptr.lba_end;

	return buff_sz;
}

/** Return the current position of write pointer (in LBA) */
static ssize_t lba_current_read(struct device *dev, struct device_attribute *attr, char *buff)
{
	struct ata_host *host = dev_get_drvdata(dev);
	struct ahci_host_priv *hpriv = host->private_data;
	struct elphel_ahci_priv *dpriv = hpriv->