Elphel / linux-elphel / Commits

Commit 09fa190d, authored Jun 14, 2023 by Andrey Filippov

    making single-command write

Parent: 8209132f

Showing 3 changed files with 915 additions and 2 deletions:

    src/block/bio.c        +17   -1
    src/block/blk-merge.c  +897  -0
    src/block/blk-mq.c     +1    -1
src/block/bio.c

@@ -852,7 +852,23 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 	size_t offset;
 	ssize_t size;

 	if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda*
-		pr_debug(" *A* nr_pages=0x%04x,bio->bi_max_vecs=0x%04x, bio->bi_vcnt==0x%04x", nr_pages, bio->bi_max_vecs, bio->bi_vcnt); // *A* nr_pages=0x009f,bio->bi_max_vecs=0x009f, bio->bi_vcnt==0x0000
+		pr_debug(" *A* nr_pages=0x%04x,bio->bi_max_vecs=0x%04x, bio->bi_vcnt=0x%04x", nr_pages, bio->bi_max_vecs, bio->bi_vcnt); // *A* nr_pages=0x009f,bio->bi_max_vecs=0x009f, bio->bi_vcnt==0x0000
+		pr_debug(" *A0* max_hw_sectors=%d, max_dev_sectors=%d, max_sectors=%d, cluster=%d, max_segment_size=0x%08x", \
+			bio->bi_disk->queue->limits.max_hw_sectors, bio->bi_disk->queue->limits.max_dev_sectors, bio->bi_disk->queue->limits.max_sectors, bio->bi_disk->queue->limits.cluster, bio->bi_disk->queue->limits.max_segment_size);
+		// For now - just testing, not clear who/what sets // bio:__bio_iov_iter_get_pages:857: *A0* max_hw_sectors=65535, max_dev_sectors=65535, max_sectors=2560
+		// limited by BLK_DEF_MAX_SECTORS = 2560!
+		/* blk_queue_cluster(q) return q->limits.cluster; q->limits.max_segment_size
+		   https://patchwork.kernel.org/project/linux-block/patch/21cf85d32278bbe5acbc3def0a6db75db98a2670.1459269590.git.shli@fb.com/
+		   bio_alloc_bioset() allocates bvecs from bvec_slabs which can only
+		   allocate maximum 256 bvec (eg, 1M for 4k pages). We can't bump
+		   BLK_DEF_MAX_SECTORS to exceed this value otherwise bio_alloc_bioset will
+		   fail.
+		*/
+		blk_queue_max_segment_size(bio->bi_disk->queue, 0x100000);
+		// blk_queue_max_hw_sectors(bio->bi_disk->queue, bio->bi_disk->queue->limits.max_hw_sectors); // still used constant
+		bio->bi_disk->queue->limits.max_sectors = min(bio->bi_disk->queue->limits.max_hw_sectors, bio->bi_disk->queue->limits.max_dev_sectors);
+		pr_debug(" *A1* max_hw_sectors=%d, max_dev_sectors=%d, max_sectors=%d, cluster=%d, max_segment_size=0x%08x", \
+			bio->bi_disk->queue->limits.max_hw_sectors, bio->bi_disk->queue->limits.max_dev_sectors, bio->bi_disk->queue->limits.max_sectors, bio->bi_disk->queue->limits.cluster, bio->bi_disk->queue->limits.max_segment_size);
 		pr_debug(" *B* niter->type=%d, iter->iov_offset=0x%08x, iter->count=0x%08x, iter->nr_segs=0x%08lx", \
 			iter->type, iter->iov_offset, iter->count, iter->nr_segs); // *B* niter->type=1, iter->iov_offset=0x00000000, iter->count=0x0009f000, iter->nr_segs=0x00000001
 	}
...
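The effect of the added limit adjustment is easy to check outside the kernel. The sketch below is a minimal user-space rendition, not part of the commit: struct fake_limits stands in for struct queue_limits, and the starting numbers are the ones the *A0* pr_debug line reports (max_hw_sectors=65535, max_dev_sectors=65535, max_sectors=2560, i.e. BLK_DEF_MAX_SECTORS); the pre-change max_segment_size value is assumed. It only shows that the two added statements raise max_segment_size to 1 MiB and lift max_sectors to the hardware/device cap, which is what lets a large buffer go out as one command instead of being cut at the 2560-sector default.

/* Minimal user-space sketch (not kernel code): reproduces the limit
 * adjustment done in the hunk above. Field names mirror struct
 * queue_limits; values are the ones printed by the *A0* debug line. */
#include <stdio.h>

struct fake_limits {
	unsigned int max_hw_sectors;
	unsigned int max_dev_sectors;
	unsigned int max_sectors;
	unsigned int max_segment_size;
};

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	struct fake_limits lim = {
		.max_hw_sectors   = 65535,	/* from the *A0* log line */
		.max_dev_sectors  = 65535,
		.max_sectors      = 2560,	/* BLK_DEF_MAX_SECTORS */
		.max_segment_size = 65536,	/* assumed pre-change value */
	};

	/* blk_queue_max_segment_size(queue, 0x100000) boils down to this: */
	lim.max_segment_size = 0x100000;	/* 1 MiB per segment */

	/* the added assignment lifts max_sectors to the hw/dev cap: */
	lim.max_sectors = min_u(lim.max_hw_sectors, lim.max_dev_sectors);

	printf("max_sectors=%u (~%u KiB per request), max_segment_size=0x%x\n",
	       lim.max_sectors, lim.max_sectors / 2, lim.max_segment_size);
	return 0;
}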
src/block/blk-merge.c  (new file, mode 100644)
// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include <trace/events/block.h>

#include "blk.h"

static struct bio *blk_bio_discard_split(struct request_queue *q,
					 struct bio *bio,
					 struct bio_set *bs,
					 unsigned *nsegs)
{
	unsigned int max_discard_sectors, granularity;
	int alignment;
	sector_t tmp;
	unsigned split_sectors;

	*nsegs = 1;

	/* Zero-sector (unknown) and one-sector granularities are the same. */
	granularity = max(q->limits.discard_granularity >> 9, 1U);

	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
	max_discard_sectors -= max_discard_sectors % granularity;

	if (unlikely(!max_discard_sectors)) {
		/* XXX: warn */
		return NULL;
	}

	if (bio_sectors(bio) <= max_discard_sectors)
		return NULL;

	split_sectors = max_discard_sectors;

	/*
	 * If the next starting sector would be misaligned, stop the discard at
	 * the previous aligned sector.
	 */
	alignment = (q->limits.discard_alignment >> 9) % granularity;

	tmp = bio->bi_iter.bi_sector + split_sectors - alignment;
	tmp = sector_div(tmp, granularity);

	if (split_sectors > tmp)
		split_sectors -= tmp;

	return bio_split(bio, split_sectors, GFP_NOIO, bs);
}
static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
		struct bio *bio, struct bio_set *bs, unsigned *nsegs)
{
	*nsegs = 1;

	if (!q->limits.max_write_zeroes_sectors)
		return NULL;

	if (bio_sectors(bio) <= q->limits.max_write_zeroes_sectors)
		return NULL;

	return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
}

static struct bio *blk_bio_write_same_split(struct request_queue *q,
					    struct bio *bio,
					    struct bio_set *bs,
					    unsigned *nsegs)
{
	*nsegs = 1;

	if (!q->limits.max_write_same_sectors)
		return NULL;

	if (bio_sectors(bio) <= q->limits.max_write_same_sectors)
		return NULL;

	return bio_split(bio, q->limits.max_write_same_sectors, GFP_NOIO, bs);
}

static inline unsigned get_max_io_size(struct request_queue *q,
				       struct bio *bio)
{
	unsigned sectors = blk_max_size_offset(q, bio->bi_iter.bi_sector);
	unsigned mask = queue_logical_block_size(q) - 1;

	/* aligned to logical block size */
	sectors &= ~(mask >> 9);

	return sectors;
}
static struct bio *blk_bio_segment_split(struct request_queue *q,
					 struct bio *bio,
					 struct bio_set *bs,
					 unsigned *segs)
{
	struct bio_vec bv, bvprv, *bvprvp = NULL;
	struct bvec_iter iter;
	unsigned seg_size = 0, nsegs = 0, sectors = 0;
	unsigned front_seg_size = bio->bi_seg_front_size;
	bool do_split = true;
	struct bio *new = NULL;
	const unsigned max_sectors = get_max_io_size(q, bio);

	bio_for_each_segment(bv, bio, iter) {
		if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda*
			pr_debug(" *X3* sectors=0x%08x, max_sectors=0x%08x, (bv.bv_len >> 9)=0x%08x", sectors, max_sectors, bv.bv_len >> 9);
		}
		/*
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, disallow it.
		 */
		if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset)) {
			if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda* // never
				pr_debug(" *X4* ");
			}
			goto split;
		}

		if (sectors + (bv.bv_len >> 9) > max_sectors) {
			if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda* // never
				pr_debug(" *X5* ");
			}
			/*
			 * Consider this a new segment if we're splitting in
			 * the middle of this vector.
			 */
			if (nsegs < queue_max_segments(q) &&
			    sectors < max_sectors) {
				nsegs++;
				sectors = max_sectors;
			}
			goto split;
		}

		if (bvprvp && blk_queue_cluster(q)) { // first - skip
			if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda*
				pr_debug(" *X6* ");
			}
			if (seg_size + bv.bv_len > queue_max_segment_size(q)) {
				if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda* // here!
					pr_debug(" *X61* ");
				}
				goto new_segment;
			}
			if (!BIOVEC_PHYS_MERGEABLE(bvprvp, &bv)) {
				if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda*
					pr_debug(" *X62* ");
				}
				goto new_segment;
			}
			if (!BIOVEC_SEG_BOUNDARY(q, bvprvp, &bv)) {
				if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda*
					pr_debug(" *X63* ");
				}
				goto new_segment;
			}
			if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda*
				pr_debug(" *X7* seg_size=0x%08x, bv.bv_len=0x%08x, blk_queue_cluster(q)=0x%x", seg_size, bv.bv_len, blk_queue_cluster(q));
			}

			seg_size += bv.bv_len;
			bvprv = bv;
			bvprvp = &bvprv;
			sectors += bv.bv_len >> 9;

			continue;
		}
new_segment:
		if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda*
			pr_debug(" *X8* nsegs=0x%08x", nsegs);
		}
		if (nsegs == queue_max_segments(q))
			goto split;

		if (nsegs == 1 && seg_size > front_seg_size)
			front_seg_size = seg_size;

		nsegs++;
		bvprv = bv;
		bvprvp = &bvprv;
		seg_size = bv.bv_len;
		sectors += bv.bv_len >> 9;
	}

	do_split = false;
split:
	if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda*
		pr_debug(" *X9* nsegs=0x%08x, do_split=%d, sectors=0x%08x", nsegs, do_split, sectors);
	}
	*segs = nsegs;

	if (do_split) {
		new = bio_split(bio, sectors, GFP_NOIO, bs);
		if (new)
			bio = new;
	}

	if (nsegs == 1 && seg_size > front_seg_size)
		front_seg_size = seg_size;
	bio->bi_seg_front_size = front_seg_size;
	if (seg_size > bio->bi_seg_back_size)
		bio->bi_seg_back_size = seg_size;

	return do_split ? new : NULL;
}
void blk_queue_split(struct request_queue *q, struct bio **bio)
{
	struct bio *split, *res;
	unsigned nsegs;

	if ((*bio)->bi_disk && ((*bio)->bi_disk->major == 8)) { // sda, sda*
		pr_debug(" *X* (*bio)->bi_flags=0x%04x, (*bio)->bi_phys_segments=0x%08x, bio_op(*bio)=%d, get_max_io_size(q, *bio)=%d", \
			(*bio)->bi_flags, (*bio)->bi_phys_segments, bio_op(*bio), get_max_io_size(q, *bio)); // bio_op(*bio) = 1
		pr_debug(" *X1* mask =0x%08x, blk_max_size_offset(...) = 0x%08x", \
			queue_logical_block_size(q) - 1, blk_max_size_offset(q, (*bio)->bi_iter.bi_sector));
		pr_debug(" *X2* q->limits.chunk_sectors =0x%08x, q->limits.max_sectors = 0x%08x, bio->bi_seg_front_size=0x%08x, bio->bi_seg_back_size=0x%08x", \
			q->limits.chunk_sectors, q->limits.max_sectors, (*bio)->bi_seg_front_size, (*bio)->bi_seg_back_size);
	}
	//const unsigned max_sectors = get_max_io_size(q, bio);

	switch (bio_op(*bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
		split = blk_bio_discard_split(q, *bio, &q->bio_split, &nsegs);
		break;
	case REQ_OP_WRITE_ZEROES:
		split = blk_bio_write_zeroes_split(q, *bio, &q->bio_split, &nsegs);
		break;
	case REQ_OP_WRITE_SAME:
		split = blk_bio_write_same_split(q, *bio, &q->bio_split, &nsegs);
		break;
	default:
		split = blk_bio_segment_split(q, *bio, &q->bio_split, &nsegs);
		break;
	}

	/* physical segments can be figured out during splitting */
	res = split ? split : *bio;
	res->bi_phys_segments = nsegs;
	bio_set_flag(res, BIO_SEG_VALID);
	if ((*bio)->bi_disk && ((*bio)->bi_disk->major == 8)) { // sda, sda*
		pr_debug(" *Y* split = 0x%p, res->bi_phys_segments=%d", split, res->bi_phys_segments); // null
	}

	if (split) {
		/* there isn't chance to merge the splitted bio */
		split->bi_opf |= REQ_NOMERGE;

		/*
		 * Since we're recursing into make_request here, ensure
		 * that we mark this bio as already having entered the queue.
		 * If not, and the queue is going away, we can get stuck
		 * forever on waiting for the queue reference to drop. But
		 * that will never happen, as we're already holding a
		 * reference to it.
		 */
		bio_set_flag(*bio, BIO_QUEUE_ENTERED);

		bio_chain(split, *bio);
		trace_block_split(q, split, (*bio)->bi_iter.bi_sector);
		generic_make_request(*bio);
		*bio = split;
	}
}
EXPORT_SYMBOL(blk_queue_split);
static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
					     struct bio *bio,
					     bool no_sg_merge)
{
	struct bio_vec bv, bvprv = { NULL };
	int cluster, prev = 0;
	unsigned int seg_size, nr_phys_segs;
	struct bio *fbio, *bbio;
	struct bvec_iter iter;

	if (!bio)
		return 0;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
	case REQ_OP_WRITE_ZEROES:
		return 0;
	case REQ_OP_WRITE_SAME:
		return 1;
	}

	fbio = bio;
	cluster = blk_queue_cluster(q);
	seg_size = 0;
	nr_phys_segs = 0;
	for_each_bio(bio) {
		bio_for_each_segment(bv, bio, iter) {
			/*
			 * If SG merging is disabled, each bio vector is
			 * a segment
			 */
			if (no_sg_merge)
				goto new_segment;

			if (prev && cluster) {
				if (seg_size + bv.bv_len
				    > queue_max_segment_size(q))
					goto new_segment;
				if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
					goto new_segment;
				if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
					goto new_segment;

				seg_size += bv.bv_len;
				bvprv = bv;
				continue;
			}
new_segment:
			if (nr_phys_segs == 1 && seg_size >
			    fbio->bi_seg_front_size)
				fbio->bi_seg_front_size = seg_size;

			nr_phys_segs++;
			bvprv = bv;
			prev = 1;
			seg_size = bv.bv_len;
		}
		bbio = bio;
	}

	if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size)
		fbio->bi_seg_front_size = seg_size;
	if (seg_size > bbio->bi_seg_back_size)
		bbio->bi_seg_back_size = seg_size;

	return nr_phys_segs;
}

void blk_recalc_rq_segments(struct request *rq)
{
	bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
			&rq->q->queue_flags);

	rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio,
			no_sg_merge);
}

void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
	unsigned short seg_cnt;

	/* estimate segment number by bi_vcnt for non-cloned bio */
	if (bio_flagged(bio, BIO_CLONED))
		seg_cnt = bio_segments(bio);
	else
		seg_cnt = bio->bi_vcnt;

	if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) &&
			(seg_cnt < queue_max_segments(q)))
		bio->bi_phys_segments = seg_cnt;
	else {
		struct bio *nxt = bio->bi_next;

		bio->bi_next = NULL;
		bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false);
		bio->bi_next = nxt;
	}

	bio_set_flag(bio, BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);
static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
				   struct bio *nxt)
{
	struct bio_vec end_bv = { NULL }, nxt_bv;

	if (!blk_queue_cluster(q))
		return 0;

	if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
	    queue_max_segment_size(q))
		return 0;

	if (!bio_has_data(bio))
		return 1;

	bio_get_last_bvec(bio, &end_bv);
	bio_get_first_bvec(nxt, &nxt_bv);

	if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
		return 0;

	/*
	 * bio and nxt are contiguous in memory; check if the queue allows
	 * these two to be merged into one
	 */
	if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
		return 1;

	return 0;
}

static inline void
__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
		     struct scatterlist *sglist, struct bio_vec *bvprv,
		     struct scatterlist **sg, int *nsegs, int *cluster)
{

	int nbytes = bvec->bv_len;

	if (*sg && *cluster) {
		if ((*sg)->length + nbytes > queue_max_segment_size(q))
			goto new_segment;

		if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
			goto new_segment;
		if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
			goto new_segment;

		(*sg)->length += nbytes;
	} else {
new_segment:
		if (!*sg)
			*sg = sglist;
		else {
			/*
			 * If the driver previously mapped a shorter
			 * list, we could see a termination bit
			 * prematurely unless it fully inits the sg
			 * table on each mapping. We KNOW that there
			 * must be more entries here or the driver
			 * would be buggy, so force clear the
			 * termination bit to avoid doing a full
			 * sg_init_table() in drivers for each command.
			 */
			sg_unmark_end(*sg);
			*sg = sg_next(*sg);
		}

		sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
		(*nsegs)++;
	}
	*bvprv = *bvec;
}

static inline int __blk_bvec_map_sg(struct request_queue *q, struct bio_vec bv,
		struct scatterlist *sglist, struct scatterlist **sg)
{
	*sg = sglist;
	sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
	return 1;
}

static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
			     struct scatterlist *sglist,
			     struct scatterlist **sg)
{
	struct bio_vec bvec, bvprv = { NULL };
	struct bvec_iter iter;
	int cluster = blk_queue_cluster(q), nsegs = 0;

	for_each_bio(bio)
		bio_for_each_segment(bvec, bio, iter)
			__blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
					     &nsegs, &cluster);

	return nsegs;
}
/*
 * map a request to scatterlist, return number of sg entries setup. Caller
 * must make sure sg can hold rq->nr_phys_segments entries
 */
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
		  struct scatterlist *sglist)
{
	struct scatterlist *sg = NULL;
	int nsegs = 0;

	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
		nsegs = __blk_bvec_map_sg(q, rq->special_vec, sglist, &sg);
	else if (rq->bio && bio_op(rq->bio) == REQ_OP_WRITE_SAME)
		nsegs = __blk_bvec_map_sg(q, bio_iovec(rq->bio), sglist, &sg);
	else if (rq->bio)
		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);

	if (unlikely(rq->rq_flags & RQF_COPY_USER) &&
	    (blk_rq_bytes(rq) & q->dma_pad_mask)) {
		unsigned int pad_len =
			(q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;

		sg->length += pad_len;
		rq->extra_len += pad_len;
	}

	if (q->dma_drain_size && q->dma_drain_needed(rq)) {
		if (op_is_write(req_op(rq)))
			memset(q->dma_drain_buffer, 0, q->dma_drain_size);

		sg_unmark_end(sg);
		sg = sg_next(sg);
		sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
			    q->dma_drain_size,
			    ((unsigned long)q->dma_drain_buffer) &
			    (PAGE_SIZE - 1));
		nsegs++;
		rq->extra_len += q->dma_drain_size;
	}

	if (sg)
		sg_mark_end(sg);

	/*
	 * Something must have been wrong if the figured number of
	 * segment is bigger than number of req's physical segments
	 */
	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));

	return nsegs;
}
EXPORT_SYMBOL(blk_rq_map_sg);
static inline int ll_new_hw_segment(struct request_queue *q,
				    struct request *req,
				    struct bio *bio)
{
	int nr_phys_segs = bio_phys_segments(q, bio);

	if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
		goto no_merge;

	if (blk_integrity_merge_bio(q, req, bio) == false)
		goto no_merge;

	/*
	 * This will form the start of a new hw segment.  Bump both
	 * counters.
	 */
	req->nr_phys_segments += nr_phys_segs;
	return 1;

no_merge:
	req_set_nomerge(q, req);
	return 0;
}

int ll_back_merge_fn(struct request_queue *q, struct request *req,
		     struct bio *bio)
{
	if (req_gap_back_merge(req, bio))
		return 0;
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_back_merge(req, bio))
		return 0;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
		req_set_nomerge(q, req);
		return 0;
	}
	if (!bio_flagged(req->biotail, BIO_SEG_VALID))
		blk_recount_segments(q, req->biotail);
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);

	return ll_new_hw_segment(q, req, bio);
}

int ll_front_merge_fn(struct request_queue *q, struct request *req,
		      struct bio *bio)
{
	if (req_gap_front_merge(req, bio))
		return 0;
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_front_merge(req, bio))
		return 0;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
		req_set_nomerge(q, req);
		return 0;
	}
	if (!bio_flagged(bio, BIO_SEG_VALID))
		blk_recount_segments(q, bio);
	if (!bio_flagged(req->bio, BIO_SEG_VALID))
		blk_recount_segments(q, req->bio);

	return ll_new_hw_segment(q, req, bio);
}
/*
 * blk-mq uses req->special to carry normal driver per-request payload, it
 * does not indicate a prepared command that we cannot merge with.
 */
static bool req_no_special_merge(struct request *req)
{
	struct request_queue *q = req->q;

	return !q->mq_ops && req->special;
}

static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct request *next)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
	return true;
no_merge:
	req_set_nomerge(q, req);
	return false;
}

static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;
	unsigned int seg_size =
		req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;

	/*
	 * First check if the either of the requests are re-queued
	 * requests.  Can't merge them if they are.
	 */
	if (req_no_special_merge(req) || req_no_special_merge(next))
		return 0;

	if (req_gap_back_merge(req, next->bio))
		return 0;

	/*
	 * Will it become too large?
	 */
	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		return 0;

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
		if (req->nr_phys_segments == 1)
			req->bio->bi_seg_front_size = seg_size;
		if (next->nr_phys_segments == 1)
			next->biotail->bi_seg_back_size = seg_size;
		total_phys_segments--;
	}

	if (total_phys_segments > queue_max_segments(q))
		return 0;

	if (blk_integrity_merge_rq(q, req, next) == false)
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
	return 1;
}
/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged.  Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
void blk_rq_set_mixed_merge(struct request *rq)
{
	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
	struct bio *bio;

	if (rq->rq_flags & RQF_MIXED_MERGE)
		return;

	/*
	 * @rq will no longer represent mixable attributes for all the
	 * contained bios.  It will just track those of the first one.
	 * Distributes the attributs to each bio.
	 */
	for (bio = rq->bio; bio; bio = bio->bi_next) {
		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
		bio->bi_opf |= ff;
	}
	rq->rq_flags |= RQF_MIXED_MERGE;
}

static void blk_account_io_merge(struct request *req)
{
	if (blk_do_io_stat(req)) {
		struct hd_struct *part;
		int cpu;

		cpu = part_stat_lock();
		part = req->part;

		part_round_stats(req->q, cpu, part);
		part_dec_in_flight(req->q, part, rq_data_dir(req));

		hd_struct_put(part);
		part_stat_unlock();
	}
}
/*
 * For non-mq, this has to be called with the request spinlock acquired.
 * For mq with scheduling, the appropriate queue wide lock should be held.
 */
static struct request *attempt_merge(struct request_queue *q,
				     struct request *req, struct request *next)
{
	if (!q->mq_ops)
		lockdep_assert_held(q->queue_lock);

	if (!rq_mergeable(req) || !rq_mergeable(next))
		return NULL;

	if (req_op(req) != req_op(next))
		return NULL;

	/*
	 * not contiguous
	 */
	if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
		return NULL;

	if (rq_data_dir(req) != rq_data_dir(next)
	    || req->rq_disk != next->rq_disk
	    || req_no_special_merge(next))
		return NULL;

	if (req_op(req) == REQ_OP_WRITE_SAME &&
	    !blk_write_same_mergeable(req->bio, next->bio))
		return NULL;

	/*
	 * Don't allow merge of different write hints, or for a hint with
	 * non-hint IO.
	 */
	if (req->write_hint != next->write_hint)
		return NULL;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
	 * will have updated segment counts, update sector
	 * counts here. Handle DISCARDs separately, as they
	 * have separate settings.
	 */
	if (req_op(req) == REQ_OP_DISCARD) {
		if (!req_attempt_discard_merge(q, req, next))
			return NULL;
	} else if (!ll_merge_requests_fn(q, req, next))
		return NULL;

	/*
	 * If failfast settings disagree or any of the two is already
	 * a mixed merge, mark both as mixed before proceeding.  This
	 * makes sure that all involved bios have mixable attributes
	 * set properly.
	 */
	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
		blk_rq_set_mixed_merge(req);
		blk_rq_set_mixed_merge(next);
	}

	/*
	 * At this point we have either done a back merge or front merge. We
	 * need the smaller start_time_ns of the merged requests to be the
	 * current request for accounting purposes.
	 */
	if (next->start_time_ns < req->start_time_ns)
		req->start_time_ns = next->start_time_ns;

	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->__data_len += blk_rq_bytes(next);

	if (req_op(req) != REQ_OP_DISCARD)
		elv_merge_requests(q, req, next);

	/*
	 * 'next' is going away, so update stats accordingly
	 */
	blk_account_io_merge(next);

	req->ioprio = ioprio_best(req->ioprio, next->ioprio);
	if (blk_rq_cpu_valid(next))
		req->cpu = next->cpu;

	/*
	 * ownership of bio passed from next to req, return 'next' for
	 * the caller to free
	 */
	next->bio = NULL;
	return next;
}
struct request *attempt_back_merge(struct request_queue *q, struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	if (next)
		return attempt_merge(q, rq, next);

	return NULL;
}

struct request *attempt_front_merge(struct request_queue *q, struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	if (prev)
		return attempt_merge(q, prev, rq);

	return NULL;
}

int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
			  struct request *next)
{
	struct elevator_queue *e = q->elevator;
	struct request *free;

	if (!e->uses_mq && e->type->ops.sq.elevator_allow_rq_merge_fn)
		if (!e->type->ops.sq.elevator_allow_rq_merge_fn(q, rq, next))
			return 0;

	free = attempt_merge(q, rq, next);
	if (free) {
		__blk_put_request(q, free);
		return 1;
	}

	return 0;
}
bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq) || !bio_mergeable(bio))
		return false;

	if (req_op(rq) != bio_op(bio))
		return false;

	/* different data direction or already started, don't merge */
	if (bio_data_dir(bio) != rq_data_dir(rq))
		return false;

	/* must be same device and not a special request */
	if (rq->rq_disk != bio->bi_disk || req_no_special_merge(rq))
		return false;

	/* only merge integrity protected bio into ditto rq */
	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
		return false;

	/* must be using the same buffer */
	if (req_op(rq) == REQ_OP_WRITE_SAME &&
	    !blk_write_same_mergeable(rq->bio, bio))
		return false;

	/*
	 * Don't allow merge of different write hints, or for a hint with
	 * non-hint IO.
	 */
	if (rq->write_hint != bio->bi_write_hint)
		return false;

	return true;
}

enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
{
	if (req_op(rq) == REQ_OP_DISCARD &&
	    queue_max_discard_segments(rq->q) > 1)
		return ELEVATOR_DISCARD_MERGE;
	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
		return ELEVATOR_BACK_MERGE;
	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
		return ELEVATOR_FRONT_MERGE;
	return ELEVATOR_NO_MERGE;
}
src/block/blk-mq.c

@@ -1832,7 +1832,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	}

 	blk_queue_split(q, &bio);

-	if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda*
+	if (bio->bi_disk && (bio->bi_disk->major == 8)) { // sda, sda*  already split - 0xa segments (16 pages each)
 		pr_debug(" *2* bio->bi_flags=0x%04x, bio->bi_phys_segments=0x%08x", bio->bi_flags, bio->bi_phys_segments); // already set
 	}
...
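All of the debug sites added across the three files gate on the same condition, bio->bi_disk && bio->bi_disk->major == 8, which selects SCSI-disk bios (sd*, e.g. sda). If the instrumentation were to stay, a small helper like the hypothetical one below (not part of the commit) would express that filter once; SCSI_DISK0_MAJOR is the existing constant for major number 8 in <linux/major.h>.

/* Hypothetical helper, not in the commit: one place for the
 * "is this bio aimed at a SCSI disk (sd*)?" test used at every
 * pr_debug() site above. */
#include <linux/bio.h>
#include <linux/genhd.h>
#include <linux/major.h>

static inline bool bio_targets_scsi_disk(const struct bio *bio)
{
	return bio->bi_disk && bio->bi_disk->major == SCSI_DISK0_MAJOR;
}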