/*
 * Functions related to generic block-layer helpers
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

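/*
 * A bio_batch ties a group of bios to a single completion.  ->done
 * starts at 1 and is bumped once per submitted bio; the submitter
 * drops its own reference after the last submit, and whichever side
 * brings the count to zero signals ->wait.  ->flags carries
 * BIO_UPTODATE, which bio_batch_end_io() clears if any bio in the
 * batch fails.
 */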
struct bio_batch {
	atomic_t		done;
	unsigned long		flags;
	struct completion	*wait;
};

static void bio_batch_end_io(struct bio *bio, int err)
{
	struct bio_batch *bb = bio->bi_private;

	if (err && (err != -EOPNOTSUPP))
		clear_bit(BIO_UPTODATE, &bb->flags);
	if (atomic_dec_and_test(&bb->done))
		complete(bb->wait);
	bio_put(bio);
}

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	BLKDEV_DISCARD_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q = bdev_get_queue(bdev);
	int type = REQ_WRITE | REQ_DISCARD;
	unsigned int max_discard_sectors, granularity;
	int alignment;
	struct bio_batch bb;
	struct bio *bio;
	int ret = 0;
	struct blk_plug plug;

	if (!q)
		return -ENXIO;

	if (!blk_queue_discard(q))
		return -EOPNOTSUPP;

	/* Zero-sector (unknown) and one-sector granularities are the same. */
	granularity = max(q->limits.discard_granularity >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	/*
	 * Ensure that max_discard_sectors is of the proper
	 * granularity, so that requests stay aligned after a split.
	 */
	max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9);
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors)) {
		/* Avoid infinite loop below. Being cautious never hurts. */
		return -EOPNOTSUPP;
	}

	if (flags & BLKDEV_DISCARD_SECURE) {
		if (!blk_queue_secdiscard(q))
			return -EOPNOTSUPP;
		type |= REQ_SECURE;
	}

	atomic_set(&bb.done, 1);
	bb.flags = 1 << BIO_UPTODATE;
	bb.wait = &wait;

	blk_start_plug(&plug);
	while (nr_sects) {
		unsigned int req_sects;
		sector_t end_sect, tmp;

		bio = bio_alloc(gfp_mask, 1);
		if (!bio) {
			ret = -ENOMEM;
			break;
		}

		req_sects = min_t(sector_t, nr_sects, max_discard_sectors);

		/*
		 * If splitting a request, and the next starting sector would be
		 * misaligned, stop the discard at the previous aligned sector.
		 */
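		/*
		 * Worked example (illustrative numbers): with granularity 8
		 * and alignment 2, a bio that would end at sector 27 is
		 * trimmed back to end at sector 26, the largest boundary
		 * not above 27 that is congruent to 2 mod 8, so the next
		 * bio starts on an aligned sector.
		 */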
		end_sect = sector + req_sects;
		tmp = end_sect;
		if (req_sects < nr_sects &&
		    sector_div(tmp, granularity) != alignment) {
			end_sect = end_sect - alignment;
			sector_div(end_sect, granularity);
			end_sect = end_sect * granularity + alignment;
			req_sects = end_sect - sector;
		}

		bio->bi_iter.bi_sector = sector;
		bio->bi_end_io = bio_batch_end_io;
		bio->bi_bdev = bdev;
		bio->bi_private = &bb;

		bio->bi_iter.bi_size = req_sects << 9;
		nr_sects -= req_sects;
		sector = end_sect;

		atomic_inc(&bb.done);
		submit_bio(type, bio);

		/*
		 * We can loop for a long time in here, if someone does
		 * full device discards (like mkfs). Be nice and allow
		 * us to schedule out to avoid softlocking if preempt
		 * is disabled.
		 */
		cond_resched();
	}
	blk_finish_plug(&plug);

	/* Wait for bios in-flight */
	if (!atomic_dec_and_test(&bb.done))
		wait_for_completion_io(&wait);

	if (!test_bit(BIO_UPTODATE, &bb.flags))
		ret = -EIO;

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
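
/*
 * Illustrative usage sketch (not part of this file): discard the first
 * 1MiB (2048 sectors of 512 bytes) of a device from a context that may
 * sleep.  "bdev" is assumed to be a valid, opened block device.
 *
 *	int err = blkdev_issue_discard(bdev, 0, 2048, GFP_KERNEL, 0);
 *	if (err)
 *		pr_warn("discard failed: %d\n", err);
 *
 * Passing BLKDEV_DISCARD_SECURE as the flags argument requests a
 * secure discard instead, failing with -EOPNOTSUPP if the queue does
 * not support it.
 */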

/**
 * blkdev_issue_write_same - queue a write same operation
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data to write
 *
 * Description:
 *    Issue a write same request for the sectors in question.
 */
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
			    sector_t nr_sects, gfp_t gfp_mask,
			    struct page *page)
{
	DECLARE_COMPLETION_ONSTACK(wait);
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_write_same_sectors;
	struct bio_batch bb;
	struct bio *bio;
	int ret = 0;

	if (!q)
		return -ENXIO;

	max_write_same_sectors = q->limits.max_write_same_sectors;

	if (max_write_same_sectors == 0)
		return -EOPNOTSUPP;

	atomic_set(&bb.done, 1);
	bb.flags = 1 << BIO_UPTODATE;
	bb.wait = &wait;

	while (nr_sects) {
		bio = bio_alloc(gfp_mask, 1);
		if (!bio) {
			ret = -ENOMEM;
			break;
		}

		bio->bi_iter.bi_sector = sector;
		bio->bi_end_io = bio_batch_end_io;
		bio->bi_bdev = bdev;
		bio->bi_private = &bb;
		bio->bi_vcnt = 1;
		bio->bi_io_vec->bv_page = page;
		bio->bi_io_vec->bv_offset = 0;
		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);

		if (nr_sects > max_write_same_sectors) {
			bio->bi_iter.bi_size = max_write_same_sectors << 9;
			nr_sects -= max_write_same_sectors;
			sector += max_write_same_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}

		atomic_inc(&bb.done);
		submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
	}

	/* Wait for bios in-flight */
	if (!atomic_dec_and_test(&bb.done))
		wait_for_completion_io(&wait);

	if (!test_bit(BIO_UPTODATE, &bb.flags))
		ret = -EIO;

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
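
/*
 * Illustrative usage sketch (not part of this file): fill a range with
 * a repeating logical block using WRITE SAME.  "page" is assumed to
 * hold at least one logical block of the pattern; ZERO_PAGE(0) is the
 * common choice for zeroing, as blkdev_issue_zeroout() below does.
 *
 *	err = blkdev_issue_write_same(bdev, sector, nr_sects,
 *				      GFP_KERNEL, ZERO_PAGE(0));
 */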

/**
 * __blkdev_issue_zeroout - generate a number of zero-filled write bios
 * @bdev:	blockdev to issue against
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 *
 * Description:
 *  Generate and issue a number of bios with zero-filled pages.
 */
static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
				  sector_t nr_sects, gfp_t gfp_mask)
{
	int ret;
	struct bio *bio;
	struct bio_batch bb;
	unsigned int sz;
	DECLARE_COMPLETION_ONSTACK(wait);

	atomic_set(&bb.done, 1);
	bb.flags = 1 << BIO_UPTODATE;
	bb.wait = &wait;

	ret = 0;
	while (nr_sects != 0) {
		bio = bio_alloc(gfp_mask,
				min(nr_sects, (sector_t)BIO_MAX_PAGES));
		if (!bio) {
			ret = -ENOMEM;
			break;
		}

		bio->bi_iter.bi_sector = sector;
		bio->bi_bdev   = bdev;
		bio->bi_end_io = bio_batch_end_io;
		bio->bi_private = &bb;

		while (nr_sects != 0) {
			sz = min((sector_t)PAGE_SIZE >> 9, nr_sects);
			ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
			nr_sects -= ret >> 9;
			sector += ret >> 9;
			if (ret < (sz << 9))
				break;
		}
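		/*
		 * ret now holds the byte count returned by the last
		 * bio_add_page() call; clear it so a bio that simply
		 * filled up is not mistaken for a failure.
		 */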
		ret = 0;
		atomic_inc(&bb.done);
		submit_bio(WRITE, bio);
	}

	/* Wait for bios in-flight */
	if (!atomic_dec_and_test(&bb.done))
		wait_for_completion_io(&wait);

	if (!test_bit(BIO_UPTODATE, &bb.flags))
		/* One of the bios in the batch completed with an error. */
		ret = -EIO;

	return ret;
}

/**
 * blkdev_issue_zeroout - zero-fill a block range
 * @bdev:	blockdev to write to
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @discard:	whether to discard the block range
 *
 * Description:
 *  Zero-fill a block range.  If the discard flag is set and the block
 *  device guarantees that subsequent READ operations to the block range
 *  in question will return zeroes, the blocks will be discarded.  If
 *  the discard request fails, if the discard flag is not set, or if
 *  discard_zeroes_data is not supported, this function falls back to
 *  zeroing the blocks manually, thus provisioning (allocating,
 *  anchoring) them.  If the block device supports the WRITE SAME
 *  command, blkdev_issue_zeroout() will use it to optimize the process
 *  of clearing the block range.  Otherwise the zeroing will be
 *  performed using regular WRITE calls.
 */
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
			 sector_t nr_sects, gfp_t gfp_mask, bool discard)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (discard && blk_queue_discard(q) && q->limits.discard_zeroes_data &&
	    blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, 0) == 0)
		return 0;

	if (bdev_write_same(bdev) &&
	    blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
				    ZERO_PAGE(0)) == 0)
		return 0;

	return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
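
/*
 * Illustrative usage sketch (not part of this file): zero a range,
 * letting the helper pick discard, WRITE SAME, or plain writes in
 * that order of preference:
 *
 *	err = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
 *				   true);
 *
 * Pass false as the last argument to skip the discard path and force
 * the blocks to be provisioned.
 */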