1 /*
2  *  Block device elevator/IO-scheduler.
3  *
4  *  Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
5  *
6  * 30042000 Jens Axboe <axboe@kernel.dk> :
7  *
8  * Split the elevator a bit so that it is possible to choose a different
9  * one or even write a new "plug in". There are three pieces:
10  * - elevator_fn, inserts a new request in the queue list
11  * - elevator_merge_fn, decides whether a new buffer can be merged with
12  *   an existing request
13  * - elevator_dequeue_fn, called when a request is taken off the active list
14  *
15  * 20082000 Dave Jones <davej@suse.de> :
16  * Removed tests for max-bomb-segments, which was breaking elvtune
17  *  when run without -bN
18  *
19  * Jens:
20  * - Rework again to work with bio instead of buffer_heads
21  * - loose bi_dev comparisons, partition handling is right now
22  * - completely modularize elevator setup and teardown
23  *
24  */
25 #include <linux/kernel.h>
26 #include <linux/fs.h>
27 #include <linux/blkdev.h>
28 #include <linux/elevator.h>
29 #include <linux/bio.h>
30 #include <linux/module.h>
31 #include <linux/slab.h>
32 #include <linux/init.h>
33 #include <linux/compiler.h>
34 #include <linux/blktrace_api.h>
35 #include <linux/hash.h>
36 #include <linux/uaccess.h>
37 #include <linux/pm_runtime.h>
38 #include <linux/blk-cgroup.h>
39 
40 #include <trace/events/block.h>
41 
42 #include "blk.h"
43 #include "blk-mq-sched.h"
44 #include "blk-wbt.h"
45 
46 static DEFINE_SPINLOCK(elv_list_lock);
47 static LIST_HEAD(elv_list);
48 
49 /*
50  * Merge hash stuff.
51  */
52 #define rq_hash_key(rq)		(blk_rq_pos(rq) + blk_rq_sectors(rq))
53 
54 /*
55  * Query io scheduler to see if the current process issuing bio may be
56  * merged with rq.
57  */
elv_iosched_allow_bio_merge(struct request * rq,struct bio * bio)58 static int elv_iosched_allow_bio_merge(struct request *rq, struct bio *bio)
59 {
60 	struct request_queue *q = rq->q;
61 	struct elevator_queue *e = q->elevator;
62 
63 	if (e->uses_mq && e->type->ops.mq.allow_merge)
64 		return e->type->ops.mq.allow_merge(q, rq, bio);
65 	else if (!e->uses_mq && e->type->ops.sq.elevator_allow_bio_merge_fn)
66 		return e->type->ops.sq.elevator_allow_bio_merge_fn(q, rq, bio);
67 
68 	return 1;
69 }
70 
71 /*
72  * can we safely merge with this request?
73  */
elv_bio_merge_ok(struct request * rq,struct bio * bio)74 bool elv_bio_merge_ok(struct request *rq, struct bio *bio)
75 {
76 	if (!blk_rq_merge_ok(rq, bio))
77 		return false;
78 
79 	if (!elv_iosched_allow_bio_merge(rq, bio))
80 		return false;
81 
82 	return true;
83 }
84 EXPORT_SYMBOL(elv_bio_merge_ok);
85 
elevator_match(const struct elevator_type * e,const char * name)86 static bool elevator_match(const struct elevator_type *e, const char *name)
87 {
88 	if (!strcmp(e->elevator_name, name))
89 		return true;
90 	if (e->elevator_alias && !strcmp(e->elevator_alias, name))
91 		return true;
92 
93 	return false;
94 }
95 
96 /*
97  * Return scheduler with name 'name' and with matching 'mq capability
98  */
elevator_find(const char * name,bool mq)99 static struct elevator_type *elevator_find(const char *name, bool mq)
100 {
101 	struct elevator_type *e;
102 
103 	list_for_each_entry(e, &elv_list, list) {
104 		if (elevator_match(e, name) && (mq == e->uses_mq))
105 			return e;
106 	}
107 
108 	return NULL;
109 }
110 
elevator_put(struct elevator_type * e)111 static void elevator_put(struct elevator_type *e)
112 {
113 	module_put(e->elevator_owner);
114 }
115 
elevator_get(struct request_queue * q,const char * name,bool try_loading)116 static struct elevator_type *elevator_get(struct request_queue *q,
117 					  const char *name, bool try_loading)
118 {
119 	struct elevator_type *e;
120 
121 	spin_lock(&elv_list_lock);
122 
123 	e = elevator_find(name, q->mq_ops != NULL);
124 	if (!e && try_loading) {
125 		spin_unlock(&elv_list_lock);
126 		request_module("%s-iosched", name);
127 		spin_lock(&elv_list_lock);
128 		e = elevator_find(name, q->mq_ops != NULL);
129 	}
130 
131 	if (e && !try_module_get(e->elevator_owner))
132 		e = NULL;
133 
134 	spin_unlock(&elv_list_lock);
135 	return e;
136 }
137 
138 static char chosen_elevator[ELV_NAME_MAX];
139 
elevator_setup(char * str)140 static int __init elevator_setup(char *str)
141 {
142 	/*
143 	 * Be backwards-compatible with previous kernels, so users
144 	 * won't get the wrong elevator.
145 	 */
146 	strncpy(chosen_elevator, str, sizeof(chosen_elevator) - 1);
147 	return 1;
148 }
149 
150 __setup("elevator=", elevator_setup);
151 
152 /* called during boot to load the elevator chosen by the elevator param */
load_default_elevator_module(void)153 void __init load_default_elevator_module(void)
154 {
155 	struct elevator_type *e;
156 
157 	if (!chosen_elevator[0])
158 		return;
159 
160 	/*
161 	 * Boot parameter is deprecated, we haven't supported that for MQ.
162 	 * Only look for non-mq schedulers from here.
163 	 */
164 	spin_lock(&elv_list_lock);
165 	e = elevator_find(chosen_elevator, false);
166 	spin_unlock(&elv_list_lock);
167 
168 	if (!e)
169 		request_module("%s-iosched", chosen_elevator);
170 }
171 
172 static struct kobj_type elv_ktype;
173 
elevator_alloc(struct request_queue * q,struct elevator_type * e)174 struct elevator_queue *elevator_alloc(struct request_queue *q,
175 				  struct elevator_type *e)
176 {
177 	struct elevator_queue *eq;
178 
179 	eq = kzalloc_node(sizeof(*eq), GFP_KERNEL, q->node);
180 	if (unlikely(!eq))
181 		return NULL;
182 
183 	eq->type = e;
184 	kobject_init(&eq->kobj, &elv_ktype);
185 	mutex_init(&eq->sysfs_lock);
186 	hash_init(eq->hash);
187 	eq->uses_mq = e->uses_mq;
188 
189 	return eq;
190 }
191 EXPORT_SYMBOL(elevator_alloc);
192 
elevator_release(struct kobject * kobj)193 static void elevator_release(struct kobject *kobj)
194 {
195 	struct elevator_queue *e;
196 
197 	e = container_of(kobj, struct elevator_queue, kobj);
198 	elevator_put(e->type);
199 	kfree(e);
200 }
201 
202 /*
203  * Use the default elevator specified by config boot param for non-mq devices,
204  * or by config option.  Don't try to load modules as we could be running off
205  * async and request_module() isn't allowed from async.
206  */
elevator_init(struct request_queue * q)207 int elevator_init(struct request_queue *q)
208 {
209 	struct elevator_type *e = NULL;
210 	int err = 0;
211 
212 	/*
213 	 * q->sysfs_lock must be held to provide mutual exclusion between
214 	 * elevator_switch() and here.
215 	 */
216 	mutex_lock(&q->sysfs_lock);
217 	if (unlikely(q->elevator))
218 		goto out_unlock;
219 
220 	if (*chosen_elevator) {
221 		e = elevator_get(q, chosen_elevator, false);
222 		if (!e)
223 			printk(KERN_ERR "I/O scheduler %s not found\n",
224 							chosen_elevator);
225 	}
226 
227 	if (!e)
228 		e = elevator_get(q, CONFIG_DEFAULT_IOSCHED, false);
229 	if (!e) {
230 		printk(KERN_ERR
231 			"Default I/O scheduler not found. Using noop.\n");
232 		e = elevator_get(q, "noop", false);
233 	}
234 
235 	err = e->ops.sq.elevator_init_fn(q, e);
236 	if (err)
237 		elevator_put(e);
238 out_unlock:
239 	mutex_unlock(&q->sysfs_lock);
240 	return err;
241 }
242 
elevator_exit(struct request_queue * q,struct elevator_queue * e)243 void elevator_exit(struct request_queue *q, struct elevator_queue *e)
244 {
245 	mutex_lock(&e->sysfs_lock);
246 	if (e->uses_mq && e->type->ops.mq.exit_sched)
247 		blk_mq_exit_sched(q, e);
248 	else if (!e->uses_mq && e->type->ops.sq.elevator_exit_fn)
249 		e->type->ops.sq.elevator_exit_fn(e);
250 	mutex_unlock(&e->sysfs_lock);
251 
252 	kobject_put(&e->kobj);
253 }
254 
__elv_rqhash_del(struct request * rq)255 static inline void __elv_rqhash_del(struct request *rq)
256 {
257 	hash_del(&rq->hash);
258 	rq->rq_flags &= ~RQF_HASHED;
259 }
260 
elv_rqhash_del(struct request_queue * q,struct request * rq)261 void elv_rqhash_del(struct request_queue *q, struct request *rq)
262 {
263 	if (ELV_ON_HASH(rq))
264 		__elv_rqhash_del(rq);
265 }
266 EXPORT_SYMBOL_GPL(elv_rqhash_del);
267 
elv_rqhash_add(struct request_queue * q,struct request * rq)268 void elv_rqhash_add(struct request_queue *q, struct request *rq)
269 {
270 	struct elevator_queue *e = q->elevator;
271 
272 	BUG_ON(ELV_ON_HASH(rq));
273 	hash_add(e->hash, &rq->hash, rq_hash_key(rq));
274 	rq->rq_flags |= RQF_HASHED;
275 }
276 EXPORT_SYMBOL_GPL(elv_rqhash_add);
277 
elv_rqhash_reposition(struct request_queue * q,struct request * rq)278 void elv_rqhash_reposition(struct request_queue *q, struct request *rq)
279 {
280 	__elv_rqhash_del(rq);
281 	elv_rqhash_add(q, rq);
282 }
283 
elv_rqhash_find(struct request_queue * q,sector_t offset)284 struct request *elv_rqhash_find(struct request_queue *q, sector_t offset)
285 {
286 	struct elevator_queue *e = q->elevator;
287 	struct hlist_node *next;
288 	struct request *rq;
289 
290 	hash_for_each_possible_safe(e->hash, rq, next, hash, offset) {
291 		BUG_ON(!ELV_ON_HASH(rq));
292 
293 		if (unlikely(!rq_mergeable(rq))) {
294 			__elv_rqhash_del(rq);
295 			continue;
296 		}
297 
298 		if (rq_hash_key(rq) == offset)
299 			return rq;
300 	}
301 
302 	return NULL;
303 }
304 
305 /*
306  * RB-tree support functions for inserting/lookup/removal of requests
307  * in a sorted RB tree.
308  */
elv_rb_add(struct rb_root * root,struct request * rq)309 void elv_rb_add(struct rb_root *root, struct request *rq)
310 {
311 	struct rb_node **p = &root->rb_node;
312 	struct rb_node *parent = NULL;
313 	struct request *__rq;
314 
315 	while (*p) {
316 		parent = *p;
317 		__rq = rb_entry(parent, struct request, rb_node);
318 
319 		if (blk_rq_pos(rq) < blk_rq_pos(__rq))
320 			p = &(*p)->rb_left;
321 		else if (blk_rq_pos(rq) >= blk_rq_pos(__rq))
322 			p = &(*p)->rb_right;
323 	}
324 
325 	rb_link_node(&rq->rb_node, parent, p);
326 	rb_insert_color(&rq->rb_node, root);
327 }
328 EXPORT_SYMBOL(elv_rb_add);
329 
elv_rb_del(struct rb_root * root,struct request * rq)330 void elv_rb_del(struct rb_root *root, struct request *rq)
331 {
332 	BUG_ON(RB_EMPTY_NODE(&rq->rb_node));
333 	rb_erase(&rq->rb_node, root);
334 	RB_CLEAR_NODE(&rq->rb_node);
335 }
336 EXPORT_SYMBOL(elv_rb_del);
337 
elv_rb_find(struct rb_root * root,sector_t sector)338 struct request *elv_rb_find(struct rb_root *root, sector_t sector)
339 {
340 	struct rb_node *n = root->rb_node;
341 	struct request *rq;
342 
343 	while (n) {
344 		rq = rb_entry(n, struct request, rb_node);
345 
346 		if (sector < blk_rq_pos(rq))
347 			n = n->rb_left;
348 		else if (sector > blk_rq_pos(rq))
349 			n = n->rb_right;
350 		else
351 			return rq;
352 	}
353 
354 	return NULL;
355 }
356 EXPORT_SYMBOL(elv_rb_find);
357 
358 /*
359  * Insert rq into dispatch queue of q.  Queue lock must be held on
360  * entry.  rq is sort instead into the dispatch queue. To be used by
361  * specific elevators.
362  */
elv_dispatch_sort(struct request_queue * q,struct request * rq)363 void elv_dispatch_sort(struct request_queue *q, struct request *rq)
364 {
365 	sector_t boundary;
366 	struct list_head *entry;
367 
368 	if (q->last_merge == rq)
369 		q->last_merge = NULL;
370 
371 	elv_rqhash_del(q, rq);
372 
373 	q->nr_sorted--;
374 
375 	boundary = q->end_sector;
376 	list_for_each_prev(entry, &q->queue_head) {
377 		struct request *pos = list_entry_rq(entry);
378 
379 		if (req_op(rq) != req_op(pos))
380 			break;
381 		if (rq_data_dir(rq) != rq_data_dir(pos))
382 			break;
383 		if (pos->rq_flags & (RQF_STARTED | RQF_SOFTBARRIER))
384 			break;
385 		if (blk_rq_pos(rq) >= boundary) {
386 			if (blk_rq_pos(pos) < boundary)
387 				continue;
388 		} else {
389 			if (blk_rq_pos(pos) >= boundary)
390 				break;
391 		}
392 		if (blk_rq_pos(rq) >= blk_rq_pos(pos))
393 			break;
394 	}
395 
396 	list_add(&rq->queuelist, entry);
397 }
398 EXPORT_SYMBOL(elv_dispatch_sort);
399 
400 /*
401  * Insert rq into dispatch queue of q.  Queue lock must be held on
402  * entry.  rq is added to the back of the dispatch queue. To be used by
403  * specific elevators.
404  */
elv_dispatch_add_tail(struct request_queue * q,struct request * rq)405 void elv_dispatch_add_tail(struct request_queue *q, struct request *rq)
406 {
407 	if (q->last_merge == rq)
408 		q->last_merge = NULL;
409 
410 	elv_rqhash_del(q, rq);
411 
412 	q->nr_sorted--;
413 
414 	q->end_sector = rq_end_sector(rq);
415 	q->boundary_rq = rq;
416 	list_add_tail(&rq->queuelist, &q->queue_head);
417 }
418 EXPORT_SYMBOL(elv_dispatch_add_tail);
419 
elv_merge(struct request_queue * q,struct request ** req,struct bio * bio)420 enum elv_merge elv_merge(struct request_queue *q, struct request **req,
421 		struct bio *bio)
422 {
423 	struct elevator_queue *e = q->elevator;
424 	struct request *__rq;
425 
426 	/*
427 	 * Levels of merges:
428 	 * 	nomerges:  No merges at all attempted
429 	 * 	noxmerges: Only simple one-hit cache try
430 	 * 	merges:	   All merge tries attempted
431 	 */
432 	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
433 		return ELEVATOR_NO_MERGE;
434 
435 	/*
436 	 * First try one-hit cache.
437 	 */
438 	if (q->last_merge && elv_bio_merge_ok(q->last_merge, bio)) {
439 		enum elv_merge ret = blk_try_merge(q->last_merge, bio);
440 
441 		if (ret != ELEVATOR_NO_MERGE) {
442 			*req = q->last_merge;
443 			return ret;
444 		}
445 	}
446 
447 	if (blk_queue_noxmerges(q))
448 		return ELEVATOR_NO_MERGE;
449 
450 	/*
451 	 * See if our hash lookup can find a potential backmerge.
452 	 */
453 	__rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
454 	if (__rq && elv_bio_merge_ok(__rq, bio)) {
455 		*req = __rq;
456 		return ELEVATOR_BACK_MERGE;
457 	}
458 
459 	if (e->uses_mq && e->type->ops.mq.request_merge)
460 		return e->type->ops.mq.request_merge(q, req, bio);
461 	else if (!e->uses_mq && e->type->ops.sq.elevator_merge_fn)
462 		return e->type->ops.sq.elevator_merge_fn(q, req, bio);
463 
464 	return ELEVATOR_NO_MERGE;
465 }
466 
467 /*
468  * Attempt to do an insertion back merge. Only check for the case where
469  * we can append 'rq' to an existing request, so we can throw 'rq' away
470  * afterwards.
471  *
472  * Returns true if we merged, false otherwise
473  */
elv_attempt_insert_merge(struct request_queue * q,struct request * rq)474 bool elv_attempt_insert_merge(struct request_queue *q, struct request *rq)
475 {
476 	struct request *__rq;
477 	bool ret;
478 
479 	if (blk_queue_nomerges(q))
480 		return false;
481 
482 	/*
483 	 * First try one-hit cache.
484 	 */
485 	if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq))
486 		return true;
487 
488 	if (blk_queue_noxmerges(q))
489 		return false;
490 
491 	ret = false;
492 	/*
493 	 * See if our hash lookup can find a potential backmerge.
494 	 */
495 	while (1) {
496 		__rq = elv_rqhash_find(q, blk_rq_pos(rq));
497 		if (!__rq || !blk_attempt_req_merge(q, __rq, rq))
498 			break;
499 
500 		/* The merged request could be merged with others, try again */
501 		ret = true;
502 		rq = __rq;
503 	}
504 
505 	return ret;
506 }
507 
elv_merged_request(struct request_queue * q,struct request * rq,enum elv_merge type)508 void elv_merged_request(struct request_queue *q, struct request *rq,
509 		enum elv_merge type)
510 {
511 	struct elevator_queue *e = q->elevator;
512 
513 	if (e->uses_mq && e->type->ops.mq.request_merged)
514 		e->type->ops.mq.request_merged(q, rq, type);
515 	else if (!e->uses_mq && e->type->ops.sq.elevator_merged_fn)
516 		e->type->ops.sq.elevator_merged_fn(q, rq, type);
517 
518 	if (type == ELEVATOR_BACK_MERGE)
519 		elv_rqhash_reposition(q, rq);
520 
521 	q->last_merge = rq;
522 }
523 
elv_merge_requests(struct request_queue * q,struct request * rq,struct request * next)524 void elv_merge_requests(struct request_queue *q, struct request *rq,
525 			     struct request *next)
526 {
527 	struct elevator_queue *e = q->elevator;
528 	bool next_sorted = false;
529 
530 	if (e->uses_mq && e->type->ops.mq.requests_merged)
531 		e->type->ops.mq.requests_merged(q, rq, next);
532 	else if (e->type->ops.sq.elevator_merge_req_fn) {
533 		next_sorted = (__force bool)(next->rq_flags & RQF_SORTED);
534 		if (next_sorted)
535 			e->type->ops.sq.elevator_merge_req_fn(q, rq, next);
536 	}
537 
538 	elv_rqhash_reposition(q, rq);
539 
540 	if (next_sorted) {
541 		elv_rqhash_del(q, next);
542 		q->nr_sorted--;
543 	}
544 
545 	q->last_merge = rq;
546 }
547 
elv_bio_merged(struct request_queue * q,struct request * rq,struct bio * bio)548 void elv_bio_merged(struct request_queue *q, struct request *rq,
549 			struct bio *bio)
550 {
551 	struct elevator_queue *e = q->elevator;
552 
553 	if (WARN_ON_ONCE(e->uses_mq))
554 		return;
555 
556 	if (e->type->ops.sq.elevator_bio_merged_fn)
557 		e->type->ops.sq.elevator_bio_merged_fn(q, rq, bio);
558 }
559 
560 #ifdef CONFIG_PM
blk_pm_requeue_request(struct request * rq)561 static void blk_pm_requeue_request(struct request *rq)
562 {
563 	if (rq->q->dev && !(rq->rq_flags & RQF_PM))
564 		rq->q->nr_pending--;
565 }
566 
blk_pm_add_request(struct request_queue * q,struct request * rq)567 static void blk_pm_add_request(struct request_queue *q, struct request *rq)
568 {
569 	if (q->dev && !(rq->rq_flags & RQF_PM) && q->nr_pending++ == 0 &&
570 	    (q->rpm_status == RPM_SUSPENDED || q->rpm_status == RPM_SUSPENDING))
571 		pm_request_resume(q->dev);
572 }
573 #else
blk_pm_requeue_request(struct request * rq)574 static inline void blk_pm_requeue_request(struct request *rq) {}
blk_pm_add_request(struct request_queue * q,struct request * rq)575 static inline void blk_pm_add_request(struct request_queue *q,
576 				      struct request *rq)
577 {
578 }
579 #endif
580 
elv_requeue_request(struct request_queue * q,struct request * rq)581 void elv_requeue_request(struct request_queue *q, struct request *rq)
582 {
583 	/*
584 	 * it already went through dequeue, we need to decrement the
585 	 * in_flight count again
586 	 */
587 	if (blk_account_rq(rq)) {
588 		q->in_flight[rq_is_sync(rq)]--;
589 		if (rq->rq_flags & RQF_SORTED)
590 			elv_deactivate_rq(q, rq);
591 	}
592 
593 	rq->rq_flags &= ~RQF_STARTED;
594 
595 	blk_pm_requeue_request(rq);
596 
597 	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE);
598 }
599 
elv_drain_elevator(struct request_queue * q)600 void elv_drain_elevator(struct request_queue *q)
601 {
602 	struct elevator_queue *e = q->elevator;
603 	static int printed;
604 
605 	if (WARN_ON_ONCE(e->uses_mq))
606 		return;
607 
608 	lockdep_assert_held(q->queue_lock);
609 
610 	while (e->type->ops.sq.elevator_dispatch_fn(q, 1))
611 		;
612 	if (q->nr_sorted && !blk_queue_is_zoned(q) && printed++ < 10 ) {
613 		printk(KERN_ERR "%s: forced dispatching is broken "
614 		       "(nr_sorted=%u), please report this\n",
615 		       q->elevator->type->elevator_name, q->nr_sorted);
616 	}
617 }
618 
__elv_add_request(struct request_queue * q,struct request * rq,int where)619 void __elv_add_request(struct request_queue *q, struct request *rq, int where)
620 {
621 	trace_block_rq_insert(q, rq);
622 
623 	blk_pm_add_request(q, rq);
624 
625 	rq->q = q;
626 
627 	if (rq->rq_flags & RQF_SOFTBARRIER) {
628 		/* barriers are scheduling boundary, update end_sector */
629 		if (!blk_rq_is_passthrough(rq)) {
630 			q->end_sector = rq_end_sector(rq);
631 			q->boundary_rq = rq;
632 		}
633 	} else if (!(rq->rq_flags & RQF_ELVPRIV) &&
634 		    (where == ELEVATOR_INSERT_SORT ||
635 		     where == ELEVATOR_INSERT_SORT_MERGE))
636 		where = ELEVATOR_INSERT_BACK;
637 
638 	switch (where) {
639 	case ELEVATOR_INSERT_REQUEUE:
640 	case ELEVATOR_INSERT_FRONT:
641 		rq->rq_flags |= RQF_SOFTBARRIER;
642 		list_add(&rq->queuelist, &q->queue_head);
643 		break;
644 
645 	case ELEVATOR_INSERT_BACK:
646 		rq->rq_flags |= RQF_SOFTBARRIER;
647 		elv_drain_elevator(q);
648 		list_add_tail(&rq->queuelist, &q->queue_head);
649 		/*
650 		 * We kick the queue here for the following reasons.
651 		 * - The elevator might have returned NULL previously
652 		 *   to delay requests and returned them now.  As the
653 		 *   queue wasn't empty before this request, ll_rw_blk
654 		 *   won't run the queue on return, resulting in hang.
655 		 * - Usually, back inserted requests won't be merged
656 		 *   with anything.  There's no point in delaying queue
657 		 *   processing.
658 		 */
659 		__blk_run_queue(q);
660 		break;
661 
662 	case ELEVATOR_INSERT_SORT_MERGE:
663 		/*
664 		 * If we succeed in merging this request with one in the
665 		 * queue already, we are done - rq has now been freed,
666 		 * so no need to do anything further.
667 		 */
668 		if (elv_attempt_insert_merge(q, rq))
669 			break;
670 		/* fall through */
671 	case ELEVATOR_INSERT_SORT:
672 		BUG_ON(blk_rq_is_passthrough(rq));
673 		rq->rq_flags |= RQF_SORTED;
674 		q->nr_sorted++;
675 		if (rq_mergeable(rq)) {
676 			elv_rqhash_add(q, rq);
677 			if (!q->last_merge)
678 				q->last_merge = rq;
679 		}
680 
681 		/*
682 		 * Some ioscheds (cfq) run q->request_fn directly, so
683 		 * rq cannot be accessed after calling
684 		 * elevator_add_req_fn.
685 		 */
686 		q->elevator->type->ops.sq.elevator_add_req_fn(q, rq);
687 		break;
688 
689 	case ELEVATOR_INSERT_FLUSH:
690 		rq->rq_flags |= RQF_SOFTBARRIER;
691 		blk_insert_flush(rq);
692 		break;
693 	default:
694 		printk(KERN_ERR "%s: bad insertion point %d\n",
695 		       __func__, where);
696 		BUG();
697 	}
698 }
699 EXPORT_SYMBOL(__elv_add_request);
700 
elv_add_request(struct request_queue * q,struct request * rq,int where)701 void elv_add_request(struct request_queue *q, struct request *rq, int where)
702 {
703 	unsigned long flags;
704 
705 	spin_lock_irqsave(q->queue_lock, flags);
706 	__elv_add_request(q, rq, where);
707 	spin_unlock_irqrestore(q->queue_lock, flags);
708 }
709 EXPORT_SYMBOL(elv_add_request);
710 
elv_latter_request(struct request_queue * q,struct request * rq)711 struct request *elv_latter_request(struct request_queue *q, struct request *rq)
712 {
713 	struct elevator_queue *e = q->elevator;
714 
715 	if (e->uses_mq && e->type->ops.mq.next_request)
716 		return e->type->ops.mq.next_request(q, rq);
717 	else if (!e->uses_mq && e->type->ops.sq.elevator_latter_req_fn)
718 		return e->type->ops.sq.elevator_latter_req_fn(q, rq);
719 
720 	return NULL;
721 }
722 
elv_former_request(struct request_queue * q,struct request * rq)723 struct request *elv_former_request(struct request_queue *q, struct request *rq)
724 {
725 	struct elevator_queue *e = q->elevator;
726 
727 	if (e->uses_mq && e->type->ops.mq.former_request)
728 		return e->type->ops.mq.former_request(q, rq);
729 	if (!e->uses_mq && e->type->ops.sq.elevator_former_req_fn)
730 		return e->type->ops.sq.elevator_former_req_fn(q, rq);
731 	return NULL;
732 }
733 
elv_set_request(struct request_queue * q,struct request * rq,struct bio * bio,gfp_t gfp_mask)734 int elv_set_request(struct request_queue *q, struct request *rq,
735 		    struct bio *bio, gfp_t gfp_mask)
736 {
737 	struct elevator_queue *e = q->elevator;
738 
739 	if (WARN_ON_ONCE(e->uses_mq))
740 		return 0;
741 
742 	if (e->type->ops.sq.elevator_set_req_fn)
743 		return e->type->ops.sq.elevator_set_req_fn(q, rq, bio, gfp_mask);
744 	return 0;
745 }
746 
elv_put_request(struct request_queue * q,struct request * rq)747 void elv_put_request(struct request_queue *q, struct request *rq)
748 {
749 	struct elevator_queue *e = q->elevator;
750 
751 	if (WARN_ON_ONCE(e->uses_mq))
752 		return;
753 
754 	if (e->type->ops.sq.elevator_put_req_fn)
755 		e->type->ops.sq.elevator_put_req_fn(rq);
756 }
757 
elv_may_queue(struct request_queue * q,unsigned int op)758 int elv_may_queue(struct request_queue *q, unsigned int op)
759 {
760 	struct elevator_queue *e = q->elevator;
761 
762 	if (WARN_ON_ONCE(e->uses_mq))
763 		return 0;
764 
765 	if (e->type->ops.sq.elevator_may_queue_fn)
766 		return e->type->ops.sq.elevator_may_queue_fn(q, op);
767 
768 	return ELV_MQUEUE_MAY;
769 }
770 
elv_completed_request(struct request_queue * q,struct request * rq)771 void elv_completed_request(struct request_queue *q, struct request *rq)
772 {
773 	struct elevator_queue *e = q->elevator;
774 
775 	if (WARN_ON_ONCE(e->uses_mq))
776 		return;
777 
778 	/*
779 	 * request is released from the driver, io must be done
780 	 */
781 	if (blk_account_rq(rq)) {
782 		q->in_flight[rq_is_sync(rq)]--;
783 		if ((rq->rq_flags & RQF_SORTED) &&
784 		    e->type->ops.sq.elevator_completed_req_fn)
785 			e->type->ops.sq.elevator_completed_req_fn(q, rq);
786 	}
787 }
788 
789 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)
790 
791 static ssize_t
elv_attr_show(struct kobject * kobj,struct attribute * attr,char * page)792 elv_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
793 {
794 	struct elv_fs_entry *entry = to_elv(attr);
795 	struct elevator_queue *e;
796 	ssize_t error;
797 
798 	if (!entry->show)
799 		return -EIO;
800 
801 	e = container_of(kobj, struct elevator_queue, kobj);
802 	mutex_lock(&e->sysfs_lock);
803 	error = e->type ? entry->show(e, page) : -ENOENT;
804 	mutex_unlock(&e->sysfs_lock);
805 	return error;
806 }
807 
808 static ssize_t
elv_attr_store(struct kobject * kobj,struct attribute * attr,const char * page,size_t length)809 elv_attr_store(struct kobject *kobj, struct attribute *attr,
810 	       const char *page, size_t length)
811 {
812 	struct elv_fs_entry *entry = to_elv(attr);
813 	struct elevator_queue *e;
814 	ssize_t error;
815 
816 	if (!entry->store)
817 		return -EIO;
818 
819 	e = container_of(kobj, struct elevator_queue, kobj);
820 	mutex_lock(&e->sysfs_lock);
821 	error = e->type ? entry->store(e, page, length) : -ENOENT;
822 	mutex_unlock(&e->sysfs_lock);
823 	return error;
824 }
825 
826 static const struct sysfs_ops elv_sysfs_ops = {
827 	.show	= elv_attr_show,
828 	.store	= elv_attr_store,
829 };
830 
831 static struct kobj_type elv_ktype = {
832 	.sysfs_ops	= &elv_sysfs_ops,
833 	.release	= elevator_release,
834 };
835 
836 /*
837  * elv_register_queue is called from either blk_register_queue or
838  * elevator_switch, elevator switch is prevented from being happen
839  * in the two paths, so it is safe to not hold q->sysfs_lock.
840  */
elv_register_queue(struct request_queue * q,bool uevent)841 int elv_register_queue(struct request_queue *q, bool uevent)
842 {
843 	struct elevator_queue *e = q->elevator;
844 	int error;
845 
846 	error = kobject_add(&e->kobj, &q->kobj, "%s", "iosched");
847 	if (!error) {
848 		struct elv_fs_entry *attr = e->type->elevator_attrs;
849 		if (attr) {
850 			while (attr->attr.name) {
851 				if (sysfs_create_file(&e->kobj, &attr->attr))
852 					break;
853 				attr++;
854 			}
855 		}
856 		if (uevent)
857 			kobject_uevent(&e->kobj, KOBJ_ADD);
858 
859 		e->registered = 1;
860 		if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn)
861 			e->type->ops.sq.elevator_registered_fn(q);
862 	}
863 	return error;
864 }
865 
866 /*
867  * elv_unregister_queue is called from either blk_unregister_queue or
868  * elevator_switch, elevator switch is prevented from being happen
869  * in the two paths, so it is safe to not hold q->sysfs_lock.
870  */
elv_unregister_queue(struct request_queue * q)871 void elv_unregister_queue(struct request_queue *q)
872 {
873 	if (q) {
874 		struct elevator_queue *e = q->elevator;
875 
876 		kobject_uevent(&e->kobj, KOBJ_REMOVE);
877 		kobject_del(&e->kobj);
878 
879 		e->registered = 0;
880 	}
881 }
882 
elv_register(struct elevator_type * e)883 int elv_register(struct elevator_type *e)
884 {
885 	char *def = "";
886 
887 	/* create icq_cache if requested */
888 	if (e->icq_size) {
889 		if (WARN_ON(e->icq_size < sizeof(struct io_cq)) ||
890 		    WARN_ON(e->icq_align < __alignof__(struct io_cq)))
891 			return -EINVAL;
892 
893 		snprintf(e->icq_cache_name, sizeof(e->icq_cache_name),
894 			 "%s_io_cq", e->elevator_name);
895 		e->icq_cache = kmem_cache_create(e->icq_cache_name, e->icq_size,
896 						 e->icq_align, 0, NULL);
897 		if (!e->icq_cache)
898 			return -ENOMEM;
899 	}
900 
901 	/* register, don't allow duplicate names */
902 	spin_lock(&elv_list_lock);
903 	if (elevator_find(e->elevator_name, e->uses_mq)) {
904 		spin_unlock(&elv_list_lock);
905 		kmem_cache_destroy(e->icq_cache);
906 		return -EBUSY;
907 	}
908 	list_add_tail(&e->list, &elv_list);
909 	spin_unlock(&elv_list_lock);
910 
911 	/* print pretty message */
912 	if (elevator_match(e, chosen_elevator) ||
913 			(!*chosen_elevator &&
914 			 elevator_match(e, CONFIG_DEFAULT_IOSCHED)))
915 				def = " (default)";
916 
917 	printk(KERN_INFO "io scheduler %s registered%s\n", e->elevator_name,
918 								def);
919 	return 0;
920 }
921 EXPORT_SYMBOL_GPL(elv_register);
922 
elv_unregister(struct elevator_type * e)923 void elv_unregister(struct elevator_type *e)
924 {
925 	/* unregister */
926 	spin_lock(&elv_list_lock);
927 	list_del_init(&e->list);
928 	spin_unlock(&elv_list_lock);
929 
930 	/*
931 	 * Destroy icq_cache if it exists.  icq's are RCU managed.  Make
932 	 * sure all RCU operations are complete before proceeding.
933 	 */
934 	if (e->icq_cache) {
935 		rcu_barrier();
936 		kmem_cache_destroy(e->icq_cache);
937 		e->icq_cache = NULL;
938 	}
939 }
940 EXPORT_SYMBOL_GPL(elv_unregister);
941 
elevator_switch_mq(struct request_queue * q,struct elevator_type * new_e)942 int elevator_switch_mq(struct request_queue *q,
943 			      struct elevator_type *new_e)
944 {
945 	int ret;
946 
947 	lockdep_assert_held(&q->sysfs_lock);
948 
949 	if (q->elevator) {
950 		if (q->elevator->registered)
951 			elv_unregister_queue(q);
952 
953 		ioc_clear_queue(q);
954 		elevator_exit(q, q->elevator);
955 	}
956 
957 	ret = blk_mq_init_sched(q, new_e);
958 	if (ret)
959 		goto out;
960 
961 	if (new_e) {
962 		ret = elv_register_queue(q, true);
963 		if (ret) {
964 			elevator_exit(q, q->elevator);
965 			goto out;
966 		}
967 	}
968 
969 	if (new_e)
970 		blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
971 	else
972 		blk_add_trace_msg(q, "elv switch: none");
973 
974 out:
975 	return ret;
976 }
977 
978 /*
979  * For blk-mq devices, we default to using mq-deadline, if available, for single
980  * queue devices.  If deadline isn't available OR we have multiple queues,
981  * default to "none".
982  */
elevator_init_mq(struct request_queue * q)983 int elevator_init_mq(struct request_queue *q)
984 {
985 	struct elevator_type *e;
986 	int err = 0;
987 
988 	if (q->nr_hw_queues != 1)
989 		return 0;
990 
991 	WARN_ON_ONCE(test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags));
992 
993 	if (unlikely(q->elevator))
994 		goto out;
995 
996 	e = elevator_get(q, "mq-deadline", false);
997 	if (!e)
998 		goto out;
999 
1000 	err = blk_mq_init_sched(q, e);
1001 	if (err)
1002 		elevator_put(e);
1003 out:
1004 	return err;
1005 }
1006 
1007 
1008 /*
1009  * switch to new_e io scheduler. be careful not to introduce deadlocks -
1010  * we don't free the old io scheduler, before we have allocated what we
1011  * need for the new one. this way we have a chance of going back to the old
1012  * one, if the new one fails init for some reason.
1013  */
elevator_switch(struct request_queue * q,struct elevator_type * new_e)1014 static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
1015 {
1016 	struct elevator_queue *old = q->elevator;
1017 	bool old_registered = false;
1018 	int err;
1019 
1020 	lockdep_assert_held(&q->sysfs_lock);
1021 
1022 	if (q->mq_ops) {
1023 		blk_mq_freeze_queue(q);
1024 		blk_mq_quiesce_queue(q);
1025 
1026 		err = elevator_switch_mq(q, new_e);
1027 
1028 		blk_mq_unquiesce_queue(q);
1029 		blk_mq_unfreeze_queue(q);
1030 
1031 		return err;
1032 	}
1033 
1034 	/*
1035 	 * Turn on BYPASS and drain all requests w/ elevator private data.
1036 	 * Block layer doesn't call into a quiesced elevator - all requests
1037 	 * are directly put on the dispatch list without elevator data
1038 	 * using INSERT_BACK.  All requests have SOFTBARRIER set and no
1039 	 * merge happens either.
1040 	 */
1041 	if (old) {
1042 		old_registered = old->registered;
1043 
1044 		blk_queue_bypass_start(q);
1045 
1046 		/* unregister and clear all auxiliary data of the old elevator */
1047 		if (old_registered)
1048 			elv_unregister_queue(q);
1049 
1050 		ioc_clear_queue(q);
1051 	}
1052 
1053 	/* allocate, init and register new elevator */
1054 	err = new_e->ops.sq.elevator_init_fn(q, new_e);
1055 	if (err)
1056 		goto fail_init;
1057 
1058 	err = elv_register_queue(q, true);
1059 	if (err)
1060 		goto fail_register;
1061 
1062 	/* done, kill the old one and finish */
1063 	if (old) {
1064 		elevator_exit(q, old);
1065 		blk_queue_bypass_end(q);
1066 	}
1067 
1068 	blk_add_trace_msg(q, "elv switch: %s", new_e->elevator_name);
1069 
1070 	return 0;
1071 
1072 fail_register:
1073 	elevator_exit(q, q->elevator);
1074 fail_init:
1075 	/* switch failed, restore and re-register old elevator */
1076 	if (old) {
1077 		q->elevator = old;
1078 		elv_register_queue(q, true);
1079 		blk_queue_bypass_end(q);
1080 	}
1081 
1082 	return err;
1083 }
1084 
1085 /*
1086  * Switch this queue to the given IO scheduler.
1087  */
__elevator_change(struct request_queue * q,const char * name)1088 static int __elevator_change(struct request_queue *q, const char *name)
1089 {
1090 	char elevator_name[ELV_NAME_MAX];
1091 	struct elevator_type *e;
1092 
1093 	/* Make sure queue is not in the middle of being removed */
1094 	if (!blk_queue_registered(q))
1095 		return -ENOENT;
1096 
1097 	/*
1098 	 * Special case for mq, turn off scheduling
1099 	 */
1100 	if (q->mq_ops && !strncmp(name, "none", 4))
1101 		return elevator_switch(q, NULL);
1102 
1103 	strlcpy(elevator_name, name, sizeof(elevator_name));
1104 	e = elevator_get(q, strstrip(elevator_name), true);
1105 	if (!e)
1106 		return -EINVAL;
1107 
1108 	if (q->elevator && elevator_match(q->elevator->type, elevator_name)) {
1109 		elevator_put(e);
1110 		return 0;
1111 	}
1112 
1113 	return elevator_switch(q, e);
1114 }
1115 
elv_support_iosched(struct request_queue * q)1116 static inline bool elv_support_iosched(struct request_queue *q)
1117 {
1118 	if (q->mq_ops && q->tag_set && (q->tag_set->flags &
1119 				BLK_MQ_F_NO_SCHED))
1120 		return false;
1121 	return true;
1122 }
1123 
elv_iosched_store(struct request_queue * q,const char * name,size_t count)1124 ssize_t elv_iosched_store(struct request_queue *q, const char *name,
1125 			  size_t count)
1126 {
1127 	int ret;
1128 
1129 	if (!(q->mq_ops || q->request_fn) || !elv_support_iosched(q))
1130 		return count;
1131 
1132 	ret = __elevator_change(q, name);
1133 	if (!ret)
1134 		return count;
1135 
1136 	return ret;
1137 }
1138 
elv_iosched_show(struct request_queue * q,char * name)1139 ssize_t elv_iosched_show(struct request_queue *q, char *name)
1140 {
1141 	struct elevator_queue *e = q->elevator;
1142 	struct elevator_type *elv = NULL;
1143 	struct elevator_type *__e;
1144 	bool uses_mq = q->mq_ops != NULL;
1145 	int len = 0;
1146 
1147 	if (!queue_is_rq_based(q))
1148 		return sprintf(name, "none\n");
1149 
1150 	if (!q->elevator)
1151 		len += sprintf(name+len, "[none] ");
1152 	else
1153 		elv = e->type;
1154 
1155 	spin_lock(&elv_list_lock);
1156 	list_for_each_entry(__e, &elv_list, list) {
1157 		if (elv && elevator_match(elv, __e->elevator_name) &&
1158 		    (__e->uses_mq == uses_mq)) {
1159 			len += sprintf(name+len, "[%s] ", elv->elevator_name);
1160 			continue;
1161 		}
1162 		if (__e->uses_mq && q->mq_ops && elv_support_iosched(q))
1163 			len += sprintf(name+len, "%s ", __e->elevator_name);
1164 		else if (!__e->uses_mq && !q->mq_ops)
1165 			len += sprintf(name+len, "%s ", __e->elevator_name);
1166 	}
1167 	spin_unlock(&elv_list_lock);
1168 
1169 	if (q->mq_ops && q->elevator)
1170 		len += sprintf(name+len, "none");
1171 
1172 	len += sprintf(len+name, "\n");
1173 	return len;
1174 }
1175 
elv_rb_former_request(struct request_queue * q,struct request * rq)1176 struct request *elv_rb_former_request(struct request_queue *q,
1177 				      struct request *rq)
1178 {
1179 	struct rb_node *rbprev = rb_prev(&rq->rb_node);
1180 
1181 	if (rbprev)
1182 		return rb_entry_rq(rbprev);
1183 
1184 	return NULL;
1185 }
1186 EXPORT_SYMBOL(elv_rb_former_request);
1187 
elv_rb_latter_request(struct request_queue * q,struct request * rq)1188 struct request *elv_rb_latter_request(struct request_queue *q,
1189 				      struct request *rq)
1190 {
1191 	struct rb_node *rbnext = rb_next(&rq->rb_node);
1192 
1193 	if (rbnext)
1194 		return rb_entry_rq(rbnext);
1195 
1196 	return NULL;
1197 }
1198 EXPORT_SYMBOL(elv_rb_latter_request);
1199