1Block io priorities
2===================
3
4
5Intro
6-----
7
8With the introduction of cfq v3 (aka cfq-ts or time sliced cfq), basic io
9priorities are supported for reads on files.  This enables users to io nice
10processes or process groups, similar to what has been possible with cpu
11scheduling for ages.  This document mainly details the current possibilities
12with cfq; other io schedulers do not support io priorities thus far.
13
14Scheduling classes
15------------------
16
17CFQ implements three generic scheduling classes that determine how io is
18served for a process.
19
IOPRIO_CLASS_RT: This is the realtime io class. This scheduling class is given
higher priority than any other in the system; processes from this class are
given first access to the disk every time. It therefore needs to be used with
some care, since a single io RT process can starve the entire system. Within
the RT class, there are 8 levels of class data that determine exactly how much
time this process needs the disk for on each service. In the future this might
change to be more directly mappable to performance, by passing in a wanted
data rate instead.
28
IOPRIO_CLASS_BE: This is the best-effort scheduling class, which is the default
for any process that hasn't set a specific io priority. The class data
determines how much io bandwidth the process will get. It is directly mappable
to the cpu nice levels, just more coarsely implemented. 0 is the highest
BE prio level, 7 is the lowest. The mapping between cpu nice level and io
nice level is determined as: io_nice = (cpu_nice + 20) / 5.
35
IOPRIO_CLASS_IDLE: This is the idle scheduling class. Processes running at this
level only get io time when no one else needs the disk. The idle class has no
class data, since it doesn't really apply here.
39
40Tools
41-----
42
43See below for a sample ionice tool. Usage:
44
45# ionice -c<class> -n<level> -p<pid>
46
47If pid isn't given, the current process is assumed. IO priority settings
48are inherited on fork, so you can use ionice to start the process at a given
49level:
50
51# ionice -c2 -n0 /bin/ls
52
53will run ls at the best-effort scheduling class at the highest priority.
54For a running process, you can give the pid instead:
55
56# ionice -c1 -n2 -p100
57
58will change pid 100 to run at the realtime scheduling class, at priority 2.
59
60---> snip ionice.c tool <---
61
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <limits.h>
#include <getopt.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <asm/unistd.h>
69
/*
 * Syscall numbers for ioprio_set/ioprio_get.
 *
 * If the headers included above already define both numbers, those
 * definitions are used as-is. The hard-coded per-architecture table below is
 * only a fallback, so an architecture missing from the table still builds as
 * long as its headers supply the numbers.
 */
#if !defined(__NR_ioprio_set) || !defined(__NR_ioprio_get)
#if defined(__i386__)
#define __NR_ioprio_set		289
#define __NR_ioprio_get		290
#elif defined(__ppc__) || defined(__powerpc__)
#define __NR_ioprio_set		273
#define __NR_ioprio_get		274
#elif defined(__x86_64__)
#define __NR_ioprio_set		251
#define __NR_ioprio_get		252
#elif defined(__ia64__)
#define __NR_ioprio_set		1274
#define __NR_ioprio_get		1275
#else
#error "Unsupported arch"
#endif
#endif
88
/*
 * Issue the ioprio_set system call through syscall().
 *
 * @which:  one of the IOPRIO_WHO_* selectors
 * @who:    id interpreted according to @which
 * @ioprio: priority word (class shifted by IOPRIO_CLASS_SHIFT, OR-ed with
 *          the level)
 *
 * Returns the syscall() result narrowed to int; the caller in this file
 * treats -1 as failure and reports it with perror().
 */
static inline int ioprio_set(int which, int who, int ioprio)
{
	const long rc = syscall(__NR_ioprio_set, which, who, ioprio);

	return (int)rc;
}
93
/*
 * Issue the ioprio_get system call through syscall().
 *
 * @which: one of the IOPRIO_WHO_* selectors
 * @who:   id interpreted according to @which
 *
 * Returns the syscall() result narrowed to int. The caller in this file
 * treats -1 as failure and otherwise splits the value into a class (upper
 * bits, see IOPRIO_CLASS_SHIFT) and a level.
 */
static inline int ioprio_get(int which, int who)
{
	const long rc = syscall(__NR_ioprio_get, which, who);

	return (int)rc;
}
98
/* Scheduling classes; the value is stored above IOPRIO_CLASS_SHIFT. */
enum {
	IOPRIO_CLASS_NONE = 0,
	IOPRIO_CLASS_RT   = 1,
	IOPRIO_CLASS_BE   = 2,
	IOPRIO_CLASS_IDLE = 3,
};

/* Selectors for the "which" argument of ioprio_set()/ioprio_get(). */
enum {
	IOPRIO_WHO_PROCESS = 1,
	IOPRIO_WHO_PGRP    = 2,
	IOPRIO_WHO_USER    = 3,
};

/* Bit position of the class inside the priority word. */
#define IOPRIO_CLASS_SHIFT	13

/* Human-readable class names, indexed by IOPRIO_CLASS_* value. */
const char *to_prio[] = {
	[IOPRIO_CLASS_NONE] = "none",
	[IOPRIO_CLASS_RT]   = "realtime",
	[IOPRIO_CLASS_BE]   = "best-effort",
	[IOPRIO_CLASS_IDLE] = "idle",
};
115
116int main(int argc, char *argv[])
117{
118	int ioprio = 4, set = 0, ioprio_class = IOPRIO_CLASS_BE;
119	int c, pid = 0;
120
121	while ((c = getopt(argc, argv, "+n:c:p:")) != EOF) {
122		switch (c) {
123		case 'n':
124			ioprio = strtol(optarg, NULL, 10);
125			set = 1;
126			break;
127		case 'c':
128			ioprio_class = strtol(optarg, NULL, 10);
129			set = 1;
130			break;
131		case 'p':
132			pid = strtol(optarg, NULL, 10);
133			break;
134		}
135	}
136
137	switch (ioprio_class) {
138		case IOPRIO_CLASS_NONE:
139			ioprio_class = IOPRIO_CLASS_BE;
140			break;
141		case IOPRIO_CLASS_RT:
142		case IOPRIO_CLASS_BE:
143			break;
144		case IOPRIO_CLASS_IDLE:
145			ioprio = 7;
146			break;
147		default:
148			printf("bad prio class %d\n", ioprio_class);
149			return 1;
150	}
151
152	if (!set) {
153		if (!pid && argv[optind])
154			pid = strtol(argv[optind], NULL, 10);
155
156		ioprio = ioprio_get(IOPRIO_WHO_PROCESS, pid);
157
158		printf("pid=%d, %d\n", pid, ioprio);
159
160		if (ioprio == -1)
161			perror("ioprio_get");
162		else {
163			ioprio_class = ioprio >> IOPRIO_CLASS_SHIFT;
164			ioprio = ioprio & 0xff;
165			printf("%s: prio %d\n", to_prio[ioprio_class], ioprio);
166		}
167	} else {
168		if (ioprio_set(IOPRIO_WHO_PROCESS, pid, ioprio | ioprio_class << IOPRIO_CLASS_SHIFT) == -1) {
169			perror("ioprio_set");
170			return 1;
171		}
172
173		if (argv[optind])
174			execvp(argv[optind], &argv[optind]);
175	}
176
177	return 0;
178}
179
180---> snip ionice.c tool <---
181
182
183March 11 2005, Jens Axboe <jens.axboe@oracle.com>
184