/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2017 Google, Inc.
 *
 */

#ifndef _UAPI_LINUX_VSOC_SHM_H
#define _UAPI_LINUX_VSOC_SHM_H

/* <linux/ioctl.h> supplies the _IO/_IOR/_IOW/_IOWR macros used below. */
#include <linux/ioctl.h>
#include <linux/types.h>

/**
 * A permission is a token that permits a receiver to read and/or write an area
 * of memory within a Vsoc region.
 *
 * An fd_scoped permission grants both read and write access, and can be
 * attached to a file description (see open(2)).
 * Ownership of the area can then be shared by passing a file descriptor
 * among processes.
 *
 * begin_offset and end_offset define the area of memory that is controlled by
 * the permission. owner_offset points to a word, also in shared memory, that
 * controls ownership of the area.
 *
 * Ownership of the region expires when the associated file description is
 * released.
 *
 * At most one permission can be attached to each file description.
 *
 * This is useful when implementing HALs like gralloc that scope and pass
 * ownership of shared resources via file descriptors.
 *
 * The caller is responsible for doing any fencing.
 *
 * The calling process will normally identify a currently free area of
 * memory. It will construct a proposed fd_scoped_permission_arg structure:
 *
 *   begin_offset and end_offset describe the area being claimed
 *
 *   owner_offset points to the location in shared memory that indicates the
 *   owner of the area.
 *
 *   owned_value is the value that will be stored in owner_offset iff the
 *   permission can be granted. It must be different than VSOC_REGION_FREE.
 *
 * Two fd_scoped_permission structures are compatible if they vary only by
 * their owned_value fields.
 *
 * The driver ensures that, for any group of simultaneous callers proposing
 * compatible fd_scoped_permissions, it will accept exactly one of the
 * proposals. The other callers will get a failure with errno of EAGAIN.
 *
 * A process receiving a file descriptor can identify the region being
 * granted using the VSOC_GET_FD_SCOPED_PERMISSION ioctl.
 */
struct fd_scoped_permission {
	/* Start of the area controlled by the permission */
	__u32 begin_offset;
	/* End of the area controlled by the permission */
	__u32 end_offset;
	/* Location of the shared-memory word that records the owner */
	__u32 owner_offset;
	/* Value stored at owner_offset iff the permission is granted */
	__u32 owned_value;
};

/*
 * This value represents a free area of memory. The driver expects to see this
 * value at owner_offset when creating a permission, otherwise it will not
 * create it, and will write this value back once the permission is no longer
 * needed.
 */
#define VSOC_REGION_FREE ((__u32)0)

/**
 * ioctl argument for VSOC_CREATE_FD_SCOPED_PERMISSION
 */
struct fd_scoped_permission_arg {
	/* The permission being proposed */
	struct fd_scoped_permission perm;
	/*
	 * NOTE(review): presumably a file descriptor opened on the managed
	 * region (see managed_by in struct vsoc_device_region) - confirm
	 * against the driver.
	 */
	__s32 managed_region_fd;
};

/*
 * NOTE(review): presumably the value of an unused signal table entry (the
 * protocol below treats 0 as "empty") - confirm against the users of this
 * macro.
 */
#define VSOC_NODE_FREE ((__u32)0)

/*
 * Describes a signal table in shared memory. Each non-zero entry in the
 * table indicates that the receiver should signal the futex at the given
 * offset. Offsets are relative to the region, not the shared memory window.
 *
 * interrupt_signalled_offset is used to reliably signal interrupts across the
 * vmm boundary. There are two roles: transmitter and receiver. For example,
 * in the host_to_guest_signal_table the host is the transmitter and the
 * guest is the receiver. The protocol is as follows:
 *
 * 1. The transmitter should convert the offset of the futex to an offset
 *    in the signal table [0, (1 << num_nodes_lg2))
 *    The transmitter can choose any appropriate hashing algorithm, including
 *    hash = futex_offset & ((1 << num_nodes_lg2) - 1)
 *
 * 2. The transmitter should atomically compare and swap futex_offset with 0
 *    at hash. There are 3 possible outcomes
 *      a. The swap fails because the futex_offset is already in the table.
 *         The transmitter should stop.
 *      b. Some other offset is in the table. This is a hash collision. The
 *         transmitter should move to another table slot and try again. One
 *         possible algorithm:
 *         hash = (hash + 1) & ((1 << num_nodes_lg2) - 1)
 *      c. The swap worked. Continue below.
 *
 * 3. The transmitter atomically swaps 1 with the value at the
 *    interrupt_signalled_offset. There are two outcomes:
 *      a. The prior value was 1. In this case an interrupt has already been
 *         posted. The transmitter is done.
 *      b. The prior value was 0, indicating that the receiver may be sleeping.
 *         The transmitter will issue an interrupt.
 *
 * 4. On waking the receiver immediately exchanges a 0 with the
 *    interrupt_signalled_offset. If it receives a 0 then this is a spurious
 *    interrupt. That may occasionally happen in the current protocol, but
 *    should be rare.
 *
 * 5. The receiver scans the signal table by atomically exchanging 0 at each
 *    location. If a non-zero offset is returned from the exchange the
 *    receiver wakes all sleepers at the given offset:
 *      futex((int*)(region_base + old_value), FUTEX_WAKE, MAX_INT);
 *
 * 6. The receiver thread then does a conditional wait, waking immediately
 *    if the value at interrupt_signalled_offset is non-zero. This catches
 *    cases where additional signals were posted while the table was being
 *    scanned. On the guest the wait is handled via the
 *    VSOC_WAIT_FOR_INCOMING_INTERRUPT ioctl.
 */
struct vsoc_signal_table_layout {
	/* log_2(Number of signal table entries) */
	__u32 num_nodes_lg2;
	/*
	 * Offset to the first signal table entry relative to the start of the
	 * region
	 */
	__u32 futex_uaddr_table_offset;
	/*
	 * Offset to an atomic_t / atomic uint32_t. A non-zero value indicates
	 * that one or more offsets are currently posted in the table.
	 *
	 * NOTE(review): the original comment ended with the dangling fragment
	 * "semi-unique access to an entry in the table"; its intent is unclear.
	 */
	__u32 interrupt_signalled_offset;
};

/*
 * Value of vsoc_device_region.managed_by for a region that is not managed by
 * another region.
 *
 * NOTE(review): this is cast to __s32 while managed_by is a __u32; the cast is
 * kept as-is so that existing users see the same constant.
 */
#define VSOC_REGION_WHOLE ((__s32)0)

/* Size of vsoc_device_region.device_name, including the terminating '\0' */
#define VSOC_DEVICE_NAME_SZ 16

/**
 * Each HAL would (usually) talk to a single device region.
 * Multiple entities care about these regions:
 * - The ivshmem_server will populate the regions in shared memory
 * - The guest kernel will read the region, create minor device nodes, and
 *   allow interested parties to register for FUTEX_WAKE events in the region
 * - HALs will access via the minor device nodes published by the guest kernel
 * - Host side processes will access the region via the ivshmem_server:
 *   1. Pass name to ivshmem_server at a UNIX socket
 *   2. ivshmem_server will reply with:
 *     - host->guest doorbell fd
 *     - guest->host doorbell fd
 *     - fd for the shared memory region
 *     - region offset
 *   3. Start a futex receiver thread on the doorbell fd pointed at the
 *      signal_nodes
 */
struct vsoc_device_region {
	/*
	 * Per-region versioning, left for the HALs and host processes; the
	 * driver does not consult these two fields.
	 */
	__u16 current_version;
	__u16 min_compatible_version;
	__u32 region_begin_offset;
	__u32 region_end_offset;
	__u32 offset_of_region_data;
	struct vsoc_signal_table_layout guest_to_host_signal_table;
	struct vsoc_signal_table_layout host_to_guest_signal_table;
	/* Name of the device. Must always be terminated with a '\0', so
	 * the longest supported device name is 15 characters.
	 */
	char device_name[VSOC_DEVICE_NAME_SZ];
	/* There are two ways that permissions to access regions are handled:
	 *   - When managed_by is VSOC_REGION_WHOLE, any process that can
	 *     open the device node for the region gains complete access to it.
	 *   - When managed_by is set, processes that open the region cannot
	 *     access it. Access to a sub-region must be established by invoking
	 *     the VSOC_CREATE_FD_SCOPED_PERMISSION ioctl on the region
	 *     referenced in managed_by, providing a file instance
	 *     (represented by a fd) opened on this region.
	 */
	__u32 managed_by;
};

/*
 * The vsoc layout descriptor.
 * The first 4K should be reserved for the shm header and region descriptors.
 * The regions should be page aligned.
 */
struct vsoc_shm_layout_descriptor {
	/* See CURRENT_VSOC_LAYOUT_MAJOR_VERSION / _MINOR_VERSION below */
	__u16 major_version;
	__u16 minor_version;

	/* size of the shm. This may be redundant but nice to have */
	__u32 size;

	/* number of shared memory regions */
	__u32 region_count;

	/* The offset to the start of region descriptors */
	__u32 vsoc_region_desc_offset;
};

/*
 * This specifies the current version that should be stored in
 * vsoc_shm_layout_descriptor.major_version and
 * vsoc_shm_layout_descriptor.minor_version.
 * It should be updated only if the vsoc_device_region and
 * vsoc_shm_layout_descriptor structures have changed.
 * Versioning within each region is transferred
 * via the min_compatible_version and current_version fields in
 * vsoc_device_region. The driver does not consult these fields: they are left
 * for the HALs and host processes and will change independently of the layout
 * version.
 */
#define CURRENT_VSOC_LAYOUT_MAJOR_VERSION 2
#define CURRENT_VSOC_LAYOUT_MINOR_VERSION 0

/*
 * NOTE(review): the size encoded in VSOC_CREATE_FD_SCOPED_PERMISSION is that
 * of struct fd_scoped_permission, while the argument documented for this ioctl
 * is struct fd_scoped_permission_arg. The encoding is deliberately left
 * untouched because changing it would change the ioctl number seen by existing
 * users; confirm which structure the driver actually copies.
 */
#define VSOC_CREATE_FD_SCOPED_PERMISSION \
	_IOW(0xF5, 0, struct fd_scoped_permission)
#define VSOC_GET_FD_SCOPED_PERMISSION _IOR(0xF5, 1, struct fd_scoped_permission)

/*
 * This is used to signal the host to scan the guest_to_host_signal_table
 * for new futexes to wake. This sends an interrupt if one is not already
 * in flight.
 */
#define VSOC_MAYBE_SEND_INTERRUPT_TO_HOST _IO(0xF5, 2)

/*
 * When this returns the guest will scan host_to_guest_signal_table to
 * check for new futexes to wake.
 */
/* TODO(ghartman): Consider moving this to the bottom half */
#define VSOC_WAIT_FOR_INCOMING_INTERRUPT _IO(0xF5, 3)

/*
 * Guest HALs will use this to retrieve the region description after
 * opening their device node.
 */
#define VSOC_DESCRIBE_REGION _IOR(0xF5, 4, struct vsoc_device_region)

/*
 * Wake any threads that may be waiting for a host interrupt on this region.
 * This is mostly used during shutdown.
 */
#define VSOC_SELF_INTERRUPT _IO(0xF5, 5)

/*
 * This is used to signal the host to scan the guest_to_host_signal_table
 * for new futexes to wake. This sends an interrupt unconditionally.
 */
#define VSOC_SEND_INTERRUPT_TO_HOST _IO(0xF5, 6)

/* Values for vsoc_cond_wait.wait_type */
enum wait_types {
	VSOC_WAIT_UNDEFINED = 0,
	VSOC_WAIT_IF_EQUAL = 1,
	VSOC_WAIT_IF_EQUAL_TIMEOUT = 2
};

/*
 * Wait for a condition to be true
 *
 * Note, this is sized and aligned so the 32 bit and 64 bit layouts are
 * identical.
 */
struct vsoc_cond_wait {
	/* Input: Offset of the 32 bit word to check */
	__u32 offset;
	/* Input: Value that will be compared with the offset */
	__u32 value;
	/*
	 * Input: Monotonic time to wake at, seconds part.
	 * NOTE(review): presumably only consulted for
	 * VSOC_WAIT_IF_EQUAL_TIMEOUT - confirm against the driver.
	 */
	__u64 wake_time_sec;
	/* Input: Monotonic time to wake at, nanoseconds part */
	__u32 wake_time_nsec;
	/* Input: Type of wait (one of enum wait_types) */
	__u32 wait_type;
	/* Output: Number of times the thread woke before returning. */
	__u32 wakes;
	/* Ensure that we're 8-byte aligned and 8 byte length for 32/64 bit
	 * compatibility.
	 */
	__u32 reserved_1;
};

#define VSOC_COND_WAIT _IOWR(0xF5, 7, struct vsoc_cond_wait)

/* Wake any local threads waiting at the offset given in arg */
#define VSOC_COND_WAKE _IO(0xF5, 8)

#endif /* _UAPI_LINUX_VSOC_SHM_H */