root/net/vmw_vsock/hyperv_transport.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. is_valid_srv_id
  2. get_port_by_srv_id
  3. hvs_addr_init
  4. hvs_set_channel_pending_send_size
  5. hvs_channel_readable
  6. hvs_channel_readable_payload
  7. hvs_channel_writable_bytes
  8. hvs_send_data
  9. hvs_channel_cb
  10. hvs_do_close_lock_held
  11. hvs_close_connection
  12. hvs_open_connection
  13. hvs_get_local_cid
  14. hvs_sock_init
  15. hvs_connect
  16. hvs_shutdown_lock_held
  17. hvs_shutdown
  18. hvs_close_timeout
  19. hvs_close_lock_held
  20. hvs_release
  21. hvs_destruct
  22. hvs_dgram_bind
  23. hvs_dgram_dequeue
  24. hvs_dgram_enqueue
  25. hvs_dgram_allow
  26. hvs_update_recv_data
  27. hvs_stream_dequeue
  28. hvs_stream_enqueue
  29. hvs_stream_has_data
  30. hvs_stream_has_space
  31. hvs_stream_rcvhiwat
  32. hvs_stream_is_active
  33. hvs_stream_allow
  34. hvs_notify_poll_in
  35. hvs_notify_poll_out
  36. hvs_notify_recv_init
  37. hvs_notify_recv_pre_block
  38. hvs_notify_recv_pre_dequeue
  39. hvs_notify_recv_post_dequeue
  40. hvs_notify_send_init
  41. hvs_notify_send_pre_block
  42. hvs_notify_send_pre_enqueue
  43. hvs_notify_send_post_enqueue
  44. hvs_set_buffer_size
  45. hvs_set_min_buffer_size
  46. hvs_set_max_buffer_size
  47. hvs_get_buffer_size
  48. hvs_get_min_buffer_size
  49. hvs_get_max_buffer_size
  50. hvs_probe
  51. hvs_remove
  52. hvs_init
  53. hvs_exit

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * Hyper-V transport for vsock
   4  *
   5  * Hyper-V Sockets supplies a byte-stream based communication mechanism
   6  * between the host and the VM. This driver implements the necessary
   7  * support in the VM by introducing the new vsock transport.
   8  *
   9  * Copyright (c) 2017, Microsoft Corporation.
  10  */
  11 #include <linux/module.h>
  12 #include <linux/vmalloc.h>
  13 #include <linux/hyperv.h>
  14 #include <net/sock.h>
  15 #include <net/af_vsock.h>
  16 
  17 /* Older (VMBUS version 'VERSION_WIN10' or before) Windows hosts have some
  18  * stricter requirements on the hv_sock ring buffer size of six 4K pages. Newer
  19  * hosts don't have this limitation; but, keep the defaults the same for compat.
  20  */
  21 #define PAGE_SIZE_4K            4096
  22 #define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6)
  23 #define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6)
  24 #define RINGBUFFER_HVS_MAX_SIZE (PAGE_SIZE_4K * 64)
  25 
  26 /* The MTU is 16KB per the host side's design */
  27 #define HVS_MTU_SIZE            (1024 * 16)
  28 
  29 /* How long to wait for graceful shutdown of a connection */
  30 #define HVS_CLOSE_TIMEOUT (8 * HZ)
  31 
/* On-the-wire header that precedes every hv_sock payload.
 * See hvs_send_data() for how it is populated on the send path and
 * hvs_update_recv_data() for how it is consumed on the receive path.
 */
struct vmpipe_proto_header {
        u32 pkt_type;   /* always set to 1 by hvs_send_data() */
        u32 data_size;  /* payload bytes that follow; 0 signals FIN */
};

/* For recv, we use the VMBus in-place packet iterator APIs to directly copy
 * data from the ringbuffer into the userspace buffer.
 */
struct hvs_recv_buf {
        /* The header before the payload data */
        struct vmpipe_proto_header hdr;

        /* The payload */
        u8 data[HVS_MTU_SIZE];
};
  47 
  48 /* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use
  49  * a smaller size, i.e. HVS_SEND_BUF_SIZE, to maximize concurrency between the
  50  * guest and the host processing as one VMBUS packet is the smallest processing
  51  * unit.
  52  *
  53  * Note: the buffer can be eliminated in the future when we add new VMBus
  54  * ringbuffer APIs that allow us to directly copy data from userspace buffer
  55  * to VMBus ringbuffer.
  56  */
  57 #define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header))
  58 
/* Bounce buffer used by hvs_stream_enqueue(); sized so the whole
 * structure is exactly one 4K page (see the BUILD_BUG_ON there).
 */
struct hvs_send_buf {
        /* The header before the payload data */
        struct vmpipe_proto_header hdr;

        /* The payload */
        u8 data[HVS_SEND_BUF_SIZE];
};
  66 
  67 #define HVS_HEADER_LEN  (sizeof(struct vmpacket_descriptor) + \
  68                          sizeof(struct vmpipe_proto_header))
  69 
  70 /* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write(), and
  71  * __hv_pkt_iter_next().
  72  */
  73 #define VMBUS_PKT_TRAILER_SIZE  (sizeof(u64))
  74 
  75 #define HVS_PKT_LEN(payload_len)        (HVS_HEADER_LEN + \
  76                                          ALIGN((payload_len), 8) + \
  77                                          VMBUS_PKT_TRAILER_SIZE)
  78 
/* Overlay of a Hyper-V service GUID: the vsock port occupies the first
 * sizeof(unsigned int) bytes of the GUID (see get_port_by_srv_id() and
 * hvs_connect()); the remaining bytes come from srv_id_template.
 */
union hvs_service_id {
        guid_t  srv_id;

        struct {
                unsigned int svm_port;  /* overlays GUID bytes 0..3 */
                unsigned char b[sizeof(guid_t) - sizeof(unsigned int)];
        };
};
  87 
/* Per-socket state (accessed via vsk->trans) */
struct hvsock {
        /* Back-pointer to the owning vsock socket */
        struct vsock_sock *vsk;

        /* Service GUIDs identifying the two endpoints; the guest-side
         * port is embedded in vm_srv_id (see union hvs_service_id).
         */
        guid_t vm_srv_id;
        guid_t host_srv_id;

        /* The VMBUS channel carrying this connection; NULL until the
         * channel is attached in hvs_open_connection().
         */
        struct vmbus_channel *chan;
        /* In-place iterator position of the packet currently being
         * drained by hvs_stream_dequeue(); NULL when no packet is held.
         */
        struct vmpacket_descriptor *recv_desc;

        /* The length of the payload not delivered to userland yet */
        u32 recv_data_len;
        /* The offset of the payload */
        u32 recv_data_off;

        /* Have we sent the zero-length packet (FIN)? */
        bool fin_sent;
};
 106 
 107 /* In the VM, we support Hyper-V Sockets with AF_VSOCK, and the endpoint is
 108  * <cid, port> (see struct sockaddr_vm). Note: cid is not really used here:
 109  * when we write apps to connect to the host, we can only use VMADDR_CID_ANY
 110  * or VMADDR_CID_HOST (both are equivalent) as the remote cid, and when we
 111  * write apps to bind() & listen() in the VM, we can only use VMADDR_CID_ANY
 112  * as the local cid.
 113  *
 114  * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV:
 115  * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user-
 116  * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with
 117  * the below sockaddr:
 118  *
 119  * struct SOCKADDR_HV
 120  * {
 121  *    ADDRESS_FAMILY Family;
 122  *    USHORT Reserved;
 123  *    GUID VmId;
 124  *    GUID ServiceId;
 125  * };
 126  * Note: VmID is not used by Linux VM and actually it isn't transmitted via
 127  * VMBus, because here it's obvious the host and the VM can easily identify
 128  * each other. Though the VmID is useful on the host, especially in the case
 129  * of Windows container, Linux VM doesn't need it at all.
 130  *
 131  * To make use of the AF_VSOCK infrastructure in Linux VM, we have to limit
 132  * the available GUID space of SOCKADDR_HV so that we can create a mapping
 133  * between AF_VSOCK port and SOCKADDR_HV Service GUID. The rule of writing
 134  * Hyper-V Sockets apps on the host and in Linux VM is:
 135  *
 136  ****************************************************************************
 137  * The only valid Service GUIDs, from the perspectives of both the host and *
 138  * Linux VM, that can be connected by the other end, must conform to this   *
 139  * format: <port>-facb-11e6-bd58-64006a7986d3.                              *
 140  ****************************************************************************
 141  *
 142  * When we write apps on the host to connect(), the GUID ServiceID is used.
 143  * When we write apps in Linux VM to connect(), we only need to specify the
 144  * port and the driver will form the GUID and use that to request the host.
 145  *
 146  */
 147 
 148 /* 00000000-facb-11e6-bd58-64006a7986d3 */
 149 static const guid_t srv_id_template =
 150         GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
 151                   0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
 152 
 153 static bool is_valid_srv_id(const guid_t *id)
 154 {
 155         return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(guid_t) - 4);
 156 }
 157 
 158 static unsigned int get_port_by_srv_id(const guid_t *svr_id)
 159 {
 160         return *((unsigned int *)svr_id);
 161 }
 162 
 163 static void hvs_addr_init(struct sockaddr_vm *addr, const guid_t *svr_id)
 164 {
 165         unsigned int port = get_port_by_srv_id(svr_id);
 166 
 167         vsock_addr_init(addr, VMADDR_CID_ANY, port);
 168 }
 169 
/* Ask the host to interrupt us once at least one maximum-sized packet's
 * worth of outbound ring space is writable (see the comment at the
 * caller in hvs_open_connection()).
 */
static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan)
{
        set_channel_pending_send_size(chan,
                                      HVS_PKT_LEN(HVS_SEND_BUF_SIZE));

        /* Ensure the pending-send-size store above is ordered before any
         * subsequent ring-buffer accesses.
         */
        virt_mb();
}
 177 
 178 static bool hvs_channel_readable(struct vmbus_channel *chan)
 179 {
 180         u32 readable = hv_get_bytes_to_read(&chan->inbound);
 181 
 182         /* 0-size payload means FIN */
 183         return readable >= HVS_PKT_LEN(0);
 184 }
 185 
 186 static int hvs_channel_readable_payload(struct vmbus_channel *chan)
 187 {
 188         u32 readable = hv_get_bytes_to_read(&chan->inbound);
 189 
 190         if (readable > HVS_PKT_LEN(0)) {
 191                 /* At least we have 1 byte to read. We don't need to return
 192                  * the exact readable bytes: see vsock_stream_recvmsg() ->
 193                  * vsock_stream_has_data().
 194                  */
 195                 return 1;
 196         }
 197 
 198         if (readable == HVS_PKT_LEN(0)) {
 199                 /* 0-size payload means FIN */
 200                 return 0;
 201         }
 202 
 203         /* No payload or FIN */
 204         return -1;
 205 }
 206 
 207 static size_t hvs_channel_writable_bytes(struct vmbus_channel *chan)
 208 {
 209         u32 writeable = hv_get_bytes_to_write(&chan->outbound);
 210         size_t ret;
 211 
 212         /* The ringbuffer mustn't be 100% full, and we should reserve a
 213          * zero-length-payload packet for the FIN: see hv_ringbuffer_write()
 214          * and hvs_shutdown().
 215          */
 216         if (writeable <= HVS_PKT_LEN(1) + HVS_PKT_LEN(0))
 217                 return 0;
 218 
 219         ret = writeable - HVS_PKT_LEN(1) - HVS_PKT_LEN(0);
 220 
 221         return round_down(ret, 8);
 222 }
 223 
 224 static int hvs_send_data(struct vmbus_channel *chan,
 225                          struct hvs_send_buf *send_buf, size_t to_write)
 226 {
 227         send_buf->hdr.pkt_type = 1;
 228         send_buf->hdr.data_size = to_write;
 229         return vmbus_sendpacket(chan, &send_buf->hdr,
 230                                 sizeof(send_buf->hdr) + to_write,
 231                                 0, VM_PKT_DATA_INBAND, 0);
 232 }
 233 
/* VMBUS channel interrupt callback. 'ctx' is the struct sock that was
 * registered with vmbus_open() in hvs_open_connection().
 */
static void hvs_channel_cb(void *ctx)
{
        struct sock *sk = (struct sock *)ctx;
        struct vsock_sock *vsk = vsock_sk(sk);
        struct hvsock *hvs = vsk->trans;
        struct vmbus_channel *chan = hvs->chan;

        /* Wake readers if a complete packet (possibly a FIN) is present. */
        if (hvs_channel_readable(chan))
                sk->sk_data_ready(sk);

        /* Wake writers if any outbound ring space has opened up. */
        if (hv_get_bytes_to_write(&chan->outbound) > 0)
                sk->sk_write_space(sk);
}
 247 
/* Mark the connection closed by the peer and, if a delayed close was
 * scheduled, complete the deferred socket removal now. Caller must hold
 * the socket lock.
 */
static void hvs_do_close_lock_held(struct vsock_sock *vsk,
                                   bool cancel_timeout)
{
        struct sock *sk = sk_vsock(vsk);

        sock_set_flag(sk, SOCK_DONE);
        vsk->peer_shutdown = SHUTDOWN_MASK;
        /* Only move to TCP_CLOSING once there is no unread data left, so
         * recvmsg() can still drain what the host already sent.
         */
        if (vsock_stream_has_data(vsk) <= 0)
                sk->sk_state = TCP_CLOSING;
        sk->sk_state_change(sk);
        /* If a close timeout is pending (see hvs_close_lock_held()),
         * finish its work here — but when asked to cancel it, only if we
         * actually won the race against the work running.
         */
        if (vsk->close_work_scheduled &&
            (!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
                vsk->close_work_scheduled = false;
                vsock_remove_sock(vsk);

                /* Release the reference taken while scheduling the timeout */
                sock_put(sk);
        }
}
 267 
/* Channel rescind callback (registered in hvs_open_connection()): the
 * host has closed or rescinded the connection.
 */
static void hvs_close_connection(struct vmbus_channel *chan)
{
        struct sock *sk = get_per_channel_state(chan);

        lock_sock(sk);
        hvs_do_close_lock_held(vsock_sk(sk), true);
        release_sock(sk);

        /* Release the refcnt for the channel that's opened in
         * hvs_open_connection().
         */
        sock_put(sk);
}
 281 
/* Handle a new VMBUS channel offer. Two cases, distinguished by the
 * host-supplied user_def[0] byte:
 *  - conn_from_host: the host is connecting to a guest listener; we
 *    create a child socket and enqueue it for accept().
 *  - !conn_from_host: this is the host's reply to a guest connect()
 *    request previously sent via vmbus_send_tl_connect_request()
 *    (see hvs_connect()); the socket is still in TCP_SYN_SENT.
 */
static void hvs_open_connection(struct vmbus_channel *chan)
{
        guid_t *if_instance, *if_type;
        unsigned char conn_from_host;

        struct sockaddr_vm addr;
        struct sock *sk, *new = NULL;
        struct vsock_sock *vnew = NULL;
        struct hvsock *hvs = NULL;
        struct hvsock *hvs_new = NULL;
        int rcvbuf;
        int ret;
        int sndbuf;

        if_type = &chan->offermsg.offer.if_type;
        if_instance = &chan->offermsg.offer.if_instance;
        conn_from_host = chan->offermsg.offer.u.pipe.user_def[0];
        /* Ignore offers whose service GUID doesn't follow the
         * <port>-facb-11e6-bd58-64006a7986d3 template.
         */
        if (!is_valid_srv_id(if_type))
                return;

        hvs_addr_init(&addr, conn_from_host ? if_type : if_instance);
        sk = vsock_find_bound_socket(&addr);
        if (!sk)
                return;

        lock_sock(sk);
        /* Host-initiated connections must land on a listener; guest-
         * initiated ones must find the socket still mid-connect.
         */
        if ((conn_from_host && sk->sk_state != TCP_LISTEN) ||
            (!conn_from_host && sk->sk_state != TCP_SYN_SENT))
                goto out;

        if (conn_from_host) {
                if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog)
                        goto out;

                new = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL,
                                     sk->sk_type, 0);
                if (!new)
                        goto out;

                new->sk_state = TCP_SYN_SENT;
                vnew = vsock_sk(new);

                hvs_addr_init(&vnew->local_addr, if_type);

                /* Remote peer is always the host */
                vsock_addr_init(&vnew->remote_addr,
                                VMADDR_CID_HOST, VMADDR_PORT_ANY);
                vnew->remote_addr.svm_port = get_port_by_srv_id(if_instance);
                hvs_new = vnew->trans;
                hvs_new->chan = chan;
        } else {
                hvs = vsock_sk(sk)->trans;
                hvs->chan = chan;
        }

        set_channel_read_mode(chan, HV_CALL_DIRECT);

        /* Use the socket buffer sizes as hints for the VMBUS ring size. For
         * server side sockets, 'sk' is the parent socket and thus, this will
         * allow the child sockets to inherit the size from the parent. Keep
         * the mins to the default value and align to page size as per VMBUS
         * requirements.
         * For the max, the socket core library will limit the socket buffer
         * size that can be set by the user, but, since currently, the hv_sock
         * VMBUS ring buffer is physically contiguous allocation, restrict it
         * further.
         * Older versions of hv_sock host side code cannot handle bigger VMBUS
         * ring buffer size. Use the version number to limit the change to newer
         * versions.
         */
        if (vmbus_proto_version < VERSION_WIN10_V5) {
                sndbuf = RINGBUFFER_HVS_SND_SIZE;
                rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
        } else {
                sndbuf = max_t(int, sk->sk_sndbuf, RINGBUFFER_HVS_SND_SIZE);
                sndbuf = min_t(int, sndbuf, RINGBUFFER_HVS_MAX_SIZE);
                sndbuf = ALIGN(sndbuf, PAGE_SIZE);
                rcvbuf = max_t(int, sk->sk_rcvbuf, RINGBUFFER_HVS_RCV_SIZE);
                rcvbuf = min_t(int, rcvbuf, RINGBUFFER_HVS_MAX_SIZE);
                rcvbuf = ALIGN(rcvbuf, PAGE_SIZE);
        }

        ret = vmbus_open(chan, sndbuf, rcvbuf, NULL, 0, hvs_channel_cb,
                         conn_from_host ? new : sk);
        if (ret != 0) {
                if (conn_from_host) {
                        hvs_new->chan = NULL;
                        sock_put(new);
                } else {
                        hvs->chan = NULL;
                }
                goto out;
        }

        set_per_channel_state(chan, conn_from_host ? new : sk);

        /* This reference will be dropped by hvs_close_connection(). */
        sock_hold(conn_from_host ? new : sk);
        vmbus_set_chn_rescind_callback(chan, hvs_close_connection);

        /* Set the pending send size to max packet size to always get
         * notifications from the host when there is enough writable space.
         * The host is optimized to send notifications only when the pending
         * size boundary is crossed, and not always.
         */
        hvs_set_channel_pending_send_size(chan);

        if (conn_from_host) {
                new->sk_state = TCP_ESTABLISHED;
                sk->sk_ack_backlog++;

                hvs_addr_init(&vnew->local_addr, if_type);
                hvs_new->vm_srv_id = *if_type;
                hvs_new->host_srv_id = *if_instance;

                vsock_insert_connected(vnew);

                vsock_enqueue_accept(sk, new);
        } else {
                sk->sk_state = TCP_ESTABLISHED;
                sk->sk_socket->state = SS_CONNECTED;

                vsock_insert_connected(vsock_sk(sk));
        }

        sk->sk_state_change(sk);

out:
        /* Release refcnt obtained when we called vsock_find_bound_socket() */
        sock_put(sk);

        /* NOTE(review): sock_put() before release_sock() looks inverted;
         * presumably other references (e.g. the bound-socket table) keep
         * 'sk' alive here — worth confirming against af_vsock refcounting.
         */
        release_sock(sk);
}
 415 
/* hv_sock doesn't use a real guest CID (see the big comment above
 * srv_id_template), so report VMADDR_CID_ANY.
 */
static u32 hvs_get_local_cid(void)
{
        return VMADDR_CID_ANY;
}
 420 
 421 static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk)
 422 {
 423         struct hvsock *hvs;
 424         struct sock *sk = sk_vsock(vsk);
 425 
 426         hvs = kzalloc(sizeof(*hvs), GFP_KERNEL);
 427         if (!hvs)
 428                 return -ENOMEM;
 429 
 430         vsk->trans = hvs;
 431         hvs->vsk = vsk;
 432         sk->sk_sndbuf = RINGBUFFER_HVS_SND_SIZE;
 433         sk->sk_rcvbuf = RINGBUFFER_HVS_RCV_SIZE;
 434         return 0;
 435 }
 436 
 437 static int hvs_connect(struct vsock_sock *vsk)
 438 {
 439         union hvs_service_id vm, host;
 440         struct hvsock *h = vsk->trans;
 441 
 442         vm.srv_id = srv_id_template;
 443         vm.svm_port = vsk->local_addr.svm_port;
 444         h->vm_srv_id = vm.srv_id;
 445 
 446         host.srv_id = srv_id_template;
 447         host.svm_port = vsk->remote_addr.svm_port;
 448         h->host_srv_id = host.srv_id;
 449 
 450         return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
 451 }
 452 
/* Send the zero-length-payload packet (FIN) to the host, at most once.
 * Caller must hold the socket lock.
 */
static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode)
{
        struct vmpipe_proto_header hdr;

        if (hvs->fin_sent || !hvs->chan)
                return;

        /* The cast is safe here: with a 0-byte payload, hvs_send_data()
         * only touches the header portion of the "send buffer".
         */
        /* It can't fail: see hvs_channel_writable_bytes(). */
        (void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0);
        hvs->fin_sent = true;
}
 464 
 465 static int hvs_shutdown(struct vsock_sock *vsk, int mode)
 466 {
 467         struct sock *sk = sk_vsock(vsk);
 468 
 469         if (!(mode & SEND_SHUTDOWN))
 470                 return 0;
 471 
 472         lock_sock(sk);
 473         hvs_shutdown_lock_held(vsk->trans, mode);
 474         release_sock(sk);
 475         return 0;
 476 }
 477 
/* Delayed-work handler armed by hvs_close_lock_held(): if the host has
 * not closed the connection within HVS_CLOSE_TIMEOUT, force the close.
 */
static void hvs_close_timeout(struct work_struct *work)
{
        struct vsock_sock *vsk =
                container_of(work, struct vsock_sock, close_work.work);
        struct sock *sk = sk_vsock(vsk);

        /* Temporary hold while we work; the scheduling-time reference is
         * dropped inside hvs_do_close_lock_held().
         */
        sock_hold(sk);
        lock_sock(sk);
        /* SOCK_DONE means hvs_close_connection() already ran. */
        if (!sock_flag(sk, SOCK_DONE))
                hvs_do_close_lock_held(vsk, false);

        vsk->close_work_scheduled = false;
        release_sock(sk);
        sock_put(sk);
}
 493 
/* Returns true, if it is safe to remove socket; false otherwise */
static bool hvs_close_lock_held(struct vsock_sock *vsk)
{
        struct sock *sk = sk_vsock(vsk);

        /* Not in a connected/half-closed state: nothing to do on the
         * wire, the socket can be removed right away.
         */
        if (!(sk->sk_state == TCP_ESTABLISHED ||
              sk->sk_state == TCP_CLOSING))
                return true;

        /* Send our FIN if we haven't fully shut down yet. */
        if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
                hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK);

        /* SOCK_DONE is set once the host side closed (see
         * hvs_do_close_lock_held()); then no timeout is needed.
         */
        if (sock_flag(sk, SOCK_DONE))
                return true;

        /* Otherwise wait (bounded) for the host's close/rescind. */
        /* This reference will be dropped by the delayed close routine */
        sock_hold(sk);
        INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout);
        vsk->close_work_scheduled = true;
        schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT);
        return false;
}
 516 
/* vsock_transport.release: begin (and possibly complete) teardown. */
static void hvs_release(struct vsock_sock *vsk)
{
        struct sock *sk = sk_vsock(vsk);
        bool remove_sock;

        /* NOTE(review): SINGLE_DEPTH_NESTING presumably covers release
         * running while another socket lock (e.g. a listener's) is held —
         * confirm against the af_vsock locking rules.
         */
        lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
        remove_sock = hvs_close_lock_held(vsk);
        release_sock(sk);
        if (remove_sock)
                vsock_remove_sock(vsk);
}
 528 
 529 static void hvs_destruct(struct vsock_sock *vsk)
 530 {
 531         struct hvsock *hvs = vsk->trans;
 532         struct vmbus_channel *chan = hvs->chan;
 533 
 534         if (chan)
 535                 vmbus_hvsock_device_unregister(chan);
 536 
 537         kfree(hvs);
 538 }
 539 
/* Hyper-V Sockets is a stream-only transport: all datagram operations
 * are unsupported stubs, and hvs_dgram_allow() rejects every endpoint.
 */
static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr)
{
        return -EOPNOTSUPP;
}

static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
                             size_t len, int flags)
{
        return -EOPNOTSUPP;
}

static int hvs_dgram_enqueue(struct vsock_sock *vsk,
                             struct sockaddr_vm *remote, struct msghdr *msg,
                             size_t dgram_len)
{
        return -EOPNOTSUPP;
}

static bool hvs_dgram_allow(u32 cid, u32 port)
{
        return false;
}
 562 
 563 static int hvs_update_recv_data(struct hvsock *hvs)
 564 {
 565         struct hvs_recv_buf *recv_buf;
 566         u32 payload_len;
 567 
 568         recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
 569         payload_len = recv_buf->hdr.data_size;
 570 
 571         if (payload_len > HVS_MTU_SIZE)
 572                 return -EIO;
 573 
 574         if (payload_len == 0)
 575                 hvs->vsk->peer_shutdown |= SEND_SHUTDOWN;
 576 
 577         hvs->recv_data_len = payload_len;
 578         hvs->recv_data_off = 0;
 579 
 580         return 0;
 581 }
 582 
 583 static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg,
 584                                   size_t len, int flags)
 585 {
 586         struct hvsock *hvs = vsk->trans;
 587         bool need_refill = !hvs->recv_desc;
 588         struct hvs_recv_buf *recv_buf;
 589         u32 to_read;
 590         int ret;
 591 
 592         if (flags & MSG_PEEK)
 593                 return -EOPNOTSUPP;
 594 
 595         if (need_refill) {
 596                 hvs->recv_desc = hv_pkt_iter_first(hvs->chan);
 597                 ret = hvs_update_recv_data(hvs);
 598                 if (ret)
 599                         return ret;
 600         }
 601 
 602         recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1);
 603         to_read = min_t(u32, len, hvs->recv_data_len);
 604         ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read);
 605         if (ret != 0)
 606                 return ret;
 607 
 608         hvs->recv_data_len -= to_read;
 609         if (hvs->recv_data_len == 0) {
 610                 hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc);
 611                 if (hvs->recv_desc) {
 612                         ret = hvs_update_recv_data(hvs);
 613                         if (ret)
 614                                 return ret;
 615                 }
 616         } else {
 617                 hvs->recv_data_off += to_read;
 618         }
 619 
 620         return to_read;
 621 }
 622 
/* Copy up to 'len' bytes from the user's msghdr into the outbound ring,
 * one HVS_SEND_BUF_SIZE chunk per VMBUS packet. Returns the number of
 * bytes written when any were, otherwise the error from the first
 * failing step (partial-write semantics: an error after data was sent
 * is swallowed so the caller learns about the progress made).
 */
static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg,
                                  size_t len)
{
        struct hvsock *hvs = vsk->trans;
        struct vmbus_channel *chan = hvs->chan;
        struct hvs_send_buf *send_buf;
        ssize_t to_write, max_writable;
        ssize_t ret = 0;
        ssize_t bytes_written = 0;

        /* header + payload must be exactly one 4K page. */
        BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K);

        send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL);
        if (!send_buf)
                return -ENOMEM;

        /* Reader(s) could be draining data from the channel as we write.
         * Maximize bandwidth, by iterating until the channel is found to be
         * full.
         */
        while (len) {
                max_writable = hvs_channel_writable_bytes(chan);
                if (!max_writable)
                        break;
                to_write = min_t(ssize_t, len, max_writable);
                to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE);
                /* memcpy_from_msg is safe for loop as it advances the offsets
                 * within the message iterator.
                 */
                ret = memcpy_from_msg(send_buf->data, msg, to_write);
                if (ret < 0)
                        goto out;

                ret = hvs_send_data(hvs->chan, send_buf, to_write);
                if (ret < 0)
                        goto out;

                bytes_written += to_write;
                len -= to_write;
        }
out:
        /* If any data has been sent, return that */
        if (bytes_written)
                ret = bytes_written;
        kfree(send_buf);
        return ret;
}
 670 
 671 static s64 hvs_stream_has_data(struct vsock_sock *vsk)
 672 {
 673         struct hvsock *hvs = vsk->trans;
 674         s64 ret;
 675 
 676         if (hvs->recv_data_len > 0)
 677                 return 1;
 678 
 679         switch (hvs_channel_readable_payload(hvs->chan)) {
 680         case 1:
 681                 ret = 1;
 682                 break;
 683         case 0:
 684                 vsk->peer_shutdown |= SEND_SHUTDOWN;
 685                 ret = 0;
 686                 break;
 687         default: /* -1 */
 688                 ret = 0;
 689                 break;
 690         }
 691 
 692         return ret;
 693 }
 694 
/* Payload bytes that can currently be written to the outbound ring. */
static s64 hvs_stream_has_space(struct vsock_sock *vsk)
{
        struct hvsock *hvs = vsk->trans;

        return hvs_channel_writable_bytes(hvs->chan);
}
 701 
/* Receive high-water mark: one byte more than the largest possible
 * payload, so a full-MTU packet never stalls the receive path.
 */
static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk)
{
        return HVS_MTU_SIZE + 1;
}
 706 
/* The connection is usable as long as a VMBUS channel is attached;
 * hvs_open_connection() sets chan and error paths clear it.
 */
static bool hvs_stream_is_active(struct vsock_sock *vsk)
{
        struct hvsock *hvs = vsk->trans;

        return hvs->chan != NULL;
}
 713 
 714 static bool hvs_stream_allow(u32 cid, u32 port)
 715 {
 716         if (cid == VMADDR_CID_HOST)
 717                 return true;
 718 
 719         return false;
 720 }
 721 
/* Poll/notify hooks required by struct vsock_transport. hv_sock needs
 * no bookkeeping around blocking recv/send, so everything below either
 * answers the poll question directly or is a no-op returning success.
 */
static
int hvs_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *readable)
{
        struct hvsock *hvs = vsk->trans;

        /* Readable once a complete packet (data or FIN) is in the ring. */
        *readable = hvs_channel_readable(hvs->chan);
        return 0;
}

static
int hvs_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *writable)
{
        /* Writable once at least one payload byte fits in the ring. */
        *writable = hvs_stream_has_space(vsk) > 0;

        return 0;
}

static
int hvs_notify_recv_init(struct vsock_sock *vsk, size_t target,
                         struct vsock_transport_recv_notify_data *d)
{
        return 0;
}

static
int hvs_notify_recv_pre_block(struct vsock_sock *vsk, size_t target,
                              struct vsock_transport_recv_notify_data *d)
{
        return 0;
}

static
int hvs_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target,
                                struct vsock_transport_recv_notify_data *d)
{
        return 0;
}

static
int hvs_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target,
                                 ssize_t copied, bool data_read,
                                 struct vsock_transport_recv_notify_data *d)
{
        return 0;
}

static
int hvs_notify_send_init(struct vsock_sock *vsk,
                         struct vsock_transport_send_notify_data *d)
{
        return 0;
}

static
int hvs_notify_send_pre_block(struct vsock_sock *vsk,
                              struct vsock_transport_send_notify_data *d)
{
        return 0;
}

static
int hvs_notify_send_pre_enqueue(struct vsock_sock *vsk,
                                struct vsock_transport_send_notify_data *d)
{
        return 0;
}

static
int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
                                 struct vsock_transport_send_notify_data *d)
{
        return 0;
}
 795 
/* Buffer-size knobs: the VMBUS ring size is fixed at channel-open time
 * (see hvs_open_connection()), so the setters are ignored and the
 * getters report "no such option".
 *
 * NOTE(review): the getters return -ENOPROTOOPT through a u64, which
 * becomes a huge positive value — presumably the caller treats this as
 * an opaque sentinel; worth confirming against the af_vsock core.
 */
static void hvs_set_buffer_size(struct vsock_sock *vsk, u64 val)
{
        /* Ignored. */
}

static void hvs_set_min_buffer_size(struct vsock_sock *vsk, u64 val)
{
        /* Ignored. */
}

static void hvs_set_max_buffer_size(struct vsock_sock *vsk, u64 val)
{
        /* Ignored. */
}

static u64 hvs_get_buffer_size(struct vsock_sock *vsk)
{
        return -ENOPROTOOPT;
}

static u64 hvs_get_min_buffer_size(struct vsock_sock *vsk)
{
        return -ENOPROTOOPT;
}

static u64 hvs_get_max_buffer_size(struct vsock_sock *vsk)
{
        return -ENOPROTOOPT;
}
 825 
/* Transport operations registered with the af_vsock core in hvs_init().
 * Datagram ops are stubs (stream-only transport); notify ops are no-ops.
 */
static struct vsock_transport hvs_transport = {
        .get_local_cid            = hvs_get_local_cid,

        .init                     = hvs_sock_init,
        .destruct                 = hvs_destruct,
        .release                  = hvs_release,
        .connect                  = hvs_connect,
        .shutdown                 = hvs_shutdown,

        .dgram_bind               = hvs_dgram_bind,
        .dgram_dequeue            = hvs_dgram_dequeue,
        .dgram_enqueue            = hvs_dgram_enqueue,
        .dgram_allow              = hvs_dgram_allow,

        .stream_dequeue           = hvs_stream_dequeue,
        .stream_enqueue           = hvs_stream_enqueue,
        .stream_has_data          = hvs_stream_has_data,
        .stream_has_space         = hvs_stream_has_space,
        .stream_rcvhiwat          = hvs_stream_rcvhiwat,
        .stream_is_active         = hvs_stream_is_active,
        .stream_allow             = hvs_stream_allow,

        .notify_poll_in           = hvs_notify_poll_in,
        .notify_poll_out          = hvs_notify_poll_out,
        .notify_recv_init         = hvs_notify_recv_init,
        .notify_recv_pre_block    = hvs_notify_recv_pre_block,
        .notify_recv_pre_dequeue  = hvs_notify_recv_pre_dequeue,
        .notify_recv_post_dequeue = hvs_notify_recv_post_dequeue,
        .notify_send_init         = hvs_notify_send_init,
        .notify_send_pre_block    = hvs_notify_send_pre_block,
        .notify_send_pre_enqueue  = hvs_notify_send_pre_enqueue,
        .notify_send_post_enqueue = hvs_notify_send_post_enqueue,

        .set_buffer_size          = hvs_set_buffer_size,
        .set_min_buffer_size      = hvs_set_min_buffer_size,
        .set_max_buffer_size      = hvs_set_max_buffer_size,
        .get_buffer_size          = hvs_get_buffer_size,
        .get_min_buffer_size      = hvs_get_min_buffer_size,
        .get_max_buffer_size      = hvs_get_max_buffer_size,
};
 866 
 867 static int hvs_probe(struct hv_device *hdev,
 868                      const struct hv_vmbus_device_id *dev_id)
 869 {
 870         struct vmbus_channel *chan = hdev->channel;
 871 
 872         hvs_open_connection(chan);
 873 
 874         /* Always return success to suppress the unnecessary error message
 875          * in vmbus_probe(): on error the host will rescind the device in
 876          * 30 seconds and we can do cleanup at that time in
 877          * vmbus_onoffer_rescind().
 878          */
 879         return 0;
 880 }
 881 
 882 static int hvs_remove(struct hv_device *hdev)
 883 {
 884         struct vmbus_channel *chan = hdev->channel;
 885 
 886         vmbus_close(chan);
 887 
 888         return 0;
 889 }
 890 
/* This isn't really used. See vmbus_match() and vmbus_probe() */
static const struct hv_vmbus_device_id id_table[] = {
        {},
};

static struct hv_driver hvs_drv = {
        .name           = "hv_sock",
        /* Route hv_sock channel offers to this driver. */
        .hvsock         = true,
        .id_table       = id_table,
        .probe          = hvs_probe,
        .remove         = hvs_remove,
};
 903 
 904 static int __init hvs_init(void)
 905 {
 906         int ret;
 907 
 908         if (vmbus_proto_version < VERSION_WIN10)
 909                 return -ENODEV;
 910 
 911         ret = vmbus_driver_register(&hvs_drv);
 912         if (ret != 0)
 913                 return ret;
 914 
 915         ret = vsock_core_init(&hvs_transport);
 916         if (ret) {
 917                 vmbus_driver_unregister(&hvs_drv);
 918                 return ret;
 919         }
 920 
 921         return 0;
 922 }
 923 
/* Module exit: tear down in the reverse order of hvs_init(). */
static void __exit hvs_exit(void)
{
        vsock_core_exit();
        vmbus_driver_unregister(&hvs_drv);
}
 929 
 930 module_init(hvs_init);
 931 module_exit(hvs_exit);
 932 
 933 MODULE_DESCRIPTION("Hyper-V Sockets");
 934 MODULE_VERSION("1.0.0");
 935 MODULE_LICENSE("GPL");
 936 MODULE_ALIAS_NETPROTO(PF_VSOCK);

/* [<][>][^][v][top][bottom][index][help] */