1 /*
2  * VMware VMCI Driver
3  *
4  * Copyright (C) 2012 VMware, Inc. All rights reserved.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the
8  * Free Software Foundation version 2 and no later version.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13  * for more details.
14  */
15 
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/cred.h>
#include <linux/slab.h>
24 
25 #include "vmci_queue_pair.h"
26 #include "vmci_datagram.h"
27 #include "vmci_doorbell.h"
28 #include "vmci_context.h"
29 #include "vmci_driver.h"
30 #include "vmci_event.h"
31 
32 /* Use a wide upper bound for the maximum contexts. */
33 #define VMCI_MAX_CONTEXTS 2000
34 
35 /*
36  * List of current VMCI contexts.  Contexts can be added by
37  * vmci_ctx_create() and removed via vmci_ctx_destroy().
38  * These, along with context lookup, are protected by the
39  * list structure's lock.
40  */
41 static struct {
42 	struct list_head head;
43 	spinlock_t lock; /* Spinlock for context list operations */
44 } ctx_list = {
45 	.head = LIST_HEAD_INIT(ctx_list.head),
46 	.lock = __SPIN_LOCK_UNLOCKED(ctx_list.lock),
47 };
48 
/*
 * Fallback notify flag.  A context's notify pointer refers to this
 * variable whenever the context has no notify page of its own (it is
 * installed by vmci_ctx_create() and restored by
 * vmci_ctx_unset_notify()), so the signal/clear helpers below can
 * always dereference context->notify.
 */
static bool ctx_dummy_notify;
51 
ctx_signal_notify(struct vmci_ctx * context)52 static void ctx_signal_notify(struct vmci_ctx *context)
53 {
54 	*context->notify = true;
55 }
56 
ctx_clear_notify(struct vmci_ctx * context)57 static void ctx_clear_notify(struct vmci_ctx *context)
58 {
59 	*context->notify = false;
60 }
61 
62 /*
63  * If nothing requires the attention of the guest, clears both
64  * notify flag and call.
65  */
ctx_clear_notify_call(struct vmci_ctx * context)66 static void ctx_clear_notify_call(struct vmci_ctx *context)
67 {
68 	if (context->pending_datagrams == 0 &&
69 	    vmci_handle_arr_get_size(context->pending_doorbell_array) == 0)
70 		ctx_clear_notify(context);
71 }
72 
73 /*
74  * Sets the context's notify flag iff datagrams are pending for this
75  * context.  Called from vmci_setup_notify().
76  */
vmci_ctx_check_signal_notify(struct vmci_ctx * context)77 void vmci_ctx_check_signal_notify(struct vmci_ctx *context)
78 {
79 	spin_lock(&context->lock);
80 	if (context->pending_datagrams)
81 		ctx_signal_notify(context);
82 	spin_unlock(&context->lock);
83 }
84 
85 /*
86  * Allocates and initializes a VMCI context.
87  */
vmci_ctx_create(u32 cid,u32 priv_flags,uintptr_t event_hnd,int user_version,const struct cred * cred)88 struct vmci_ctx *vmci_ctx_create(u32 cid, u32 priv_flags,
89 				 uintptr_t event_hnd,
90 				 int user_version,
91 				 const struct cred *cred)
92 {
93 	struct vmci_ctx *context;
94 	int error;
95 
96 	if (cid == VMCI_INVALID_ID) {
97 		pr_devel("Invalid context ID for VMCI context\n");
98 		error = -EINVAL;
99 		goto err_out;
100 	}
101 
102 	if (priv_flags & ~VMCI_PRIVILEGE_ALL_FLAGS) {
103 		pr_devel("Invalid flag (flags=0x%x) for VMCI context\n",
104 			 priv_flags);
105 		error = -EINVAL;
106 		goto err_out;
107 	}
108 
109 	if (user_version == 0) {
110 		pr_devel("Invalid suer_version %d\n", user_version);
111 		error = -EINVAL;
112 		goto err_out;
113 	}
114 
115 	context = kzalloc(sizeof(*context), GFP_KERNEL);
116 	if (!context) {
117 		pr_warn("Failed to allocate memory for VMCI context\n");
118 		error = -EINVAL;
119 		goto err_out;
120 	}
121 
122 	kref_init(&context->kref);
123 	spin_lock_init(&context->lock);
124 	INIT_LIST_HEAD(&context->list_item);
125 	INIT_LIST_HEAD(&context->datagram_queue);
126 	INIT_LIST_HEAD(&context->notifier_list);
127 
128 	/* Initialize host-specific VMCI context. */
129 	init_waitqueue_head(&context->host_context.wait_queue);
130 
131 	context->queue_pair_array =
132 		vmci_handle_arr_create(0, VMCI_MAX_GUEST_QP_COUNT);
133 	if (!context->queue_pair_array) {
134 		error = -ENOMEM;
135 		goto err_free_ctx;
136 	}
137 
138 	context->doorbell_array =
139 		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
140 	if (!context->doorbell_array) {
141 		error = -ENOMEM;
142 		goto err_free_qp_array;
143 	}
144 
145 	context->pending_doorbell_array =
146 		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
147 	if (!context->pending_doorbell_array) {
148 		error = -ENOMEM;
149 		goto err_free_db_array;
150 	}
151 
152 	context->user_version = user_version;
153 
154 	context->priv_flags = priv_flags;
155 
156 	if (cred)
157 		context->cred = get_cred(cred);
158 
159 	context->notify = &ctx_dummy_notify;
160 	context->notify_page = NULL;
161 
162 	/*
163 	 * If we collide with an existing context we generate a new
164 	 * and use it instead. The VMX will determine if regeneration
165 	 * is okay. Since there isn't 4B - 16 VMs running on a given
166 	 * host, the below loop will terminate.
167 	 */
168 	spin_lock(&ctx_list.lock);
169 
170 	while (vmci_ctx_exists(cid)) {
171 		/* We reserve the lowest 16 ids for fixed contexts. */
172 		cid = max(cid, VMCI_RESERVED_CID_LIMIT - 1) + 1;
173 		if (cid == VMCI_INVALID_ID)
174 			cid = VMCI_RESERVED_CID_LIMIT;
175 	}
176 	context->cid = cid;
177 
178 	list_add_tail_rcu(&context->list_item, &ctx_list.head);
179 	spin_unlock(&ctx_list.lock);
180 
181 	return context;
182 
183  err_free_db_array:
184 	vmci_handle_arr_destroy(context->doorbell_array);
185  err_free_qp_array:
186 	vmci_handle_arr_destroy(context->queue_pair_array);
187  err_free_ctx:
188 	kfree(context);
189  err_out:
190 	return ERR_PTR(error);
191 }
192 
193 /*
194  * Destroy VMCI context.
195  */
vmci_ctx_destroy(struct vmci_ctx * context)196 void vmci_ctx_destroy(struct vmci_ctx *context)
197 {
198 	spin_lock(&ctx_list.lock);
199 	list_del_rcu(&context->list_item);
200 	spin_unlock(&ctx_list.lock);
201 	synchronize_rcu();
202 
203 	vmci_ctx_put(context);
204 }
205 
206 /*
207  * Fire notification for all contexts interested in given cid.
208  */
ctx_fire_notification(u32 context_id,u32 priv_flags)209 static int ctx_fire_notification(u32 context_id, u32 priv_flags)
210 {
211 	u32 i, array_size;
212 	struct vmci_ctx *sub_ctx;
213 	struct vmci_handle_arr *subscriber_array;
214 	struct vmci_handle context_handle =
215 		vmci_make_handle(context_id, VMCI_EVENT_HANDLER);
216 
217 	/*
218 	 * We create an array to hold the subscribers we find when
219 	 * scanning through all contexts.
220 	 */
221 	subscriber_array = vmci_handle_arr_create(0, VMCI_MAX_CONTEXTS);
222 	if (subscriber_array == NULL)
223 		return VMCI_ERROR_NO_MEM;
224 
225 	/*
226 	 * Scan all contexts to find who is interested in being
227 	 * notified about given contextID.
228 	 */
229 	rcu_read_lock();
230 	list_for_each_entry_rcu(sub_ctx, &ctx_list.head, list_item) {
231 		struct vmci_handle_list *node;
232 
233 		/*
234 		 * We only deliver notifications of the removal of
235 		 * contexts, if the two contexts are allowed to
236 		 * interact.
237 		 */
238 		if (vmci_deny_interaction(priv_flags, sub_ctx->priv_flags))
239 			continue;
240 
241 		list_for_each_entry_rcu(node, &sub_ctx->notifier_list, node) {
242 			if (!vmci_handle_is_equal(node->handle, context_handle))
243 				continue;
244 
245 			vmci_handle_arr_append_entry(&subscriber_array,
246 					vmci_make_handle(sub_ctx->cid,
247 							 VMCI_EVENT_HANDLER));
248 		}
249 	}
250 	rcu_read_unlock();
251 
252 	/* Fire event to all subscribers. */
253 	array_size = vmci_handle_arr_get_size(subscriber_array);
254 	for (i = 0; i < array_size; i++) {
255 		int result;
256 		struct vmci_event_ctx ev;
257 
258 		ev.msg.hdr.dst = vmci_handle_arr_get_entry(subscriber_array, i);
259 		ev.msg.hdr.src = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
260 						  VMCI_CONTEXT_RESOURCE_ID);
261 		ev.msg.hdr.payload_size = sizeof(ev) - sizeof(ev.msg.hdr);
262 		ev.msg.event_data.event = VMCI_EVENT_CTX_REMOVED;
263 		ev.payload.context_id = context_id;
264 
265 		result = vmci_datagram_dispatch(VMCI_HYPERVISOR_CONTEXT_ID,
266 						&ev.msg.hdr, false);
267 		if (result < VMCI_SUCCESS) {
268 			pr_devel("Failed to enqueue event datagram (type=%d) for context (ID=0x%x)\n",
269 				 ev.msg.event_data.event,
270 				 ev.msg.hdr.dst.context);
271 			/* We continue to enqueue on next subscriber. */
272 		}
273 	}
274 	vmci_handle_arr_destroy(subscriber_array);
275 
276 	return VMCI_SUCCESS;
277 }
278 
279 /*
280  * Returns the current number of pending datagrams. The call may
281  * also serve as a synchronization point for the datagram queue,
282  * as no enqueue operations can occur concurrently.
283  */
vmci_ctx_pending_datagrams(u32 cid,u32 * pending)284 int vmci_ctx_pending_datagrams(u32 cid, u32 *pending)
285 {
286 	struct vmci_ctx *context;
287 
288 	context = vmci_ctx_get(cid);
289 	if (context == NULL)
290 		return VMCI_ERROR_INVALID_ARGS;
291 
292 	spin_lock(&context->lock);
293 	if (pending)
294 		*pending = context->pending_datagrams;
295 	spin_unlock(&context->lock);
296 	vmci_ctx_put(context);
297 
298 	return VMCI_SUCCESS;
299 }
300 
301 /*
302  * Queues a VMCI datagram for the appropriate target VM context.
303  */
vmci_ctx_enqueue_datagram(u32 cid,struct vmci_datagram * dg)304 int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg)
305 {
306 	struct vmci_datagram_queue_entry *dq_entry;
307 	struct vmci_ctx *context;
308 	struct vmci_handle dg_src;
309 	size_t vmci_dg_size;
310 
311 	vmci_dg_size = VMCI_DG_SIZE(dg);
312 	if (vmci_dg_size > VMCI_MAX_DG_SIZE) {
313 		pr_devel("Datagram too large (bytes=%zu)\n", vmci_dg_size);
314 		return VMCI_ERROR_INVALID_ARGS;
315 	}
316 
317 	/* Get the target VM's VMCI context. */
318 	context = vmci_ctx_get(cid);
319 	if (!context) {
320 		pr_devel("Invalid context (ID=0x%x)\n", cid);
321 		return VMCI_ERROR_INVALID_ARGS;
322 	}
323 
324 	/* Allocate guest call entry and add it to the target VM's queue. */
325 	dq_entry = kmalloc(sizeof(*dq_entry), GFP_KERNEL);
326 	if (dq_entry == NULL) {
327 		pr_warn("Failed to allocate memory for datagram\n");
328 		vmci_ctx_put(context);
329 		return VMCI_ERROR_NO_MEM;
330 	}
331 	dq_entry->dg = dg;
332 	dq_entry->dg_size = vmci_dg_size;
333 	dg_src = dg->src;
334 	INIT_LIST_HEAD(&dq_entry->list_item);
335 
336 	spin_lock(&context->lock);
337 
338 	/*
339 	 * We put a higher limit on datagrams from the hypervisor.  If
340 	 * the pending datagram is not from hypervisor, then we check
341 	 * if enqueueing it would exceed the
342 	 * VMCI_MAX_DATAGRAM_QUEUE_SIZE limit on the destination.  If
343 	 * the pending datagram is from hypervisor, we allow it to be
344 	 * queued at the destination side provided we don't reach the
345 	 * VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE limit.
346 	 */
347 	if (context->datagram_queue_size + vmci_dg_size >=
348 	    VMCI_MAX_DATAGRAM_QUEUE_SIZE &&
349 	    (!vmci_handle_is_equal(dg_src,
350 				vmci_make_handle
351 				(VMCI_HYPERVISOR_CONTEXT_ID,
352 				 VMCI_CONTEXT_RESOURCE_ID)) ||
353 	     context->datagram_queue_size + vmci_dg_size >=
354 	     VMCI_MAX_DATAGRAM_AND_EVENT_QUEUE_SIZE)) {
355 		spin_unlock(&context->lock);
356 		vmci_ctx_put(context);
357 		kfree(dq_entry);
358 		pr_devel("Context (ID=0x%x) receive queue is full\n", cid);
359 		return VMCI_ERROR_NO_RESOURCES;
360 	}
361 
362 	list_add(&dq_entry->list_item, &context->datagram_queue);
363 	context->pending_datagrams++;
364 	context->datagram_queue_size += vmci_dg_size;
365 	ctx_signal_notify(context);
366 	wake_up(&context->host_context.wait_queue);
367 	spin_unlock(&context->lock);
368 	vmci_ctx_put(context);
369 
370 	return vmci_dg_size;
371 }
372 
373 /*
374  * Verifies whether a context with the specified context ID exists.
375  * FIXME: utility is dubious as no decisions can be reliably made
376  * using this data as context can appear and disappear at any time.
377  */
vmci_ctx_exists(u32 cid)378 bool vmci_ctx_exists(u32 cid)
379 {
380 	struct vmci_ctx *context;
381 	bool exists = false;
382 
383 	rcu_read_lock();
384 
385 	list_for_each_entry_rcu(context, &ctx_list.head, list_item) {
386 		if (context->cid == cid) {
387 			exists = true;
388 			break;
389 		}
390 	}
391 
392 	rcu_read_unlock();
393 	return exists;
394 }
395 
396 /*
397  * Retrieves VMCI context corresponding to the given cid.
398  */
vmci_ctx_get(u32 cid)399 struct vmci_ctx *vmci_ctx_get(u32 cid)
400 {
401 	struct vmci_ctx *c, *context = NULL;
402 
403 	if (cid == VMCI_INVALID_ID)
404 		return NULL;
405 
406 	rcu_read_lock();
407 	list_for_each_entry_rcu(c, &ctx_list.head, list_item) {
408 		if (c->cid == cid) {
409 			/*
410 			 * The context owner drops its own reference to the
411 			 * context only after removing it from the list and
412 			 * waiting for RCU grace period to expire. This
413 			 * means that we are not about to increase the
414 			 * reference count of something that is in the
415 			 * process of being destroyed.
416 			 */
417 			context = c;
418 			kref_get(&context->kref);
419 			break;
420 		}
421 	}
422 	rcu_read_unlock();
423 
424 	return context;
425 }
426 
427 /*
428  * Deallocates all parts of a context data structure. This
429  * function doesn't lock the context, because it assumes that
430  * the caller was holding the last reference to context.
431  */
ctx_free_ctx(struct kref * kref)432 static void ctx_free_ctx(struct kref *kref)
433 {
434 	struct vmci_ctx *context = container_of(kref, struct vmci_ctx, kref);
435 	struct vmci_datagram_queue_entry *dq_entry, *dq_entry_tmp;
436 	struct vmci_handle temp_handle;
437 	struct vmci_handle_list *notifier, *tmp;
438 
439 	/*
440 	 * Fire event to all contexts interested in knowing this
441 	 * context is dying.
442 	 */
443 	ctx_fire_notification(context->cid, context->priv_flags);
444 
445 	/*
446 	 * Cleanup all queue pair resources attached to context.  If
447 	 * the VM dies without cleaning up, this code will make sure
448 	 * that no resources are leaked.
449 	 */
450 	temp_handle = vmci_handle_arr_get_entry(context->queue_pair_array, 0);
451 	while (!vmci_handle_is_equal(temp_handle, VMCI_INVALID_HANDLE)) {
452 		if (vmci_qp_broker_detach(temp_handle,
453 					  context) < VMCI_SUCCESS) {
454 			/*
455 			 * When vmci_qp_broker_detach() succeeds it
456 			 * removes the handle from the array.  If
457 			 * detach fails, we must remove the handle
458 			 * ourselves.
459 			 */
460 			vmci_handle_arr_remove_entry(context->queue_pair_array,
461 						     temp_handle);
462 		}
463 		temp_handle =
464 		    vmci_handle_arr_get_entry(context->queue_pair_array, 0);
465 	}
466 
467 	/*
468 	 * It is fine to destroy this without locking the callQueue, as
469 	 * this is the only thread having a reference to the context.
470 	 */
471 	list_for_each_entry_safe(dq_entry, dq_entry_tmp,
472 				 &context->datagram_queue, list_item) {
473 		WARN_ON(dq_entry->dg_size != VMCI_DG_SIZE(dq_entry->dg));
474 		list_del(&dq_entry->list_item);
475 		kfree(dq_entry->dg);
476 		kfree(dq_entry);
477 	}
478 
479 	list_for_each_entry_safe(notifier, tmp,
480 				 &context->notifier_list, node) {
481 		list_del(&notifier->node);
482 		kfree(notifier);
483 	}
484 
485 	vmci_handle_arr_destroy(context->queue_pair_array);
486 	vmci_handle_arr_destroy(context->doorbell_array);
487 	vmci_handle_arr_destroy(context->pending_doorbell_array);
488 	vmci_ctx_unset_notify(context);
489 	if (context->cred)
490 		put_cred(context->cred);
491 	kfree(context);
492 }
493 
494 /*
495  * Drops reference to VMCI context. If this is the last reference to
496  * the context it will be deallocated. A context is created with
497  * a reference count of one, and on destroy, it is removed from
498  * the context list before its reference count is decremented. Thus,
499  * if we reach zero, we are sure that nobody else are about to increment
500  * it (they need the entry in the context list for that), and so there
501  * is no need for locking.
502  */
vmci_ctx_put(struct vmci_ctx * context)503 void vmci_ctx_put(struct vmci_ctx *context)
504 {
505 	kref_put(&context->kref, ctx_free_ctx);
506 }
507 
508 /*
509  * Dequeues the next datagram and returns it to caller.
510  * The caller passes in a pointer to the max size datagram
511  * it can handle and the datagram is only unqueued if the
512  * size is less than max_size. If larger max_size is set to
513  * the size of the datagram to give the caller a chance to
514  * set up a larger buffer for the guestcall.
515  */
vmci_ctx_dequeue_datagram(struct vmci_ctx * context,size_t * max_size,struct vmci_datagram ** dg)516 int vmci_ctx_dequeue_datagram(struct vmci_ctx *context,
517 			      size_t *max_size,
518 			      struct vmci_datagram **dg)
519 {
520 	struct vmci_datagram_queue_entry *dq_entry;
521 	struct list_head *list_item;
522 	int rv;
523 
524 	/* Dequeue the next datagram entry. */
525 	spin_lock(&context->lock);
526 	if (context->pending_datagrams == 0) {
527 		ctx_clear_notify_call(context);
528 		spin_unlock(&context->lock);
529 		pr_devel("No datagrams pending\n");
530 		return VMCI_ERROR_NO_MORE_DATAGRAMS;
531 	}
532 
533 	list_item = context->datagram_queue.next;
534 
535 	dq_entry =
536 	    list_entry(list_item, struct vmci_datagram_queue_entry, list_item);
537 
538 	/* Check size of caller's buffer. */
539 	if (*max_size < dq_entry->dg_size) {
540 		*max_size = dq_entry->dg_size;
541 		spin_unlock(&context->lock);
542 		pr_devel("Caller's buffer should be at least (size=%u bytes)\n",
543 			 (u32) *max_size);
544 		return VMCI_ERROR_NO_MEM;
545 	}
546 
547 	list_del(list_item);
548 	context->pending_datagrams--;
549 	context->datagram_queue_size -= dq_entry->dg_size;
550 	if (context->pending_datagrams == 0) {
551 		ctx_clear_notify_call(context);
552 		rv = VMCI_SUCCESS;
553 	} else {
554 		/*
555 		 * Return the size of the next datagram.
556 		 */
557 		struct vmci_datagram_queue_entry *next_entry;
558 
559 		list_item = context->datagram_queue.next;
560 		next_entry =
561 		    list_entry(list_item, struct vmci_datagram_queue_entry,
562 			       list_item);
563 
564 		/*
565 		 * The following size_t -> int truncation is fine as
566 		 * the maximum size of a (routable) datagram is 68KB.
567 		 */
568 		rv = (int)next_entry->dg_size;
569 	}
570 	spin_unlock(&context->lock);
571 
572 	/* Caller must free datagram. */
573 	*dg = dq_entry->dg;
574 	dq_entry->dg = NULL;
575 	kfree(dq_entry);
576 
577 	return rv;
578 }
579 
580 /*
581  * Reverts actions set up by vmci_setup_notify().  Unmaps and unlocks the
582  * page mapped/locked by vmci_setup_notify().
583  */
vmci_ctx_unset_notify(struct vmci_ctx * context)584 void vmci_ctx_unset_notify(struct vmci_ctx *context)
585 {
586 	struct page *notify_page;
587 
588 	spin_lock(&context->lock);
589 
590 	notify_page = context->notify_page;
591 	context->notify = &ctx_dummy_notify;
592 	context->notify_page = NULL;
593 
594 	spin_unlock(&context->lock);
595 
596 	if (notify_page) {
597 		kunmap(notify_page);
598 		put_page(notify_page);
599 	}
600 }
601 
602 /*
603  * Add remote_cid to list of contexts current contexts wants
604  * notifications from/about.
605  */
vmci_ctx_add_notification(u32 context_id,u32 remote_cid)606 int vmci_ctx_add_notification(u32 context_id, u32 remote_cid)
607 {
608 	struct vmci_ctx *context;
609 	struct vmci_handle_list *notifier, *n;
610 	int result;
611 	bool exists = false;
612 
613 	context = vmci_ctx_get(context_id);
614 	if (!context)
615 		return VMCI_ERROR_NOT_FOUND;
616 
617 	if (VMCI_CONTEXT_IS_VM(context_id) && VMCI_CONTEXT_IS_VM(remote_cid)) {
618 		pr_devel("Context removed notifications for other VMs not supported (src=0x%x, remote=0x%x)\n",
619 			 context_id, remote_cid);
620 		result = VMCI_ERROR_DST_UNREACHABLE;
621 		goto out;
622 	}
623 
624 	if (context->priv_flags & VMCI_PRIVILEGE_FLAG_RESTRICTED) {
625 		result = VMCI_ERROR_NO_ACCESS;
626 		goto out;
627 	}
628 
629 	notifier = kmalloc(sizeof(struct vmci_handle_list), GFP_KERNEL);
630 	if (!notifier) {
631 		result = VMCI_ERROR_NO_MEM;
632 		goto out;
633 	}
634 
635 	INIT_LIST_HEAD(&notifier->node);
636 	notifier->handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
637 
638 	spin_lock(&context->lock);
639 
640 	if (context->n_notifiers < VMCI_MAX_CONTEXTS) {
641 		list_for_each_entry(n, &context->notifier_list, node) {
642 			if (vmci_handle_is_equal(n->handle, notifier->handle)) {
643 				exists = true;
644 				break;
645 			}
646 		}
647 
648 		if (exists) {
649 			kfree(notifier);
650 			result = VMCI_ERROR_ALREADY_EXISTS;
651 		} else {
652 			list_add_tail_rcu(&notifier->node,
653 					  &context->notifier_list);
654 			context->n_notifiers++;
655 			result = VMCI_SUCCESS;
656 		}
657 	} else {
658 		kfree(notifier);
659 		result = VMCI_ERROR_NO_MEM;
660 	}
661 
662 	spin_unlock(&context->lock);
663 
664  out:
665 	vmci_ctx_put(context);
666 	return result;
667 }
668 
669 /*
670  * Remove remote_cid from current context's list of contexts it is
671  * interested in getting notifications from/about.
672  */
vmci_ctx_remove_notification(u32 context_id,u32 remote_cid)673 int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid)
674 {
675 	struct vmci_ctx *context;
676 	struct vmci_handle_list *notifier, *tmp;
677 	struct vmci_handle handle;
678 	bool found = false;
679 
680 	context = vmci_ctx_get(context_id);
681 	if (!context)
682 		return VMCI_ERROR_NOT_FOUND;
683 
684 	handle = vmci_make_handle(remote_cid, VMCI_EVENT_HANDLER);
685 
686 	spin_lock(&context->lock);
687 	list_for_each_entry_safe(notifier, tmp,
688 				 &context->notifier_list, node) {
689 		if (vmci_handle_is_equal(notifier->handle, handle)) {
690 			list_del_rcu(&notifier->node);
691 			context->n_notifiers--;
692 			found = true;
693 			break;
694 		}
695 	}
696 	spin_unlock(&context->lock);
697 
698 	if (found) {
699 		synchronize_rcu();
700 		kfree(notifier);
701 	}
702 
703 	vmci_ctx_put(context);
704 
705 	return found ? VMCI_SUCCESS : VMCI_ERROR_NOT_FOUND;
706 }
707 
/*
 * Produces the notifier part of a context's checkpoint state: an
 * array holding the context ID of every entry on the notifier list.
 *
 * On entry *@buf_size is the size the caller can accept.  If that is
 * too small, *@buf_size is set to the required size and
 * VMCI_ERROR_MORE_DATA is returned.  On success *@pbuf receives a
 * newly allocated array (NULL when there are no notifiers) and
 * *@buf_size its size in bytes.  Returns VMCI_ERROR_NO_MEM if the
 * array cannot be allocated.
 *
 * The only caller in view, vmci_ctx_get_chkpt_state(), invokes this
 * with the context lock held, hence the atomic allocation.
 */
static int vmci_ctx_get_chkpt_notifiers(struct vmci_ctx *context,
					u32 *buf_size, void **pbuf)
{
	struct vmci_handle_list *node;
	size_t needed;
	u32 *cids;
	u32 *slot;

	if (context->n_notifiers == 0) {
		*pbuf = NULL;
		*buf_size = 0;
		return VMCI_SUCCESS;
	}

	needed = context->n_notifiers * sizeof(*cids);
	if (*buf_size < needed) {
		*buf_size = needed;
		return VMCI_ERROR_MORE_DATA;
	}

	cids = kmalloc(needed, GFP_ATOMIC); /* FIXME: want GFP_KERNEL */
	if (!cids)
		return VMCI_ERROR_NO_MEM;

	/* Copy out one context ID per subscription, in list order. */
	slot = cids;
	list_for_each_entry(node, &context->notifier_list, node)
		*slot++ = node->handle.context;

	*pbuf = cids;
	*buf_size = needed;
	return VMCI_SUCCESS;
}
739 
/*
 * Produces the doorbell part of a context's checkpoint state: one
 * zero-initialized struct dbell_cpt_state per registered doorbell,
 * with its handle field filled in.
 *
 * On entry *@buf_size is the size the caller can accept.  If that is
 * too small, *@buf_size is set to the required size and
 * VMCI_ERROR_MORE_DATA is returned.  On success *@pbuf receives a
 * newly allocated array (NULL when no doorbells are registered) and
 * *@buf_size its size in bytes.  Returns VMCI_ERROR_NO_MEM if the
 * array cannot be allocated.
 */
static int vmci_ctx_get_chkpt_doorbells(struct vmci_ctx *context,
					u32 *buf_size, void **pbuf)
{
	struct dbell_cpt_state *states;
	size_t needed;
	u32 count, idx;

	count = vmci_handle_arr_get_size(context->doorbell_array);
	if (count == 0) {
		*pbuf = NULL;
		*buf_size = 0;
		return VMCI_SUCCESS;
	}

	needed = count * sizeof(*states);
	if (*buf_size < needed) {
		*buf_size = needed;
		return VMCI_ERROR_MORE_DATA;
	}

	states = kzalloc(needed, GFP_ATOMIC);
	if (!states)
		return VMCI_ERROR_NO_MEM;

	for (idx = 0; idx < count; idx++)
		states[idx].handle =
			vmci_handle_arr_get_entry(context->doorbell_array,
						  idx);

	*pbuf = states;
	*buf_size = needed;
	return VMCI_SUCCESS;
}
771 
772 /*
773  * Get current context's checkpoint state of given type.
774  */
vmci_ctx_get_chkpt_state(u32 context_id,u32 cpt_type,u32 * buf_size,void ** pbuf)775 int vmci_ctx_get_chkpt_state(u32 context_id,
776 			     u32 cpt_type,
777 			     u32 *buf_size,
778 			     void **pbuf)
779 {
780 	struct vmci_ctx *context;
781 	int result;
782 
783 	context = vmci_ctx_get(context_id);
784 	if (!context)
785 		return VMCI_ERROR_NOT_FOUND;
786 
787 	spin_lock(&context->lock);
788 
789 	switch (cpt_type) {
790 	case VMCI_NOTIFICATION_CPT_STATE:
791 		result = vmci_ctx_get_chkpt_notifiers(context, buf_size, pbuf);
792 		break;
793 
794 	case VMCI_WELLKNOWN_CPT_STATE:
795 		/*
796 		 * For compatibility with VMX'en with VM to VM communication, we
797 		 * always return zero wellknown handles.
798 		 */
799 
800 		*buf_size = 0;
801 		*pbuf = NULL;
802 		result = VMCI_SUCCESS;
803 		break;
804 
805 	case VMCI_DOORBELL_CPT_STATE:
806 		result = vmci_ctx_get_chkpt_doorbells(context, buf_size, pbuf);
807 		break;
808 
809 	default:
810 		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
811 		result = VMCI_ERROR_INVALID_ARGS;
812 		break;
813 	}
814 
815 	spin_unlock(&context->lock);
816 	vmci_ctx_put(context);
817 
818 	return result;
819 }
820 
821 /*
822  * Set current context's checkpoint state of given type.
823  */
vmci_ctx_set_chkpt_state(u32 context_id,u32 cpt_type,u32 buf_size,void * cpt_buf)824 int vmci_ctx_set_chkpt_state(u32 context_id,
825 			     u32 cpt_type,
826 			     u32 buf_size,
827 			     void *cpt_buf)
828 {
829 	u32 i;
830 	u32 current_id;
831 	int result = VMCI_SUCCESS;
832 	u32 num_ids = buf_size / sizeof(u32);
833 
834 	if (cpt_type == VMCI_WELLKNOWN_CPT_STATE && num_ids > 0) {
835 		/*
836 		 * We would end up here if VMX with VM to VM communication
837 		 * attempts to restore a checkpoint with wellknown handles.
838 		 */
839 		pr_warn("Attempt to restore checkpoint with obsolete wellknown handles\n");
840 		return VMCI_ERROR_OBSOLETE;
841 	}
842 
843 	if (cpt_type != VMCI_NOTIFICATION_CPT_STATE) {
844 		pr_devel("Invalid cpt state (type=%d)\n", cpt_type);
845 		return VMCI_ERROR_INVALID_ARGS;
846 	}
847 
848 	for (i = 0; i < num_ids && result == VMCI_SUCCESS; i++) {
849 		current_id = ((u32 *)cpt_buf)[i];
850 		result = vmci_ctx_add_notification(context_id, current_id);
851 		if (result != VMCI_SUCCESS)
852 			break;
853 	}
854 	if (result != VMCI_SUCCESS)
855 		pr_devel("Failed to set cpt state (type=%d) (error=%d)\n",
856 			 cpt_type, result);
857 
858 	return result;
859 }
860 
861 /*
862  * Retrieves the specified context's pending notifications in the
863  * form of a handle array. The handle arrays returned are the
864  * actual data - not a copy and should not be modified by the
865  * caller. They must be released using
866  * vmci_ctx_rcv_notifications_release.
867  */
vmci_ctx_rcv_notifications_get(u32 context_id,struct vmci_handle_arr ** db_handle_array,struct vmci_handle_arr ** qp_handle_array)868 int vmci_ctx_rcv_notifications_get(u32 context_id,
869 				   struct vmci_handle_arr **db_handle_array,
870 				   struct vmci_handle_arr **qp_handle_array)
871 {
872 	struct vmci_ctx *context;
873 	int result = VMCI_SUCCESS;
874 
875 	context = vmci_ctx_get(context_id);
876 	if (context == NULL)
877 		return VMCI_ERROR_NOT_FOUND;
878 
879 	spin_lock(&context->lock);
880 
881 	*db_handle_array = context->pending_doorbell_array;
882 	context->pending_doorbell_array =
883 		vmci_handle_arr_create(0, VMCI_MAX_GUEST_DOORBELL_COUNT);
884 	if (!context->pending_doorbell_array) {
885 		context->pending_doorbell_array = *db_handle_array;
886 		*db_handle_array = NULL;
887 		result = VMCI_ERROR_NO_MEM;
888 	}
889 	*qp_handle_array = NULL;
890 
891 	spin_unlock(&context->lock);
892 	vmci_ctx_put(context);
893 
894 	return result;
895 }
896 
897 /*
898  * Releases handle arrays with pending notifications previously
899  * retrieved using vmci_ctx_rcv_notifications_get. If the
900  * notifications were not successfully handed over to the guest,
901  * success must be false.
902  */
vmci_ctx_rcv_notifications_release(u32 context_id,struct vmci_handle_arr * db_handle_array,struct vmci_handle_arr * qp_handle_array,bool success)903 void vmci_ctx_rcv_notifications_release(u32 context_id,
904 					struct vmci_handle_arr *db_handle_array,
905 					struct vmci_handle_arr *qp_handle_array,
906 					bool success)
907 {
908 	struct vmci_ctx *context = vmci_ctx_get(context_id);
909 
910 	spin_lock(&context->lock);
911 	if (!success) {
912 		struct vmci_handle handle;
913 
914 		/*
915 		 * New notifications may have been added while we were not
916 		 * holding the context lock, so we transfer any new pending
917 		 * doorbell notifications to the old array, and reinstate the
918 		 * old array.
919 		 */
920 
921 		handle = vmci_handle_arr_remove_tail(
922 					context->pending_doorbell_array);
923 		while (!vmci_handle_is_invalid(handle)) {
924 			if (!vmci_handle_arr_has_entry(db_handle_array,
925 						       handle)) {
926 				vmci_handle_arr_append_entry(
927 						&db_handle_array, handle);
928 			}
929 			handle = vmci_handle_arr_remove_tail(
930 					context->pending_doorbell_array);
931 		}
932 		vmci_handle_arr_destroy(context->pending_doorbell_array);
933 		context->pending_doorbell_array = db_handle_array;
934 		db_handle_array = NULL;
935 	} else {
936 		ctx_clear_notify_call(context);
937 	}
938 	spin_unlock(&context->lock);
939 	vmci_ctx_put(context);
940 
941 	if (db_handle_array)
942 		vmci_handle_arr_destroy(db_handle_array);
943 
944 	if (qp_handle_array)
945 		vmci_handle_arr_destroy(qp_handle_array);
946 }
947 
948 /*
949  * Registers that a new doorbell handle has been allocated by the
950  * context. Only doorbell handles registered can be notified.
951  */
vmci_ctx_dbell_create(u32 context_id,struct vmci_handle handle)952 int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle)
953 {
954 	struct vmci_ctx *context;
955 	int result;
956 
957 	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
958 		return VMCI_ERROR_INVALID_ARGS;
959 
960 	context = vmci_ctx_get(context_id);
961 	if (context == NULL)
962 		return VMCI_ERROR_NOT_FOUND;
963 
964 	spin_lock(&context->lock);
965 	if (!vmci_handle_arr_has_entry(context->doorbell_array, handle))
966 		result = vmci_handle_arr_append_entry(&context->doorbell_array,
967 						      handle);
968 	else
969 		result = VMCI_ERROR_DUPLICATE_ENTRY;
970 
971 	spin_unlock(&context->lock);
972 	vmci_ctx_put(context);
973 
974 	return result;
975 }
976 
977 /*
978  * Unregisters a doorbell handle that was previously registered
979  * with vmci_ctx_dbell_create.
980  */
vmci_ctx_dbell_destroy(u32 context_id,struct vmci_handle handle)981 int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle)
982 {
983 	struct vmci_ctx *context;
984 	struct vmci_handle removed_handle;
985 
986 	if (context_id == VMCI_INVALID_ID || vmci_handle_is_invalid(handle))
987 		return VMCI_ERROR_INVALID_ARGS;
988 
989 	context = vmci_ctx_get(context_id);
990 	if (context == NULL)
991 		return VMCI_ERROR_NOT_FOUND;
992 
993 	spin_lock(&context->lock);
994 	removed_handle =
995 	    vmci_handle_arr_remove_entry(context->doorbell_array, handle);
996 	vmci_handle_arr_remove_entry(context->pending_doorbell_array, handle);
997 	spin_unlock(&context->lock);
998 
999 	vmci_ctx_put(context);
1000 
1001 	return vmci_handle_is_invalid(removed_handle) ?
1002 	    VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
1003 }
1004 
1005 /*
1006  * Unregisters all doorbell handles that were previously
1007  * registered with vmci_ctx_dbell_create.
1008  */
vmci_ctx_dbell_destroy_all(u32 context_id)1009 int vmci_ctx_dbell_destroy_all(u32 context_id)
1010 {
1011 	struct vmci_ctx *context;
1012 	struct vmci_handle handle;
1013 
1014 	if (context_id == VMCI_INVALID_ID)
1015 		return VMCI_ERROR_INVALID_ARGS;
1016 
1017 	context = vmci_ctx_get(context_id);
1018 	if (context == NULL)
1019 		return VMCI_ERROR_NOT_FOUND;
1020 
1021 	spin_lock(&context->lock);
1022 	do {
1023 		struct vmci_handle_arr *arr = context->doorbell_array;
1024 		handle = vmci_handle_arr_remove_tail(arr);
1025 	} while (!vmci_handle_is_invalid(handle));
1026 	do {
1027 		struct vmci_handle_arr *arr = context->pending_doorbell_array;
1028 		handle = vmci_handle_arr_remove_tail(arr);
1029 	} while (!vmci_handle_is_invalid(handle));
1030 	spin_unlock(&context->lock);
1031 
1032 	vmci_ctx_put(context);
1033 
1034 	return VMCI_SUCCESS;
1035 }
1036 
1037 /*
1038  * Registers a notification of a doorbell handle initiated by the
1039  * specified source context. The notification of doorbells are
1040  * subject to the same isolation rules as datagram delivery. To
1041  * allow host side senders of notifications a finer granularity
1042  * of sender rights than those assigned to the sending context
1043  * itself, the host context is required to specify a different
1044  * set of privilege flags that will override the privileges of
1045  * the source context.
1046  */
vmci_ctx_notify_dbell(u32 src_cid,struct vmci_handle handle,u32 src_priv_flags)1047 int vmci_ctx_notify_dbell(u32 src_cid,
1048 			  struct vmci_handle handle,
1049 			  u32 src_priv_flags)
1050 {
1051 	struct vmci_ctx *dst_context;
1052 	int result;
1053 
1054 	if (vmci_handle_is_invalid(handle))
1055 		return VMCI_ERROR_INVALID_ARGS;
1056 
1057 	/* Get the target VM's VMCI context. */
1058 	dst_context = vmci_ctx_get(handle.context);
1059 	if (!dst_context) {
1060 		pr_devel("Invalid context (ID=0x%x)\n", handle.context);
1061 		return VMCI_ERROR_NOT_FOUND;
1062 	}
1063 
1064 	if (src_cid != handle.context) {
1065 		u32 dst_priv_flags;
1066 
1067 		if (VMCI_CONTEXT_IS_VM(src_cid) &&
1068 		    VMCI_CONTEXT_IS_VM(handle.context)) {
1069 			pr_devel("Doorbell notification from VM to VM not supported (src=0x%x, dst=0x%x)\n",
1070 				 src_cid, handle.context);
1071 			result = VMCI_ERROR_DST_UNREACHABLE;
1072 			goto out;
1073 		}
1074 
1075 		result = vmci_dbell_get_priv_flags(handle, &dst_priv_flags);
1076 		if (result < VMCI_SUCCESS) {
1077 			pr_warn("Failed to get privilege flags for destination (handle=0x%x:0x%x)\n",
1078 				handle.context, handle.resource);
1079 			goto out;
1080 		}
1081 
1082 		if (src_cid != VMCI_HOST_CONTEXT_ID ||
1083 		    src_priv_flags == VMCI_NO_PRIVILEGE_FLAGS) {
1084 			src_priv_flags = vmci_context_get_priv_flags(src_cid);
1085 		}
1086 
1087 		if (vmci_deny_interaction(src_priv_flags, dst_priv_flags)) {
1088 			result = VMCI_ERROR_NO_ACCESS;
1089 			goto out;
1090 		}
1091 	}
1092 
1093 	if (handle.context == VMCI_HOST_CONTEXT_ID) {
1094 		result = vmci_dbell_host_context_notify(src_cid, handle);
1095 	} else {
1096 		spin_lock(&dst_context->lock);
1097 
1098 		if (!vmci_handle_arr_has_entry(dst_context->doorbell_array,
1099 					       handle)) {
1100 			result = VMCI_ERROR_NOT_FOUND;
1101 		} else {
1102 			if (!vmci_handle_arr_has_entry(
1103 					dst_context->pending_doorbell_array,
1104 					handle)) {
1105 				result = vmci_handle_arr_append_entry(
1106 					&dst_context->pending_doorbell_array,
1107 					handle);
1108 				if (result == VMCI_SUCCESS) {
1109 					ctx_signal_notify(dst_context);
1110 					wake_up(&dst_context->host_context.wait_queue);
1111 				}
1112 			} else {
1113 				result = VMCI_SUCCESS;
1114 			}
1115 		}
1116 		spin_unlock(&dst_context->lock);
1117 	}
1118 
1119  out:
1120 	vmci_ctx_put(dst_context);
1121 
1122 	return result;
1123 }
1124 
vmci_ctx_supports_host_qp(struct vmci_ctx * context)1125 bool vmci_ctx_supports_host_qp(struct vmci_ctx *context)
1126 {
1127 	return context && context->user_version >= VMCI_VERSION_HOSTQP;
1128 }
1129 
1130 /*
1131  * Registers that a new queue pair handle has been allocated by
1132  * the context.
1133  */
vmci_ctx_qp_create(struct vmci_ctx * context,struct vmci_handle handle)1134 int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle)
1135 {
1136 	int result;
1137 
1138 	if (context == NULL || vmci_handle_is_invalid(handle))
1139 		return VMCI_ERROR_INVALID_ARGS;
1140 
1141 	if (!vmci_handle_arr_has_entry(context->queue_pair_array, handle))
1142 		result = vmci_handle_arr_append_entry(
1143 			&context->queue_pair_array, handle);
1144 	else
1145 		result = VMCI_ERROR_DUPLICATE_ENTRY;
1146 
1147 	return result;
1148 }
1149 
1150 /*
1151  * Unregisters a queue pair handle that was previously registered
1152  * with vmci_ctx_qp_create.
1153  */
vmci_ctx_qp_destroy(struct vmci_ctx * context,struct vmci_handle handle)1154 int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle)
1155 {
1156 	struct vmci_handle hndl;
1157 
1158 	if (context == NULL || vmci_handle_is_invalid(handle))
1159 		return VMCI_ERROR_INVALID_ARGS;
1160 
1161 	hndl = vmci_handle_arr_remove_entry(context->queue_pair_array, handle);
1162 
1163 	return vmci_handle_is_invalid(hndl) ?
1164 		VMCI_ERROR_NOT_FOUND : VMCI_SUCCESS;
1165 }
1166 
1167 /*
1168  * Determines whether a given queue pair handle is registered
1169  * with the given context.
1170  */
vmci_ctx_qp_exists(struct vmci_ctx * context,struct vmci_handle handle)1171 bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle)
1172 {
1173 	if (context == NULL || vmci_handle_is_invalid(handle))
1174 		return false;
1175 
1176 	return vmci_handle_arr_has_entry(context->queue_pair_array, handle);
1177 }
1178 
1179 /*
1180  * vmci_context_get_priv_flags() - Retrieve privilege flags.
1181  * @context_id: The context ID of the VMCI context.
1182  *
1183  * Retrieves privilege flags of the given VMCI context ID.
1184  */
vmci_context_get_priv_flags(u32 context_id)1185 u32 vmci_context_get_priv_flags(u32 context_id)
1186 {
1187 	if (vmci_host_code_active()) {
1188 		u32 flags;
1189 		struct vmci_ctx *context;
1190 
1191 		context = vmci_ctx_get(context_id);
1192 		if (!context)
1193 			return VMCI_LEAST_PRIVILEGE_FLAGS;
1194 
1195 		flags = context->priv_flags;
1196 		vmci_ctx_put(context);
1197 		return flags;
1198 	}
1199 	return VMCI_NO_PRIVILEGE_FLAGS;
1200 }
1201 EXPORT_SYMBOL_GPL(vmci_context_get_priv_flags);
1202 
1203 /*
1204  * vmci_is_context_owner() - Determimnes if user is the context owner
1205  * @context_id: The context ID of the VMCI context.
1206  * @uid:        The host user id (real kernel value).
1207  *
1208  * Determines whether a given UID is the owner of given VMCI context.
1209  */
vmci_is_context_owner(u32 context_id,kuid_t uid)1210 bool vmci_is_context_owner(u32 context_id, kuid_t uid)
1211 {
1212 	bool is_owner = false;
1213 
1214 	if (vmci_host_code_active()) {
1215 		struct vmci_ctx *context = vmci_ctx_get(context_id);
1216 		if (context) {
1217 			if (context->cred)
1218 				is_owner = uid_eq(context->cred->uid, uid);
1219 			vmci_ctx_put(context);
1220 		}
1221 	}
1222 
1223 	return is_owner;
1224 }
1225 EXPORT_SYMBOL_GPL(vmci_is_context_owner);
1226