/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2014 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include <linux/scif.h>
#include "scif_main.h"
#include "scif_map.h"

static const char * const scif_ep_states[] = {
	"Unbound",
	"Bound",
	"Listening",
	"Connected",
	"Connecting",
	"Mapping",
	"Closing",
	"Close Listening",
	"Disconnected",
	"Zombie"};

enum conn_async_state {
	ASYNC_CONN_IDLE = 1,	/* ep setup for async connect */
	ASYNC_CONN_INPROGRESS,	/* async connect in progress */
	ASYNC_CONN_FLUSH_WORK	/* async work flush in progress */
};

/*
 * File operations for the anonymous inode file associated with a SCIF
 * endpoint, used in kernel mode SCIF poll. Kernel mode SCIF poll calls
 * portions of the poll API in the kernel and these take in a struct file *.
 * Since a struct file is not available to kernel mode SCIF, it uses an
 * anonymous file for this purpose.
 */
const struct file_operations scif_anon_fops = {
	.owner = THIS_MODULE,
};
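
/*
 * Illustrative sketch only: one plausible way an anonymous file could be
 * attached to an endpoint so the kernel poll API has a struct file * to
 * operate on. The helper name below is hypothetical (the driver's real
 * helper is scif_anon_inode_getfile(), implemented elsewhere); it assumes
 * <linux/anon_inodes.h> for anon_inode_getfile() and relies on the
 * endpoint's anon file pointer, which scif_poll() dereferences below.
 */
static int __maybe_unused scif_anon_getfile_sketch(struct scif_endpt *ep)
{
	/* Create a file with no on-disk inode; ep becomes its private data */
	struct file *f = anon_inode_getfile("scif_anon", &scif_anon_fops,
					    ep, 0);

	if (IS_ERR(f))
		return PTR_ERR(f);
	ep->anon = f;
	return 0;
}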

scif_epd_t scif_open(void)
{
	struct scif_endpt *ep;
	int err;

	might_sleep();
	ep = kzalloc(sizeof(*ep), GFP_KERNEL);
	if (!ep)
		goto err_ep_alloc;

	ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
	if (!ep->qp_info.qp)
		goto err_qp_alloc;

	err = scif_anon_inode_getfile(ep);
	if (err)
		goto err_anon_inode;

	spin_lock_init(&ep->lock);
	mutex_init(&ep->sendlock);
	mutex_init(&ep->recvlock);

	scif_rma_ep_init(ep);
	ep->state = SCIFEP_UNBOUND;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI open: ep %p success\n", ep);
	return ep;

err_anon_inode:
	kfree(ep->qp_info.qp);
err_qp_alloc:
	kfree(ep);
err_ep_alloc:
	return NULL;
}
EXPORT_SYMBOL_GPL(scif_open);

/*
 * scif_disconnect_ep - Disconnects the endpoint if found
 * @epd: The end point returned from scif_open()
 */
static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
{
	struct scifmsg msg;
	struct scif_endpt *fep = NULL;
	struct scif_endpt *tmpep;
	struct list_head *pos, *tmpq;
	int err;

	/*
	 * Wake up any threads blocked in send()/recv() before closing
	 * out the connection. Grabbing and releasing the send/recv lock
	 * will ensure that any blocked senders/receivers have exited for
	 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
	 * close. Ring 3 endpoints are not affected since close will not
	 * be called while there are IOCTLs executing.
	 */
	wake_up_interruptible(&ep->sendwq);
	wake_up_interruptible(&ep->recvwq);
	mutex_lock(&ep->sendlock);
	mutex_unlock(&ep->sendlock);
	mutex_lock(&ep->recvlock);
	mutex_unlock(&ep->recvlock);

	/* Remove from the connected list */
	mutex_lock(&scif_info.connlock);
	list_for_each_safe(pos, tmpq, &scif_info.connected) {
		tmpep = list_entry(pos, struct scif_endpt, list);
		if (tmpep == ep) {
			list_del(pos);
			fep = tmpep;
			spin_lock(&ep->lock);
			break;
		}
	}

	if (!fep) {
		/*
		 * The other side has completed the disconnect before
		 * the end point can be removed from the list. Therefore
		 * the ep lock is not locked, traverse the disconnected
		 * list to find the endpoint and release the conn lock.
		 */
		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep) {
				list_del(pos);
				break;
			}
		}
		mutex_unlock(&scif_info.connlock);
		return NULL;
	}

	init_completion(&ep->discon);
	msg.uop = SCIF_DISCNCT;
	msg.src = ep->port;
	msg.dst = ep->peer;
	msg.payload[0] = (u64)ep;
	msg.payload[1] = ep->remote_ep;

	err = scif_nodeqp_send(ep->remote_dev, &msg);
	spin_unlock(&ep->lock);
	mutex_unlock(&scif_info.connlock);

	if (!err)
		/* Wait for the remote node to respond with SCIF_DISCNT_ACK */
		wait_for_completion_timeout(&ep->discon,
					    SCIF_NODE_ALIVE_TIMEOUT);
	return ep;
}

int scif_close(scif_epd_t epd)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_endpt *tmpep;
	struct list_head *pos, *tmpq;
	enum scif_epd_state oldstate;
	bool flush_conn;

	dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
		ep, scif_ep_states[ep->state]);
	might_sleep();
	spin_lock(&ep->lock);
	flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
	spin_unlock(&ep->lock);

	if (flush_conn)
		flush_work(&scif_info.conn_work);

	spin_lock(&ep->lock);
	oldstate = ep->state;

	ep->state = SCIFEP_CLOSING;

	switch (oldstate) {
	case SCIFEP_ZOMBIE:
		dev_err(scif_info.mdev.this_device,
			"SCIFAPI close: zombie state unexpected\n");
		/* fall through */
	case SCIFEP_DISCONNECTED:
		spin_unlock(&ep->lock);
		scif_unregister_all_windows(epd);
		/* Remove from the disconnected list */
		mutex_lock(&scif_info.connlock);
		list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep) {
				list_del(pos);
				break;
			}
		}
		mutex_unlock(&scif_info.connlock);
		break;
	case SCIFEP_UNBOUND:
	case SCIFEP_BOUND:
	case SCIFEP_CONNECTING:
		spin_unlock(&ep->lock);
		break;
	case SCIFEP_MAPPING:
	case SCIFEP_CONNECTED:
	case SCIFEP_CLOSING:
	{
		spin_unlock(&ep->lock);
		scif_unregister_all_windows(epd);
		scif_disconnect_ep(ep);
		break;
	}
	case SCIFEP_LISTENING:
	case SCIFEP_CLLISTEN:
	{
		struct scif_conreq *conreq;
		struct scifmsg msg;
		struct scif_endpt *aep;

		spin_unlock(&ep->lock);
		mutex_lock(&scif_info.eplock);

		/* remove from listen list */
		list_for_each_safe(pos, tmpq, &scif_info.listen) {
			tmpep = list_entry(pos, struct scif_endpt, list);
			if (tmpep == ep)
				list_del(pos);
		}
		/* Remove any dangling accepts */
		while (ep->acceptcnt) {
			aep = list_first_entry(&ep->li_accept,
					       struct scif_endpt, liacceptlist);
			list_del(&aep->liacceptlist);
			scif_put_port(aep->port.port);
			list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
				tmpep = list_entry(pos, struct scif_endpt,
						   miacceptlist);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			mutex_unlock(&scif_info.eplock);
			mutex_lock(&scif_info.connlock);
			list_for_each_safe(pos, tmpq, &scif_info.connected) {
				tmpep = list_entry(pos,
						   struct scif_endpt, list);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
				tmpep = list_entry(pos,
						   struct scif_endpt, list);
				if (tmpep == aep) {
					list_del(pos);
					break;
				}
			}
			mutex_unlock(&scif_info.connlock);
			scif_teardown_ep(aep);
			mutex_lock(&scif_info.eplock);
			scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
			ep->acceptcnt--;
		}

		spin_lock(&ep->lock);
		mutex_unlock(&scif_info.eplock);

		/* Remove and reject any pending connection requests. */
		while (ep->conreqcnt) {
			conreq = list_first_entry(&ep->conlist,
						  struct scif_conreq, list);
			list_del(&conreq->list);

			msg.uop = SCIF_CNCT_REJ;
			msg.dst.node = conreq->msg.src.node;
			msg.dst.port = conreq->msg.src.port;
			msg.payload[0] = conreq->msg.payload[0];
			msg.payload[1] = conreq->msg.payload[1];
			/*
			 * No error handling on purpose for scif_nodeqp_send().
			 * If the remote node is lost we still want to free the
			 * connection requests on the self node.
			 */
			scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
					 &msg);
			ep->conreqcnt--;
			kfree(conreq);
		}

		spin_unlock(&ep->lock);
		/* If a kSCIF accept is waiting wake it up */
		wake_up_interruptible(&ep->conwq);
		break;
	}
	}
	scif_put_port(ep->port.port);
	scif_anon_inode_fput(ep);
	scif_teardown_ep(ep);
	scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_close);

/**
 * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
 *			accept new connections.
 * @epd: The end point returned from scif_open()
 */
int __scif_flush(scif_epd_t epd)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;

	switch (ep->state) {
	case SCIFEP_LISTENING:
	{
		ep->state = SCIFEP_CLLISTEN;

		/* If an accept is waiting wake it up */
		wake_up_interruptible(&ep->conwq);
		break;
	}
	default:
		break;
	}
	return 0;
}

int scif_bind(scif_epd_t epd, u16 pn)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret = 0;
	int tmp;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI bind: ep %p %s requested port number %d\n",
		ep, scif_ep_states[ep->state], pn);
	if (pn) {
		/*
		 * Similar to IETF RFC 1700, SCIF ports below
		 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
		 * processes or by processes executed by privileged users.
		 */
		if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
			ret = -EACCES;
			goto scif_bind_admin_exit;
		}
	}

	spin_lock(&ep->lock);
	if (ep->state == SCIFEP_BOUND) {
		ret = -EINVAL;
		goto scif_bind_exit;
	} else if (ep->state != SCIFEP_UNBOUND) {
		ret = -EISCONN;
		goto scif_bind_exit;
	}

	if (pn) {
		tmp = scif_rsrv_port(pn);
		if (tmp != pn) {
			ret = -EINVAL;
			goto scif_bind_exit;
		}
	} else {
		ret = scif_get_new_port();
		if (ret < 0)
			goto scif_bind_exit;
		pn = ret;
	}

	ep->state = SCIFEP_BOUND;
	ep->port.node = scif_info.nodeid;
	ep->port.port = pn;
	ep->conn_async_state = ASYNC_CONN_IDLE;
	ret = pn;
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI bind: bound to port number %d\n", pn);
scif_bind_exit:
	spin_unlock(&ep->lock);
scif_bind_admin_exit:
	return ret;
}
EXPORT_SYMBOL_GPL(scif_bind);

int scif_listen(scif_epd_t epd, int backlog)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
	spin_lock(&ep->lock);
	switch (ep->state) {
	case SCIFEP_ZOMBIE:
	case SCIFEP_CLOSING:
	case SCIFEP_CLLISTEN:
	case SCIFEP_UNBOUND:
	case SCIFEP_DISCONNECTED:
		spin_unlock(&ep->lock);
		return -EINVAL;
	case SCIFEP_LISTENING:
	case SCIFEP_CONNECTED:
	case SCIFEP_CONNECTING:
	case SCIFEP_MAPPING:
		spin_unlock(&ep->lock);
		return -EISCONN;
	case SCIFEP_BOUND:
		break;
	}

	ep->state = SCIFEP_LISTENING;
	ep->backlog = backlog;

	ep->conreqcnt = 0;
	ep->acceptcnt = 0;
	INIT_LIST_HEAD(&ep->conlist);
	init_waitqueue_head(&ep->conwq);
	INIT_LIST_HEAD(&ep->li_accept);
	spin_unlock(&ep->lock);

	/*
	 * The listen state is now set up, so delete the qp information,
	 * which is not needed on a listening endpoint, before placing the
	 * endpoint on the list of listening ep's.
	 */
	scif_teardown_ep(ep);
	ep->qp_info.qp = NULL;

	mutex_lock(&scif_info.eplock);
	list_add_tail(&ep->list, &scif_info.listen);
	mutex_unlock(&scif_info.eplock);
	return 0;
}
EXPORT_SYMBOL_GPL(scif_listen);
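
/*
 * Illustrative sketch only: typical kernel-mode setup of a listening
 * endpoint using the exported APIs above. The port number (2000) and
 * backlog (16) are arbitrary example values, not values mandated by the
 * driver.
 */
static scif_epd_t __maybe_unused scif_listen_sketch(void)
{
	scif_epd_t epd;
	int err;

	epd = scif_open();
	if (!epd)
		return NULL;
	/* scif_bind() returns the bound port number on success */
	err = scif_bind(epd, 2000);
	if (err < 0)
		goto err_close;
	/* Transition the bound endpoint into the listening state */
	err = scif_listen(epd, 16);
	if (err)
		goto err_close;
	return epd;
err_close:
	scif_close(epd);
	return NULL;
}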

/*
 ************************************************************************
 * SCIF connection flow:
 *
 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
 *	connections via a SCIF_CNCT_REQ message
 * 2) A SCIF endpoint can initiate a SCIF connection by calling
 *	scif_connect(..) which calls scif_setup_qp_connect(..) which
 *	allocates the local qp for the endpoint ring buffer and then sends
 *	a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
 *	a SCIF_CNCT_REJ message
 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
 *	wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
 *	message otherwise
 * 4) A thread blocked waiting for incoming connections allocates its local
 *	endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
 *	and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
 *	the node sends a SCIF_CNCT_REJ message
 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
 *	connecting endpoint is woken up as part of handling
 *	scif_cnctgnt_resp(..) following which it maps the remote endpoint's
 *	QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
 *	success or a SCIF_CNCT_GNTNACK message on failure and completes
 *	the scif_connect(..) API
 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
 *	in step 4 is woken up and completes the scif_accept(..) API
 * 7) The SCIF connection is now established between the two SCIF endpoints.
 */
static int scif_conn_func(struct scif_endpt *ep)
{
	int err = 0;
	struct scifmsg msg;
	struct device *spdev;

	err = scif_reserve_dma_chan(ep);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		ep->state = SCIFEP_BOUND;
		goto connect_error_simple;
	}
	/* Initiate the first part of the endpoint QP setup */
	err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
				    SCIF_ENDPT_QP_SIZE, ep->remote_dev);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s err %d qp_offset 0x%llx\n",
			__func__, err, ep->qp_info.qp_offset);
		ep->state = SCIFEP_BOUND;
		goto connect_error_simple;
	}

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto cleanup_qp;
	}
	/* Format connect message and send it */
	msg.src = ep->port;
	msg.dst = ep->conn_port;
	msg.uop = SCIF_CNCT_REQ;
	msg.payload[0] = (u64)ep;
	msg.payload[1] = ep->qp_info.qp_offset;
	err = _scif_nodeqp_send(ep->remote_dev, &msg);
	if (err)
		goto connect_error_dec;
	scif_put_peer_dev(spdev);
	/*
	 * Wait for the remote node to respond with SCIF_CNCT_GNT or
	 * SCIF_CNCT_REJ message.
	 */
	err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
				 SCIF_NODE_ALIVE_TIMEOUT);
	if (!err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d timeout\n", __func__, __LINE__);
		ep->state = SCIFEP_BOUND;
	}
	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto cleanup_qp;
	}
	if (ep->state == SCIFEP_MAPPING) {
		err = scif_setup_qp_connect_response(ep->remote_dev,
						     ep->qp_info.qp,
						     ep->qp_info.gnt_pld);
		/*
		 * If the resources to map the queue are not available then
		 * we need to tell the other side to terminate the accept.
		 */
		if (err) {
			dev_err(&ep->remote_dev->sdev->dev,
				"%s %d err %d\n", __func__, __LINE__, err);
			msg.uop = SCIF_CNCT_GNTNACK;
			msg.payload[0] = ep->remote_ep;
			_scif_nodeqp_send(ep->remote_dev, &msg);
			ep->state = SCIFEP_BOUND;
			goto connect_error_dec;
		}

		msg.uop = SCIF_CNCT_GNTACK;
		msg.payload[0] = ep->remote_ep;
		err = _scif_nodeqp_send(ep->remote_dev, &msg);
		if (err) {
			ep->state = SCIFEP_BOUND;
			goto connect_error_dec;
		}
		ep->state = SCIFEP_CONNECTED;
		mutex_lock(&scif_info.connlock);
		list_add_tail(&ep->list, &scif_info.connected);
		mutex_unlock(&scif_info.connlock);
		dev_dbg(&ep->remote_dev->sdev->dev,
			"SCIFAPI connect: ep %p connected\n", ep);
	} else if (ep->state == SCIFEP_BOUND) {
		dev_dbg(&ep->remote_dev->sdev->dev,
			"SCIFAPI connect: ep %p connection refused\n", ep);
		err = -ECONNREFUSED;
		goto connect_error_dec;
	}
	scif_put_peer_dev(spdev);
	return err;
connect_error_dec:
	scif_put_peer_dev(spdev);
cleanup_qp:
	scif_cleanup_ep_qp(ep);
connect_error_simple:
	return err;
}

/*
 * scif_conn_handler:
 *
 * Workqueue handler for servicing non-blocking SCIF connect
 *
 */
void scif_conn_handler(struct work_struct *work)
{
	struct scif_endpt *ep;

	do {
		ep = NULL;
		spin_lock(&scif_info.nb_connect_lock);
		if (!list_empty(&scif_info.nb_connect_list)) {
			ep = list_first_entry(&scif_info.nb_connect_list,
					      struct scif_endpt, conn_list);
			list_del(&ep->conn_list);
		}
		spin_unlock(&scif_info.nb_connect_lock);
		if (ep) {
			ep->conn_err = scif_conn_func(ep);
			wake_up_interruptible(&ep->conn_pend_wq);
		}
	} while (ep);
}

int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	struct scif_dev *remote_dev;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
		scif_ep_states[ep->state]);

	if (!scif_dev || dst->node > scif_info.maxid)
		return -ENODEV;

	might_sleep();

	remote_dev = &scif_dev[dst->node];
	spdev = scif_get_peer_dev(remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		return err;
	}

	spin_lock(&ep->lock);
	switch (ep->state) {
	case SCIFEP_ZOMBIE:
	case SCIFEP_CLOSING:
		err = -EINVAL;
		break;
	case SCIFEP_DISCONNECTED:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		else
			err = -EINVAL;
		break;
	case SCIFEP_LISTENING:
	case SCIFEP_CLLISTEN:
		err = -EOPNOTSUPP;
		break;
	case SCIFEP_CONNECTING:
	case SCIFEP_MAPPING:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			err = -EINPROGRESS;
		else
			err = -EISCONN;
		break;
	case SCIFEP_CONNECTED:
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		else
			err = -EISCONN;
		break;
	case SCIFEP_UNBOUND:
		err = scif_get_new_port();
		if (err < 0)
			break;
		ep->port.port = err;
		ep->port.node = scif_info.nodeid;
		ep->conn_async_state = ASYNC_CONN_IDLE;
		/* Fall through */
	case SCIFEP_BOUND:
		/*
		 * If a non-blocking connect has already been initiated
		 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
		 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
		 * SCIFEP_BOUND due to an error in the connection process
		 * (e.g., connection refused). If conn_async_state is
		 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
		 * so that the error status can be collected. If the state is
		 * already ASYNC_CONN_FLUSH_WORK - then set the error to
		 * EINPROGRESS since some other thread is waiting to collect
		 * the error status.
		 */
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
			ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
		} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
			err = -EINPROGRESS;
		} else {
			ep->conn_port = *dst;
			init_waitqueue_head(&ep->sendwq);
			init_waitqueue_head(&ep->recvwq);
			init_waitqueue_head(&ep->conwq);
			ep->conn_async_state = 0;

			if (unlikely(non_block))
				ep->conn_async_state = ASYNC_CONN_INPROGRESS;
		}
		break;
	}

	if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
		goto connect_simple_unlock1;

	ep->state = SCIFEP_CONNECTING;
	ep->remote_dev = &scif_dev[dst->node];
	ep->qp_info.qp->magic = SCIFEP_MAGIC;
	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
		init_waitqueue_head(&ep->conn_pend_wq);
		spin_lock(&scif_info.nb_connect_lock);
		list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
		spin_unlock(&scif_info.nb_connect_lock);
		err = -EINPROGRESS;
		schedule_work(&scif_info.conn_work);
	}
connect_simple_unlock1:
	spin_unlock(&ep->lock);
	scif_put_peer_dev(spdev);
	if (err) {
		return err;
	} else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
		flush_work(&scif_info.conn_work);
		err = ep->conn_err;
		spin_lock(&ep->lock);
		ep->conn_async_state = ASYNC_CONN_IDLE;
		spin_unlock(&ep->lock);
	} else {
		err = scif_conn_func(ep);
	}
	return err;
}

int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
{
	return __scif_connect(epd, dst, false);
}
EXPORT_SYMBOL_GPL(scif_connect);
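
/*
 * Illustrative sketch only: a kernel-mode client connecting to a remote
 * listener, the initiating side of the connection flow described above.
 * The destination port (2000) is an example placeholder matching the
 * hypothetical listener sketch earlier in this file.
 */
static scif_epd_t __maybe_unused scif_connect_sketch(u16 node)
{
	struct scif_port_id dst = { .node = node, .port = 2000 };
	scif_epd_t epd;
	int err;

	epd = scif_open();
	if (!epd)
		return NULL;
	/* Blocking connect: returns only once the handshake completes */
	err = scif_connect(epd, &dst);
	if (err < 0) {
		scif_close(epd);
		return NULL;
	}
	return epd;
}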

/**
 * scif_accept() - Accept a connection request from the remote node
 *
 * The function accepts a connection request from the remote node.  Successful
 * completion is indicated by a new end point being created and passed back
 * to the caller for future reference.
 *
 * Upon successful completion zero will be returned and the peer information
 * will be filled in.
 *
 * If the end point is not in the listening state -EINVAL will be returned.
 *
 * If resource allocation fails during the connection sequence then -ENOMEM
 * will be returned.
 *
 * If the function is called in asynchronous mode (without SCIF_ACCEPT_SYNC)
 * and no connection requests are pending it will return -EAGAIN.
 *
 * If the remote side is not sending any connection requests the caller may
 * terminate this function with a signal.  If so, -EINTR will be returned.
 */
int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
		scif_epd_t *newepd, int flags)
{
	struct scif_endpt *lep = (struct scif_endpt *)epd;
	struct scif_endpt *cep;
	struct scif_conreq *conreq;
	struct scifmsg msg;
	int err;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);

	if (flags & ~SCIF_ACCEPT_SYNC)
		return -EINVAL;

	if (!peer || !newepd)
		return -EINVAL;

	might_sleep();
	spin_lock(&lep->lock);
	if (lep->state != SCIFEP_LISTENING) {
		spin_unlock(&lep->lock);
		return -EINVAL;
	}

	if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
		/* No connection request present and we do not want to wait */
		spin_unlock(&lep->lock);
		return -EAGAIN;
	}

	lep->files = current->files;
retry_connection:
	spin_unlock(&lep->lock);
	/* Wait for the remote node to send us a SCIF_CNCT_REQ */
	err = wait_event_interruptible(lep->conwq,
				       (lep->conreqcnt ||
				       (lep->state != SCIFEP_LISTENING)));
	if (err)
		return err;

	if (lep->state != SCIFEP_LISTENING)
		return -EINTR;

	spin_lock(&lep->lock);

	if (!lep->conreqcnt)
		goto retry_connection;

	/* Get the first connect request off the list */
	conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
	list_del(&conreq->list);
	lep->conreqcnt--;
	spin_unlock(&lep->lock);

	/* Fill in the peer information */
	peer->node = conreq->msg.src.node;
	peer->port = conreq->msg.src.port;

	cep = kzalloc(sizeof(*cep), GFP_KERNEL);
	if (!cep) {
		err = -ENOMEM;
		goto scif_accept_error_epalloc;
	}
	spin_lock_init(&cep->lock);
	mutex_init(&cep->sendlock);
	mutex_init(&cep->recvlock);
	cep->state = SCIFEP_CONNECTING;
	cep->remote_dev = &scif_dev[peer->node];
	cep->remote_ep = conreq->msg.payload[0];

	scif_rma_ep_init(cep);

	err = scif_reserve_dma_chan(cep);
	if (err) {
		dev_err(scif_info.mdev.this_device,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto scif_accept_error_qpalloc;
	}

	cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
	if (!cep->qp_info.qp) {
		err = -ENOMEM;
		goto scif_accept_error_qpalloc;
	}

	err = scif_anon_inode_getfile(cep);
	if (err)
		goto scif_accept_error_anon_inode;

	cep->qp_info.qp->magic = SCIFEP_MAGIC;
	spdev = scif_get_peer_dev(cep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		goto scif_accept_error_map;
	}
	err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
				   conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
				   cep->remote_dev);
	if (err) {
		dev_dbg(&cep->remote_dev->sdev->dev,
			"SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
			lep, cep, err, cep->qp_info.qp_offset);
		scif_put_peer_dev(spdev);
		goto scif_accept_error_map;
	}

	cep->port.node = lep->port.node;
	cep->port.port = lep->port.port;
	cep->peer.node = peer->node;
	cep->peer.port = peer->port;
	init_waitqueue_head(&cep->sendwq);
	init_waitqueue_head(&cep->recvwq);
	init_waitqueue_head(&cep->conwq);

	msg.uop = SCIF_CNCT_GNT;
	msg.src = cep->port;
	msg.payload[0] = cep->remote_ep;
	msg.payload[1] = cep->qp_info.qp_offset;
	msg.payload[2] = (u64)cep;

	err = _scif_nodeqp_send(cep->remote_dev, &msg);
	scif_put_peer_dev(spdev);
	if (err)
		goto scif_accept_error_map;
retry:
	/* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
	err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
				 SCIF_NODE_ACCEPT_TIMEOUT);
	if (!err && scifdev_alive(cep))
		goto retry;
	err = !err ? -ENODEV : 0;
	if (err)
		goto scif_accept_error_map;
	kfree(conreq);

	spin_lock(&cep->lock);

	if (cep->state == SCIFEP_CLOSING) {
		/*
		 * Remote failed to allocate resources and NAKed the grant.
		 * There is at this point nothing referencing the new end point.
		 */
		spin_unlock(&cep->lock);
		scif_teardown_ep(cep);
		kfree(cep);

		/* If called with the sync flag then go back and wait. */
		if (flags & SCIF_ACCEPT_SYNC) {
			spin_lock(&lep->lock);
			goto retry_connection;
		}
		return -EAGAIN;
	}

	scif_get_port(cep->port.port);
	*newepd = (scif_epd_t)cep;
	spin_unlock(&cep->lock);
	return 0;
scif_accept_error_map:
	scif_anon_inode_fput(cep);
scif_accept_error_anon_inode:
	scif_teardown_ep(cep);
scif_accept_error_qpalloc:
	kfree(cep);
scif_accept_error_epalloc:
	msg.uop = SCIF_CNCT_REJ;
	msg.dst.node = conreq->msg.src.node;
	msg.dst.port = conreq->msg.src.port;
	msg.payload[0] = conreq->msg.payload[0];
	msg.payload[1] = conreq->msg.payload[1];
	scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
	kfree(conreq);
	return err;
}
EXPORT_SYMBOL_GPL(scif_accept);
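
/*
 * Illustrative sketch only: a kernel thread draining connection requests
 * from a listening endpoint. SCIF_ACCEPT_SYNC makes scif_accept() block
 * until a SCIF_CNCT_REQ arrives; the per-connection handler callback is
 * hypothetical and stands in for whatever the caller does with the new
 * endpoint.
 */
static int __maybe_unused scif_accept_loop_sketch(scif_epd_t lepd,
						  void (*handle)(scif_epd_t))
{
	struct scif_port_id peer;
	scif_epd_t cepd;
	int err;

	for (;;) {
		/* Blocks until a connection request is pending */
		err = scif_accept(lepd, &peer, &cepd, SCIF_ACCEPT_SYNC);
		if (err == -EINTR)
			continue;	/* interrupted by a signal; retry */
		if (err)
			return err;	/* e.g. -EINVAL, -ENOMEM, -EAGAIN */
		handle(cepd);		/* hand the connected endpoint off */
	}
}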

/*
 * scif_msg_param_check:
 * @epd: The end point returned from scif_open()
 * @len: Length to send or receive
 * @flags: blocking or non blocking
 *
 * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
 */
static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
{
	int ret = -EINVAL;

	if (len < 0)
		goto err_ret;
	if (flags && (!(flags & SCIF_RECV_BLOCK)))
		goto err_ret;
	ret = 0;
err_ret:
	return ret;
}

static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scifmsg notif_msg;
	int curr_xfer_len = 0, sent_len = 0, write_count;
	int ret = 0;
	struct scif_qp *qp = ep->qp_info.qp;

	if (flags & SCIF_SEND_BLOCK)
		might_sleep();

	spin_lock(&ep->lock);
	while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
		write_count = scif_rb_space(&qp->outbound_q);
		if (write_count) {
			/* Best effort to send as much data as possible */
			curr_xfer_len = min(len - sent_len, write_count);
			ret = scif_rb_write(&qp->outbound_q, msg,
					    curr_xfer_len);
			if (ret < 0)
				break;
			/* Success. Update write pointer */
			scif_rb_commit(&qp->outbound_q);
			/*
			 * Send a notification to the peer about the
			 * produced data message.
			 */
			notif_msg.src = ep->port;
			notif_msg.uop = SCIF_CLIENT_SENT;
			notif_msg.payload[0] = ep->remote_ep;
			ret = _scif_nodeqp_send(ep->remote_dev, &notif_msg);
			if (ret)
				break;
			sent_len += curr_xfer_len;
			msg = msg + curr_xfer_len;
			continue;
		}
		curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
		/* Not enough RB space. Return in the non-blocking case */
		if (!(flags & SCIF_SEND_BLOCK))
			break;

		spin_unlock(&ep->lock);
		/* Wait for a SCIF_CLIENT_RCVD message in the blocking case */
		ret =
		wait_event_interruptible(ep->sendwq,
					 (SCIFEP_CONNECTED != ep->state) ||
					 (scif_rb_space(&qp->outbound_q) >=
					 curr_xfer_len));
		spin_lock(&ep->lock);
		if (ret)
			break;
	}
	if (sent_len)
		ret = sent_len;
	else if (!ret && SCIFEP_CONNECTED != ep->state)
		ret = SCIFEP_DISCONNECTED == ep->state ?
			-ECONNRESET : -ENOTCONN;
	spin_unlock(&ep->lock);
	return ret;
}

static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
	int read_size;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scifmsg notif_msg;
	int curr_recv_len = 0, remaining_len = len, read_count;
	int ret = 0;
	struct scif_qp *qp = ep->qp_info.qp;

	if (flags & SCIF_RECV_BLOCK)
		might_sleep();
	spin_lock(&ep->lock);
	while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
				 SCIFEP_DISCONNECTED == ep->state)) {
		read_count = scif_rb_count(&qp->inbound_q, remaining_len);
		if (read_count) {
			/*
			 * Best effort to recv as much data as there
			 * are bytes to read in the RB, particularly
			 * important for the non-blocking case.
			 */
			curr_recv_len = min(remaining_len, read_count);
			read_size = scif_rb_get_next(&qp->inbound_q,
						     msg, curr_recv_len);
			if (ep->state == SCIFEP_CONNECTED) {
				/*
				 * Update the read pointer only if the endpoint
				 * is still connected else the read pointer
				 * might no longer exist since the peer has
				 * freed resources!
				 */
				scif_rb_update_read_ptr(&qp->inbound_q);
				/*
				 * Send a notification to the peer about the
				 * consumed data message only if the EP is in
				 * SCIFEP_CONNECTED state.
				 */
				notif_msg.src = ep->port;
				notif_msg.uop = SCIF_CLIENT_RCVD;
				notif_msg.payload[0] = ep->remote_ep;
				ret = _scif_nodeqp_send(ep->remote_dev,
							&notif_msg);
				if (ret)
					break;
			}
			remaining_len -= curr_recv_len;
			msg = msg + curr_recv_len;
			continue;
		}
		/*
		 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
		 * we will keep looping forever.
		 */
		if (ep->state == SCIFEP_DISCONNECTED)
			break;
		/*
		 * Return in the non-blocking case if there is no data
		 * to read in this iteration.
		 */
		if (!(flags & SCIF_RECV_BLOCK))
			break;
		curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
		spin_unlock(&ep->lock);
		/*
		 * Wait for a SCIF_CLIENT_SENT message in the blocking case
		 * or until the other side disconnects.
		 */
		ret =
		wait_event_interruptible(ep->recvwq,
					 SCIFEP_CONNECTED != ep->state ||
					 scif_rb_count(&qp->inbound_q,
						       curr_recv_len)
					 >= curr_recv_len);
		spin_lock(&ep->lock);
		if (ret)
			break;
	}
	if (len - remaining_len)
		ret = len - remaining_len;
	else if (!ret && ep->state != SCIFEP_CONNECTED)
		ret = ep->state == SCIFEP_DISCONNECTED ?
			-ECONNRESET : -ENOTCONN;
	spin_unlock(&ep->lock);
	return ret;
}

/**
 * scif_user_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_send().
 */
int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	int sent_len = 0;
	char *tmp;
	int loop_len;
	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	err = scif_msg_param_check(epd, len, flags);
	if (err)
		goto send_err;

	tmp = kmalloc(chunk_len, GFP_KERNEL);
	if (!tmp) {
		err = -ENOMEM;
		goto send_err;
	}
	/*
	 * Grabbing the lock before breaking up the transfer in
	 * multiple chunks is required to ensure that messages do
	 * not get fragmented and reordered.
	 */
	mutex_lock(&ep->sendlock);
	while (sent_len != len) {
		loop_len = len - sent_len;
		loop_len = min(chunk_len, loop_len);
		if (copy_from_user(tmp, msg, loop_len)) {
			err = -EFAULT;
			goto send_free_err;
		}
		err = _scif_send(epd, tmp, loop_len, flags);
		if (err < 0)
			goto send_free_err;
		sent_len += err;
		msg += err;
		if (err != loop_len)
			goto send_free_err;
	}
send_free_err:
	mutex_unlock(&ep->sendlock);
	kfree(tmp);
send_err:
	return err < 0 ? err : sent_len;
}

/**
 * scif_user_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from the driver IOCTL entry point
 * only and is a wrapper for _scif_recv().
 */
int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int err = 0;
	int recv_len = 0;
	char *tmp;
	int loop_len;
	int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	err = scif_msg_param_check(epd, len, flags);
	if (err)
		goto recv_err;

	tmp = kmalloc(chunk_len, GFP_KERNEL);
	if (!tmp) {
		err = -ENOMEM;
		goto recv_err;
	}
	/*
	 * Grabbing the lock before breaking up the transfer in
	 * multiple chunks is required to ensure that messages do
	 * not get fragmented and reordered.
	 */
	mutex_lock(&ep->recvlock);
	while (recv_len != len) {
		loop_len = len - recv_len;
		loop_len = min(chunk_len, loop_len);
		err = _scif_recv(epd, tmp, loop_len, flags);
		if (err < 0)
			goto recv_free_err;
		if (copy_to_user(msg, tmp, err)) {
			err = -EFAULT;
			goto recv_free_err;
		}
		recv_len += err;
		msg += err;
		if (err != loop_len)
			goto recv_free_err;
	}
recv_free_err:
	mutex_unlock(&ep->recvlock);
	kfree(tmp);
recv_err:
	return err < 0 ? err : recv_len;
}

/**
 * scif_send() - Send data to connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address of data to send
 * @len: Length to send
 * @flags: blocking or non blocking
 *
 * This function is called from kernel mode only and is
 * a wrapper for _scif_send().
 */
int scif_send(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	ret = scif_msg_param_check(epd, len, flags);
	if (ret)
		return ret;
	if (!ep->remote_dev)
		return -ENOTCONN;
	/*
	 * Grab the mutex lock in the blocking case only
	 * to ensure messages do not get fragmented/reordered.
	 * The non blocking mode is protected using spin locks
	 * in _scif_send().
	 */
	if (flags & SCIF_SEND_BLOCK)
		mutex_lock(&ep->sendlock);

	ret = _scif_send(epd, msg, len, flags);

	if (flags & SCIF_SEND_BLOCK)
		mutex_unlock(&ep->sendlock);
	return ret;
}
EXPORT_SYMBOL_GPL(scif_send);

/**
 * scif_recv() - Receive data from connection queue
 * @epd: The end point returned from scif_open()
 * @msg: Address to place data
 * @len: Length to receive
 * @flags: blocking or non blocking
 *
 * This function is called from kernel mode only and is
 * a wrapper for _scif_recv().
 */
int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	int ret;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
	if (!len)
		return 0;

	ret = scif_msg_param_check(epd, len, flags);
	if (ret)
		return ret;
	/*
	 * Grab the mutex lock in the blocking case only
	 * to ensure messages do not get fragmented/reordered.
	 * The non blocking mode is protected using spin locks
	 * in _scif_recv().
	 */
	if (flags & SCIF_RECV_BLOCK)
		mutex_lock(&ep->recvlock);

	ret = _scif_recv(epd, msg, len, flags);

	if (flags & SCIF_RECV_BLOCK)
		mutex_unlock(&ep->recvlock);

	return ret;
}
EXPORT_SYMBOL_GPL(scif_recv);
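
/*
 * Illustrative sketch only: a blocking kernel-mode message exchange over a
 * connected endpoint. With SCIF_SEND_BLOCK/SCIF_RECV_BLOCK the calls return
 * only once the full length has been transferred or the connection is torn
 * down; the buffer contents here are arbitrary example data.
 */
static int __maybe_unused scif_echo_sketch(scif_epd_t epd)
{
	char buf[64] = "ping";
	int ret;

	ret = scif_send(epd, buf, sizeof(buf), SCIF_SEND_BLOCK);
	if (ret < 0)
		return ret;	/* e.g. -ENOTCONN or -ECONNRESET */
	ret = scif_recv(epd, buf, sizeof(buf), SCIF_RECV_BLOCK);
	if (ret < 0)
		return ret;
	return 0;
}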

static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
				   poll_table *p, struct scif_endpt *ep)
{
	/*
	 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
	 * and regrab it afterwards. Because the endpoint state might have
	 * changed while the lock was given up, the state must be checked
	 * again after re-acquiring the lock. The code in __scif_pollfd(..)
	 * does this.
	 */
	spin_unlock(&ep->lock);
	poll_wait(f, wq, p);
	spin_lock(&ep->lock);
}

__poll_t
__scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
{
	__poll_t mask = 0;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);

	spin_lock(&ep->lock);

	/* Endpoint is waiting for a non-blocking connect to complete */
	if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
		_scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
		if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
			if (ep->state == SCIFEP_CONNECTED ||
			    ep->state == SCIFEP_DISCONNECTED ||
			    ep->conn_err)
				mask |= EPOLLOUT;
			goto exit;
		}
	}

	/* Endpoint is listening for incoming connection requests */
	if (ep->state == SCIFEP_LISTENING) {
		_scif_poll_wait(f, &ep->conwq, wait, ep);
		if (ep->state == SCIFEP_LISTENING) {
			if (ep->conreqcnt)
				mask |= EPOLLIN;
			goto exit;
		}
	}

	/* Endpoint is connected or disconnected */
	if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
		if (poll_requested_events(wait) & EPOLLIN)
			_scif_poll_wait(f, &ep->recvwq, wait, ep);
		if (poll_requested_events(wait) & EPOLLOUT)
			_scif_poll_wait(f, &ep->sendwq, wait, ep);
		if (ep->state == SCIFEP_CONNECTED ||
		    ep->state == SCIFEP_DISCONNECTED) {
			/* Data can be read without blocking */
			if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
				mask |= EPOLLIN;
			/* Data can be written without blocking */
			if (scif_rb_space(&ep->qp_info.qp->outbound_q))
				mask |= EPOLLOUT;
			/* Return EPOLLHUP if endpoint is disconnected */
			if (ep->state == SCIFEP_DISCONNECTED)
				mask |= EPOLLHUP;
			goto exit;
		}
	}

	/* Return EPOLLERR if the endpoint is in none of the above states */
	mask |= EPOLLERR;
exit:
	spin_unlock(&ep->lock);
	return mask;
}

/**
 * scif_poll() - Kernel mode SCIF poll
 * @ufds: Array of scif_pollepd structures containing the end points
 *	  and events to poll on
 * @nfds: Size of the ufds array
 * @timeout_msecs: Timeout in msecs; a negative value implies an infinite
 *		   timeout
 *
 * The code flow in this function is based on do_poll(..) in select.c
 *
 * Returns the number of endpoints which have pending events or 0 in
 * the event of a timeout. If a signal is used for wake up, -EINTR is
 * returned.
 */
int
scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
{
	struct poll_wqueues table;
	poll_table *pt;
	int i, count = 0, timed_out = timeout_msecs == 0;
	__poll_t mask;
	u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
		: msecs_to_jiffies(timeout_msecs);

	poll_initwait(&table);
	pt = &table.pt;
	while (1) {
		for (i = 0; i < nfds; i++) {
			pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
			mask = __scif_pollfd(ufds[i].epd->anon,
					     pt, ufds[i].epd);
			mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
			if (mask) {
				count++;
				pt->_qproc = NULL;
			}
			ufds[i].revents = mask;
		}
		pt->_qproc = NULL;
		if (!count) {
			count = table.error;
			if (signal_pending(current))
				count = -EINTR;
		}
		if (count || timed_out)
			break;

		if (!schedule_timeout_interruptible(timeout))
			timed_out = 1;
	}
	poll_freewait(&table);
	return count;
}
EXPORT_SYMBOL_GPL(scif_poll);
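
/*
 * Illustrative sketch only: waiting on two endpoints at once with
 * scif_poll(), mirroring poll(2) semantics as the kernel-doc above
 * describes. The 1000 ms timeout is an arbitrary example value, and this
 * assumes scif_pollepd carries __poll_t events/revents as used in this
 * file.
 */
static int __maybe_unused scif_poll_sketch(scif_epd_t a, scif_epd_t b)
{
	struct scif_pollepd ufds[2] = {
		{ .epd = a, .events = EPOLLIN },
		{ .epd = b, .events = EPOLLOUT },
	};
	int ret;

	ret = scif_poll(ufds, 2, 1000);
	if (ret <= 0)
		return ret;	/* 0 on timeout, -EINTR on signal */
	if (ufds[0].revents & EPOLLIN)
		pr_debug("endpoint a is readable\n");
	if (ufds[1].revents & EPOLLOUT)
		pr_debug("endpoint b is writable\n");
	return ret;
}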

int scif_get_node_ids(u16 *nodes, int len, u16 *self)
{
	int online = 0;
	int offset = 0;
	int node;

	if (!scif_is_mgmt_node())
		scif_get_node_info();

	*self = scif_info.nodeid;
	mutex_lock(&scif_info.conflock);
	len = min_t(int, len, scif_info.total);
	for (node = 0; node <= scif_info.maxid; node++) {
		if (_scifdev_alive(&scif_dev[node])) {
			online++;
			if (offset < len)
				nodes[offset++] = node;
		}
	}
	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
		scif_info.total, online, offset);
	mutex_unlock(&scif_info.conflock);

	return online;
}
EXPORT_SYMBOL_GPL(scif_get_node_ids);
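
/*
 * Illustrative sketch only: discovering the online SCIF nodes. The array
 * size is an arbitrary example upper bound; the return value is the total
 * number of nodes online, which may exceed the number of entries actually
 * filled in when the array is too small.
 */
static void __maybe_unused scif_list_nodes_sketch(void)
{
	u16 nodes[32];
	u16 self;
	int online, i;

	online = scif_get_node_ids(nodes, ARRAY_SIZE(nodes), &self);
	pr_debug("self %u, %d node(s) online\n", self, online);
	for (i = 0; i < min_t(int, online, ARRAY_SIZE(nodes)); i++)
		pr_debug("node %u is online\n", nodes[i]);
}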

static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
{
	struct scif_client *client =
		container_of(si, struct scif_client, si);
	struct scif_peer_dev *spdev =
		container_of(dev, struct scif_peer_dev, dev);

	if (client->probe)
		client->probe(spdev);
	return 0;
}

static void scif_remove_client_dev(struct device *dev,
				   struct subsys_interface *si)
{
	struct scif_client *client =
		container_of(si, struct scif_client, si);
	struct scif_peer_dev *spdev =
		container_of(dev, struct scif_peer_dev, dev);

	if (client->remove)
		client->remove(spdev);
}

void scif_client_unregister(struct scif_client *client)
{
	subsys_interface_unregister(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_unregister);

int scif_client_register(struct scif_client *client)
{
	struct subsys_interface *si = &client->si;

	si->name = client->name;
	si->subsys = &scif_peer_bus;
	si->add_dev = scif_add_client_dev;
	si->remove_dev = scif_remove_client_dev;

	return subsys_interface_register(&client->si);
}
EXPORT_SYMBOL_GPL(scif_client_register);
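
/*
 * Illustrative sketch only: how a kernel subsystem might register as a
 * SCIF client so it receives probe/remove callbacks as peer devices come
 * and go. The client name and callback bodies are hypothetical.
 */
static void __maybe_unused scif_example_probe(struct scif_peer_dev *spdev)
{
	dev_dbg(&spdev->dev, "peer node %u came online\n", spdev->dnode);
}

static void __maybe_unused scif_example_remove(struct scif_peer_dev *spdev)
{
	dev_dbg(&spdev->dev, "peer node %u went away\n", spdev->dnode);
}

static struct scif_client __maybe_unused scif_example_client = {
	.name = "scif_example",
	.probe = scif_example_probe,
	.remove = scif_example_remove,
};

/* Registration would then be: scif_client_register(&scif_example_client); */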