1 /*
2 * Intel MIC Platform Software Stack (MPSS)
3 *
4 * Copyright(c) 2014 Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License, version 2, as
8 * published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 *
15 * Intel SCIF driver.
16 *
17 */
18 #include <linux/scif.h>
19 #include "scif_main.h"
20 #include "scif_map.h"
21
22 static const char * const scif_ep_states[] = {
23 "Unbound",
24 "Bound",
25 "Listening",
26 "Connected",
27 "Connecting",
28 "Mapping",
29 "Closing",
30 "Close Listening",
31 "Disconnected",
32 "Zombie"};
33
34 enum conn_async_state {
35 ASYNC_CONN_IDLE = 1, /* ep setup for async connect */
36 ASYNC_CONN_INPROGRESS, /* async connect in progress */
37 ASYNC_CONN_FLUSH_WORK /* async work flush in progress */
38 };
39
40 /*
41 * File operations for anonymous inode file associated with a SCIF endpoint,
42 * used in kernel mode SCIF poll. Kernel mode SCIF poll calls portions of the
43 * poll API in the kernel and these take in a struct file *. Since a struct
44 * file is not available to kernel mode SCIF, it uses an anonymous file for
45 * this purpose.
46 */
47 const struct file_operations scif_anon_fops = {
48 .owner = THIS_MODULE,
49 };
50
51 scif_epd_t scif_open(void)
52 {
53 struct scif_endpt *ep;
54 int err;
55
56 might_sleep();
57 ep = kzalloc(sizeof(*ep), GFP_KERNEL);
58 if (!ep)
59 goto err_ep_alloc;
60
61 ep->qp_info.qp = kzalloc(sizeof(*ep->qp_info.qp), GFP_KERNEL);
62 if (!ep->qp_info.qp)
63 goto err_qp_alloc;
64
65 err = scif_anon_inode_getfile(ep);
66 if (err)
67 goto err_anon_inode;
68
69 spin_lock_init(&ep->lock);
70 mutex_init(&ep->sendlock);
71 mutex_init(&ep->recvlock);
72
73 scif_rma_ep_init(ep);
74 ep->state = SCIFEP_UNBOUND;
75 dev_dbg(scif_info.mdev.this_device,
76 "SCIFAPI open: ep %p success\n", ep);
77 return ep;
78
79 err_anon_inode:
80 kfree(ep->qp_info.qp);
81 err_qp_alloc:
82 kfree(ep);
83 err_ep_alloc:
84 return NULL;
85 }
86 EXPORT_SYMBOL_GPL(scif_open);
87
88 /*
89 * scif_disconnect_ep - Disconnects the endpoint if found
90 * @epd: The end point returned from scif_open()
91 */
92 static struct scif_endpt *scif_disconnect_ep(struct scif_endpt *ep)
93 {
94 struct scifmsg msg;
95 struct scif_endpt *fep = NULL;
96 struct scif_endpt *tmpep;
97 struct list_head *pos, *tmpq;
98 int err;
99
100 /*
101 * Wake up any threads blocked in send()/recv() before closing
102 * out the connection. Grabbing and releasing the send/recv lock
103 * will ensure that any blocked senders/receivers have exited for
104 * Ring 0 endpoints. It is a Ring 0 bug to call send/recv after
105 * close. Ring 3 endpoints are not affected since close will not
106 * be called while there are IOCTLs executing.
107 */
108 wake_up_interruptible(&ep->sendwq);
109 wake_up_interruptible(&ep->recvwq);
110 mutex_lock(&ep->sendlock);
111 mutex_unlock(&ep->sendlock);
112 mutex_lock(&ep->recvlock);
113 mutex_unlock(&ep->recvlock);
114
115 /* Remove from the connected list */
116 mutex_lock(&scif_info.connlock);
117 list_for_each_safe(pos, tmpq, &scif_info.connected) {
118 tmpep = list_entry(pos, struct scif_endpt, list);
119 if (tmpep == ep) {
120 list_del(pos);
121 fep = tmpep;
122 spin_lock(&ep->lock);
123 break;
124 }
125 }
126
127 if (!fep) {
128 /*
129 * The other side has completed the disconnect before
130 * the end point could be removed from the list. Therefore
131 * the ep lock is not held; traverse the disconnected
132 * list to find the endpoint, then release the conn lock.
133 */
134 list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
135 tmpep = list_entry(pos, struct scif_endpt, list);
136 if (tmpep == ep) {
137 list_del(pos);
138 break;
139 }
140 }
141 mutex_unlock(&scif_info.connlock);
142 return NULL;
143 }
144
145 init_completion(&ep->discon);
146 msg.uop = SCIF_DISCNCT;
147 msg.src = ep->port;
148 msg.dst = ep->peer;
149 msg.payload[0] = (u64)ep;
150 msg.payload[1] = ep->remote_ep;
151
152 err = scif_nodeqp_send(ep->remote_dev, &msg);
153 spin_unlock(&ep->lock);
154 mutex_unlock(&scif_info.connlock);
155
156 if (!err)
157 /* Wait for the remote node to respond with SCIF_DISCNT_ACK */
158 wait_for_completion_timeout(&ep->discon,
159 SCIF_NODE_ALIVE_TIMEOUT);
160 return ep;
161 }
162
163 int scif_close(scif_epd_t epd)
164 {
165 struct scif_endpt *ep = (struct scif_endpt *)epd;
166 struct scif_endpt *tmpep;
167 struct list_head *pos, *tmpq;
168 enum scif_epd_state oldstate;
169 bool flush_conn;
170
171 dev_dbg(scif_info.mdev.this_device, "SCIFAPI close: ep %p %s\n",
172 ep, scif_ep_states[ep->state]);
173 might_sleep();
174 spin_lock(&ep->lock);
175 flush_conn = (ep->conn_async_state == ASYNC_CONN_INPROGRESS);
176 spin_unlock(&ep->lock);
177
178 if (flush_conn)
179 flush_work(&scif_info.conn_work);
180
181 spin_lock(&ep->lock);
182 oldstate = ep->state;
183
184 ep->state = SCIFEP_CLOSING;
185
186 switch (oldstate) {
187 case SCIFEP_ZOMBIE:
188 dev_err(scif_info.mdev.this_device,
189 "SCIFAPI close: zombie state unexpected\n");
190 /* fall through */
191 case SCIFEP_DISCONNECTED:
192 spin_unlock(&ep->lock);
193 scif_unregister_all_windows(epd);
194 /* Remove from the disconnected list */
195 mutex_lock(&scif_info.connlock);
196 list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
197 tmpep = list_entry(pos, struct scif_endpt, list);
198 if (tmpep == ep) {
199 list_del(pos);
200 break;
201 }
202 }
203 mutex_unlock(&scif_info.connlock);
204 break;
205 case SCIFEP_UNBOUND:
206 case SCIFEP_BOUND:
207 case SCIFEP_CONNECTING:
208 spin_unlock(&ep->lock);
209 break;
210 case SCIFEP_MAPPING:
211 case SCIFEP_CONNECTED:
212 case SCIFEP_CLOSING:
213 {
214 spin_unlock(&ep->lock);
215 scif_unregister_all_windows(epd);
216 scif_disconnect_ep(ep);
217 break;
218 }
219 case SCIFEP_LISTENING:
220 case SCIFEP_CLLISTEN:
221 {
222 struct scif_conreq *conreq;
223 struct scifmsg msg;
224 struct scif_endpt *aep;
225
226 spin_unlock(&ep->lock);
227 mutex_lock(&scif_info.eplock);
228
229 /* remove from listen list */
230 list_for_each_safe(pos, tmpq, &scif_info.listen) {
231 tmpep = list_entry(pos, struct scif_endpt, list);
232 if (tmpep == ep)
233 list_del(pos);
234 }
235 /* Remove any dangling accepts */
236 while (ep->acceptcnt) {
237 aep = list_first_entry(&ep->li_accept,
238 struct scif_endpt, liacceptlist);
239 list_del(&aep->liacceptlist);
240 scif_put_port(aep->port.port);
241 list_for_each_safe(pos, tmpq, &scif_info.uaccept) {
242 tmpep = list_entry(pos, struct scif_endpt,
243 miacceptlist);
244 if (tmpep == aep) {
245 list_del(pos);
246 break;
247 }
248 }
249 mutex_unlock(&scif_info.eplock);
250 mutex_lock(&scif_info.connlock);
251 list_for_each_safe(pos, tmpq, &scif_info.connected) {
252 tmpep = list_entry(pos,
253 struct scif_endpt, list);
254 if (tmpep == aep) {
255 list_del(pos);
256 break;
257 }
258 }
259 list_for_each_safe(pos, tmpq, &scif_info.disconnected) {
260 tmpep = list_entry(pos,
261 struct scif_endpt, list);
262 if (tmpep == aep) {
263 list_del(pos);
264 break;
265 }
266 }
267 mutex_unlock(&scif_info.connlock);
268 scif_teardown_ep(aep);
269 mutex_lock(&scif_info.eplock);
270 scif_add_epd_to_zombie_list(aep, SCIF_EPLOCK_HELD);
271 ep->acceptcnt--;
272 }
273
274 spin_lock(&ep->lock);
275 mutex_unlock(&scif_info.eplock);
276
277 /* Remove and reject any pending connection requests. */
278 while (ep->conreqcnt) {
279 conreq = list_first_entry(&ep->conlist,
280 struct scif_conreq, list);
281 list_del(&conreq->list);
282
283 msg.uop = SCIF_CNCT_REJ;
284 msg.dst.node = conreq->msg.src.node;
285 msg.dst.port = conreq->msg.src.port;
286 msg.payload[0] = conreq->msg.payload[0];
287 msg.payload[1] = conreq->msg.payload[1];
288 /*
289 * No Error Handling on purpose for scif_nodeqp_send().
290 * If the remote node is lost we still want to free the
291 * connection requests on the self node.
292 */
293 scif_nodeqp_send(&scif_dev[conreq->msg.src.node],
294 &msg);
295 ep->conreqcnt--;
296 kfree(conreq);
297 }
298
299 spin_unlock(&ep->lock);
300 /* If a kSCIF accept is waiting wake it up */
301 wake_up_interruptible(&ep->conwq);
302 break;
303 }
304 }
305 scif_put_port(ep->port.port);
306 scif_anon_inode_fput(ep);
307 scif_teardown_ep(ep);
308 scif_add_epd_to_zombie_list(ep, !SCIF_EPLOCK_HELD);
309 return 0;
310 }
311 EXPORT_SYMBOL_GPL(scif_close);
312
313 /**
314 * scif_flush() - Wakes up any blocking accepts. The endpoint will no longer
315 * accept new connections.
316 * @epd: The end point returned from scif_open()
317 */
318 int __scif_flush(scif_epd_t epd)
319 {
320 struct scif_endpt *ep = (struct scif_endpt *)epd;
321
322 switch (ep->state) {
323 case SCIFEP_LISTENING:
324 {
325 ep->state = SCIFEP_CLLISTEN;
326
327 /* If an accept is waiting wake it up */
328 wake_up_interruptible(&ep->conwq);
329 break;
330 }
331 default:
332 break;
333 }
334 return 0;
335 }
336
337 int scif_bind(scif_epd_t epd, u16 pn)
338 {
339 struct scif_endpt *ep = (struct scif_endpt *)epd;
340 int ret = 0;
341 int tmp;
342
343 dev_dbg(scif_info.mdev.this_device,
344 "SCIFAPI bind: ep %p %s requested port number %d\n",
345 ep, scif_ep_states[ep->state], pn);
346 if (pn) {
347 /*
348 * Similar to IETF RFC 1700, SCIF ports below
349 * SCIF_ADMIN_PORT_END can only be bound by system (or root)
350 * processes or by processes executed by privileged users.
351 */
352 if (pn < SCIF_ADMIN_PORT_END && !capable(CAP_SYS_ADMIN)) {
353 ret = -EACCES;
354 goto scif_bind_admin_exit;
355 }
356 }
357
358 spin_lock(&ep->lock);
359 if (ep->state == SCIFEP_BOUND) {
360 ret = -EINVAL;
361 goto scif_bind_exit;
362 } else if (ep->state != SCIFEP_UNBOUND) {
363 ret = -EISCONN;
364 goto scif_bind_exit;
365 }
366
367 if (pn) {
368 tmp = scif_rsrv_port(pn);
369 if (tmp != pn) {
370 ret = -EINVAL;
371 goto scif_bind_exit;
372 }
373 } else {
374 ret = scif_get_new_port();
375 if (ret < 0)
376 goto scif_bind_exit;
377 pn = ret;
378 }
379
380 ep->state = SCIFEP_BOUND;
381 ep->port.node = scif_info.nodeid;
382 ep->port.port = pn;
383 ep->conn_async_state = ASYNC_CONN_IDLE;
384 ret = pn;
385 dev_dbg(scif_info.mdev.this_device,
386 "SCIFAPI bind: bound to port number %d\n", pn);
387 scif_bind_exit:
388 spin_unlock(&ep->lock);
389 scif_bind_admin_exit:
390 return ret;
391 }
392 EXPORT_SYMBOL_GPL(scif_bind);
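
/*
 * Illustrative sketch (not part of this driver): how a kernel-mode client
 * might bind an endpoint. Passing pn == 0 asks SCIF to allocate a free port;
 * binding to an explicit port below SCIF_ADMIN_PORT_END requires
 * CAP_SYS_ADMIN as described above. The function name is an assumption.
 */
static int __maybe_unused example_bind_sketch(scif_epd_t epd)
{
	int port;

	/* pn == 0 requests a dynamically allocated port */
	port = scif_bind(epd, 0);
	if (port < 0)
		return port;	/* -EACCES, -EINVAL, -EISCONN, ... */
	/* On success scif_bind() returns the port the endpoint is bound to */
	pr_debug("example: bound to port %d\n", port);
	return 0;
}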
393
394 int scif_listen(scif_epd_t epd, int backlog)
395 {
396 struct scif_endpt *ep = (struct scif_endpt *)epd;
397
398 dev_dbg(scif_info.mdev.this_device,
399 "SCIFAPI listen: ep %p %s\n", ep, scif_ep_states[ep->state]);
400 spin_lock(&ep->lock);
401 switch (ep->state) {
402 case SCIFEP_ZOMBIE:
403 case SCIFEP_CLOSING:
404 case SCIFEP_CLLISTEN:
405 case SCIFEP_UNBOUND:
406 case SCIFEP_DISCONNECTED:
407 spin_unlock(&ep->lock);
408 return -EINVAL;
409 case SCIFEP_LISTENING:
410 case SCIFEP_CONNECTED:
411 case SCIFEP_CONNECTING:
412 case SCIFEP_MAPPING:
413 spin_unlock(&ep->lock);
414 return -EISCONN;
415 case SCIFEP_BOUND:
416 break;
417 }
418
419 ep->state = SCIFEP_LISTENING;
420 ep->backlog = backlog;
421
422 ep->conreqcnt = 0;
423 ep->acceptcnt = 0;
424 INIT_LIST_HEAD(&ep->conlist);
425 init_waitqueue_head(&ep->conwq);
426 INIT_LIST_HEAD(&ep->li_accept);
427 spin_unlock(&ep->lock);
428
429 /*
430 * Listen status is complete, so delete the qp information, which is not
431 * needed on a listening endpoint, before placing it on the list of listening ep's
432 */
433 scif_teardown_ep(ep);
434 ep->qp_info.qp = NULL;
435
436 mutex_lock(&scif_info.eplock);
437 list_add_tail(&ep->list, &scif_info.listen);
438 mutex_unlock(&scif_info.eplock);
439 return 0;
440 }
441 EXPORT_SYMBOL_GPL(scif_listen);
442
443 /*
444 ************************************************************************
445 * SCIF connection flow:
446 *
447 * 1) A SCIF listening endpoint can call scif_accept(..) to wait for SCIF
448 * connections via a SCIF_CNCT_REQ message
449 * 2) A SCIF endpoint can initiate a SCIF connection by calling
450 * scif_connect(..) which calls scif_setup_qp_connect(..) which
451 * allocates the local qp for the endpoint ring buffer and then sends
452 * a SCIF_CNCT_REQ to the remote node and waits for a SCIF_CNCT_GNT or
453 * a SCIF_CNCT_REJ message
454 * 3) The peer node handles a SCIF_CNCT_REQ via scif_cnctreq_resp(..) which
455 * wakes up any threads blocked in step 1 or sends a SCIF_CNCT_REJ
456 * message otherwise
457 * 4) A thread blocked waiting for incoming connections allocates its local
458 * endpoint QP and ring buffer following which it sends a SCIF_CNCT_GNT
459 * and waits for a SCIF_CNCT_GNT(N)ACK. If the allocation fails then
460 * the node sends a SCIF_CNCT_REJ message
461 * 5) Upon receipt of a SCIF_CNCT_GNT or a SCIF_CNCT_REJ message the
462 * connecting endpoint is woken up as part of handling
463 * scif_cnctgnt_resp(..) following which it maps the remote endpoint's
464 * QP, updates its outbound QP and sends a SCIF_CNCT_GNTACK message on
465 * success or a SCIF_CNCT_GNTNACK message on failure and completes
466 * the scif_connect(..) API
467 * 6) Upon receipt of a SCIF_CNCT_GNT(N)ACK the accepting endpoint blocked
468 * in step 4 is woken up and completes the scif_accept(..) API
469 * 7) The SCIF connection is now established between the two SCIF endpoints.
470 */
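
/*
 * Illustrative sketch (not part of this driver) of the initiating side of
 * the flow above: a kernel-mode client opens an endpoint, binds it and calls
 * scif_connect(), which drives steps 2 and 5. The destination node id, port
 * number and function name are arbitrary assumptions for the example.
 */
static int __maybe_unused example_connect_sketch(void)
{
	struct scif_port_id dst = { .node = 1, .port = 2000 };
	scif_epd_t epd;
	int err;

	epd = scif_open();
	if (!epd)
		return -ENOMEM;

	err = scif_bind(epd, 0);	/* let SCIF pick a local port */
	if (err < 0)
		goto out;

	/* Blocks until the SCIF_CNCT_GNT(N)ACK handling completes */
	err = scif_connect(epd, &dst);
	if (err < 0)
		goto out;
	/* Connected: scif_send()/scif_recv() may now be used */
out:
	scif_close(epd);
	return err < 0 ? err : 0;
}
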
471 static int scif_conn_func(struct scif_endpt *ep)
472 {
473 int err = 0;
474 struct scifmsg msg;
475 struct device *spdev;
476
477 err = scif_reserve_dma_chan(ep);
478 if (err) {
479 dev_err(&ep->remote_dev->sdev->dev,
480 "%s %d err %d\n", __func__, __LINE__, err);
481 ep->state = SCIFEP_BOUND;
482 goto connect_error_simple;
483 }
484 /* Initiate the first part of the endpoint QP setup */
485 err = scif_setup_qp_connect(ep->qp_info.qp, &ep->qp_info.qp_offset,
486 SCIF_ENDPT_QP_SIZE, ep->remote_dev);
487 if (err) {
488 dev_err(&ep->remote_dev->sdev->dev,
489 "%s err %d qp_offset 0x%llx\n",
490 __func__, err, ep->qp_info.qp_offset);
491 ep->state = SCIFEP_BOUND;
492 goto connect_error_simple;
493 }
494
495 spdev = scif_get_peer_dev(ep->remote_dev);
496 if (IS_ERR(spdev)) {
497 err = PTR_ERR(spdev);
498 goto cleanup_qp;
499 }
500 /* Format connect message and send it */
501 msg.src = ep->port;
502 msg.dst = ep->conn_port;
503 msg.uop = SCIF_CNCT_REQ;
504 msg.payload[0] = (u64)ep;
505 msg.payload[1] = ep->qp_info.qp_offset;
506 err = _scif_nodeqp_send(ep->remote_dev, &msg);
507 if (err)
508 goto connect_error_dec;
509 scif_put_peer_dev(spdev);
510 /*
511 * Wait for the remote node to respond with SCIF_CNCT_GNT or
512 * SCIF_CNCT_REJ message.
513 */
514 err = wait_event_timeout(ep->conwq, ep->state != SCIFEP_CONNECTING,
515 SCIF_NODE_ALIVE_TIMEOUT);
516 if (!err) {
517 dev_err(&ep->remote_dev->sdev->dev,
518 "%s %d timeout\n", __func__, __LINE__);
519 ep->state = SCIFEP_BOUND;
520 }
521 spdev = scif_get_peer_dev(ep->remote_dev);
522 if (IS_ERR(spdev)) {
523 err = PTR_ERR(spdev);
524 goto cleanup_qp;
525 }
526 if (ep->state == SCIFEP_MAPPING) {
527 err = scif_setup_qp_connect_response(ep->remote_dev,
528 ep->qp_info.qp,
529 ep->qp_info.gnt_pld);
530 /*
531 * If the resources to map the queue are not available then
532 * we need to tell the other side to terminate the accept
533 */
534 if (err) {
535 dev_err(&ep->remote_dev->sdev->dev,
536 "%s %d err %d\n", __func__, __LINE__, err);
537 msg.uop = SCIF_CNCT_GNTNACK;
538 msg.payload[0] = ep->remote_ep;
539 _scif_nodeqp_send(ep->remote_dev, &msg);
540 ep->state = SCIFEP_BOUND;
541 goto connect_error_dec;
542 }
543
544 msg.uop = SCIF_CNCT_GNTACK;
545 msg.payload[0] = ep->remote_ep;
546 err = _scif_nodeqp_send(ep->remote_dev, &msg);
547 if (err) {
548 ep->state = SCIFEP_BOUND;
549 goto connect_error_dec;
550 }
551 ep->state = SCIFEP_CONNECTED;
552 mutex_lock(&scif_info.connlock);
553 list_add_tail(&ep->list, &scif_info.connected);
554 mutex_unlock(&scif_info.connlock);
555 dev_dbg(&ep->remote_dev->sdev->dev,
556 "SCIFAPI connect: ep %p connected\n", ep);
557 } else if (ep->state == SCIFEP_BOUND) {
558 dev_dbg(&ep->remote_dev->sdev->dev,
559 "SCIFAPI connect: ep %p connection refused\n", ep);
560 err = -ECONNREFUSED;
561 goto connect_error_dec;
562 }
563 scif_put_peer_dev(spdev);
564 return err;
565 connect_error_dec:
566 scif_put_peer_dev(spdev);
567 cleanup_qp:
568 scif_cleanup_ep_qp(ep);
569 connect_error_simple:
570 return err;
571 }
572
573 /*
574 * scif_conn_handler:
575 *
576 * Workqueue handler for servicing non-blocking SCIF connect
577 *
578 */
579 void scif_conn_handler(struct work_struct *work)
580 {
581 struct scif_endpt *ep;
582
583 do {
584 ep = NULL;
585 spin_lock(&scif_info.nb_connect_lock);
586 if (!list_empty(&scif_info.nb_connect_list)) {
587 ep = list_first_entry(&scif_info.nb_connect_list,
588 struct scif_endpt, conn_list);
589 list_del(&ep->conn_list);
590 }
591 spin_unlock(&scif_info.nb_connect_lock);
592 if (ep) {
593 ep->conn_err = scif_conn_func(ep);
594 wake_up_interruptible(&ep->conn_pend_wq);
595 }
596 } while (ep);
597 }
598
599 int __scif_connect(scif_epd_t epd, struct scif_port_id *dst, bool non_block)
600 {
601 struct scif_endpt *ep = (struct scif_endpt *)epd;
602 int err = 0;
603 struct scif_dev *remote_dev;
604 struct device *spdev;
605
606 dev_dbg(scif_info.mdev.this_device, "SCIFAPI connect: ep %p %s\n", ep,
607 scif_ep_states[ep->state]);
608
609 if (!scif_dev || dst->node > scif_info.maxid)
610 return -ENODEV;
611
612 might_sleep();
613
614 remote_dev = &scif_dev[dst->node];
615 spdev = scif_get_peer_dev(remote_dev);
616 if (IS_ERR(spdev)) {
617 err = PTR_ERR(spdev);
618 return err;
619 }
620
621 spin_lock(&ep->lock);
622 switch (ep->state) {
623 case SCIFEP_ZOMBIE:
624 case SCIFEP_CLOSING:
625 err = -EINVAL;
626 break;
627 case SCIFEP_DISCONNECTED:
628 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
629 ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
630 else
631 err = -EINVAL;
632 break;
633 case SCIFEP_LISTENING:
634 case SCIFEP_CLLISTEN:
635 err = -EOPNOTSUPP;
636 break;
637 case SCIFEP_CONNECTING:
638 case SCIFEP_MAPPING:
639 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
640 err = -EINPROGRESS;
641 else
642 err = -EISCONN;
643 break;
644 case SCIFEP_CONNECTED:
645 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS)
646 ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
647 else
648 err = -EISCONN;
649 break;
650 case SCIFEP_UNBOUND:
651 err = scif_get_new_port();
652 if (err < 0)
653 break;
654 ep->port.port = err;
655 ep->port.node = scif_info.nodeid;
656 ep->conn_async_state = ASYNC_CONN_IDLE;
657 /* Fall through */
658 case SCIFEP_BOUND:
659 /*
660 * If a non-blocking connect has already been initiated
661 * (conn_async_state is either ASYNC_CONN_INPROGRESS or
662 * ASYNC_CONN_FLUSH_WORK), the end point could end up in
663 * SCIFEP_BOUND due to an error in the connection process
664 * (e.g., connection refused). If conn_async_state is
665 * ASYNC_CONN_INPROGRESS - transition to ASYNC_CONN_FLUSH_WORK
666 * so that the error status can be collected. If the state is
667 * already ASYNC_CONN_FLUSH_WORK - then set the error to
668 * EINPROGRESS since some other thread is waiting to collect
669 * error status.
670 */
671 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
672 ep->conn_async_state = ASYNC_CONN_FLUSH_WORK;
673 } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
674 err = -EINPROGRESS;
675 } else {
676 ep->conn_port = *dst;
677 init_waitqueue_head(&ep->sendwq);
678 init_waitqueue_head(&ep->recvwq);
679 init_waitqueue_head(&ep->conwq);
680 ep->conn_async_state = 0;
681
682 if (unlikely(non_block))
683 ep->conn_async_state = ASYNC_CONN_INPROGRESS;
684 }
685 break;
686 }
687
688 if (err || ep->conn_async_state == ASYNC_CONN_FLUSH_WORK)
689 goto connect_simple_unlock1;
690
691 ep->state = SCIFEP_CONNECTING;
692 ep->remote_dev = &scif_dev[dst->node];
693 ep->qp_info.qp->magic = SCIFEP_MAGIC;
694 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
695 init_waitqueue_head(&ep->conn_pend_wq);
696 spin_lock(&scif_info.nb_connect_lock);
697 list_add_tail(&ep->conn_list, &scif_info.nb_connect_list);
698 spin_unlock(&scif_info.nb_connect_lock);
699 err = -EINPROGRESS;
700 schedule_work(&scif_info.conn_work);
701 }
702 connect_simple_unlock1:
703 spin_unlock(&ep->lock);
704 scif_put_peer_dev(spdev);
705 if (err) {
706 return err;
707 } else if (ep->conn_async_state == ASYNC_CONN_FLUSH_WORK) {
708 flush_work(&scif_info.conn_work);
709 err = ep->conn_err;
710 spin_lock(&ep->lock);
711 ep->conn_async_state = ASYNC_CONN_IDLE;
712 spin_unlock(&ep->lock);
713 } else {
714 err = scif_conn_func(ep);
715 }
716 return err;
717 }
718
719 int scif_connect(scif_epd_t epd, struct scif_port_id *dst)
720 {
721 return __scif_connect(epd, dst, false);
722 }
723 EXPORT_SYMBOL_GPL(scif_connect);
724
725 /**
726 * scif_accept() - Accept a connection request from the remote node
727 *
728 * The function accepts a connection request from the remote node. Successful
729 * completion is indicated by a new end point being created and passed back
730 * to the caller for future reference.
731 *
732 * Upon successful completion zero will be returned and the peer information
733 * will be filled in.
734 *
735 * If the end point is not in the listening state -EINVAL will be returned.
736 *
737 * If resource allocation fails during the connection sequence then -ENOMEM
738 * will be returned.
739 *
740 * If the function is called without the SCIF_ACCEPT_SYNC flag and no connection
741 * requests are pending it will return -EAGAIN.
742 *
743 * If the remote side is not sending any connection requests the caller may
744 * terminate this function with a signal. If so a -EINTR will be returned.
745 */
746 int scif_accept(scif_epd_t epd, struct scif_port_id *peer,
747 scif_epd_t *newepd, int flags)
748 {
749 struct scif_endpt *lep = (struct scif_endpt *)epd;
750 struct scif_endpt *cep;
751 struct scif_conreq *conreq;
752 struct scifmsg msg;
753 int err;
754 struct device *spdev;
755
756 dev_dbg(scif_info.mdev.this_device,
757 "SCIFAPI accept: ep %p %s\n", lep, scif_ep_states[lep->state]);
758
759 if (flags & ~SCIF_ACCEPT_SYNC)
760 return -EINVAL;
761
762 if (!peer || !newepd)
763 return -EINVAL;
764
765 might_sleep();
766 spin_lock(&lep->lock);
767 if (lep->state != SCIFEP_LISTENING) {
768 spin_unlock(&lep->lock);
769 return -EINVAL;
770 }
771
772 if (!lep->conreqcnt && !(flags & SCIF_ACCEPT_SYNC)) {
773 /* No connection request present and we do not want to wait */
774 spin_unlock(&lep->lock);
775 return -EAGAIN;
776 }
777
778 lep->files = current->files;
779 retry_connection:
780 spin_unlock(&lep->lock);
781 /* Wait for the remote node to send us a SCIF_CNCT_REQ */
782 err = wait_event_interruptible(lep->conwq,
783 (lep->conreqcnt ||
784 (lep->state != SCIFEP_LISTENING)));
785 if (err)
786 return err;
787
788 if (lep->state != SCIFEP_LISTENING)
789 return -EINTR;
790
791 spin_lock(&lep->lock);
792
793 if (!lep->conreqcnt)
794 goto retry_connection;
795
796 /* Get the first connect request off the list */
797 conreq = list_first_entry(&lep->conlist, struct scif_conreq, list);
798 list_del(&conreq->list);
799 lep->conreqcnt--;
800 spin_unlock(&lep->lock);
801
802 /* Fill in the peer information */
803 peer->node = conreq->msg.src.node;
804 peer->port = conreq->msg.src.port;
805
806 cep = kzalloc(sizeof(*cep), GFP_KERNEL);
807 if (!cep) {
808 err = -ENOMEM;
809 goto scif_accept_error_epalloc;
810 }
811 spin_lock_init(&cep->lock);
812 mutex_init(&cep->sendlock);
813 mutex_init(&cep->recvlock);
814 cep->state = SCIFEP_CONNECTING;
815 cep->remote_dev = &scif_dev[peer->node];
816 cep->remote_ep = conreq->msg.payload[0];
817
818 scif_rma_ep_init(cep);
819
820 err = scif_reserve_dma_chan(cep);
821 if (err) {
822 dev_err(scif_info.mdev.this_device,
823 "%s %d err %d\n", __func__, __LINE__, err);
824 goto scif_accept_error_qpalloc;
825 }
826
827 cep->qp_info.qp = kzalloc(sizeof(*cep->qp_info.qp), GFP_KERNEL);
828 if (!cep->qp_info.qp) {
829 err = -ENOMEM;
830 goto scif_accept_error_qpalloc;
831 }
832
833 err = scif_anon_inode_getfile(cep);
834 if (err)
835 goto scif_accept_error_anon_inode;
836
837 cep->qp_info.qp->magic = SCIFEP_MAGIC;
838 spdev = scif_get_peer_dev(cep->remote_dev);
839 if (IS_ERR(spdev)) {
840 err = PTR_ERR(spdev);
841 goto scif_accept_error_map;
842 }
843 err = scif_setup_qp_accept(cep->qp_info.qp, &cep->qp_info.qp_offset,
844 conreq->msg.payload[1], SCIF_ENDPT_QP_SIZE,
845 cep->remote_dev);
846 if (err) {
847 dev_dbg(&cep->remote_dev->sdev->dev,
848 "SCIFAPI accept: ep %p new %p scif_setup_qp_accept %d qp_offset 0x%llx\n",
849 lep, cep, err, cep->qp_info.qp_offset);
850 scif_put_peer_dev(spdev);
851 goto scif_accept_error_map;
852 }
853
854 cep->port.node = lep->port.node;
855 cep->port.port = lep->port.port;
856 cep->peer.node = peer->node;
857 cep->peer.port = peer->port;
858 init_waitqueue_head(&cep->sendwq);
859 init_waitqueue_head(&cep->recvwq);
860 init_waitqueue_head(&cep->conwq);
861
862 msg.uop = SCIF_CNCT_GNT;
863 msg.src = cep->port;
864 msg.payload[0] = cep->remote_ep;
865 msg.payload[1] = cep->qp_info.qp_offset;
866 msg.payload[2] = (u64)cep;
867
868 err = _scif_nodeqp_send(cep->remote_dev, &msg);
869 scif_put_peer_dev(spdev);
870 if (err)
871 goto scif_accept_error_map;
872 retry:
873 /* Wait for the remote node to respond with SCIF_CNCT_GNT(N)ACK */
874 err = wait_event_timeout(cep->conwq, cep->state != SCIFEP_CONNECTING,
875 SCIF_NODE_ACCEPT_TIMEOUT);
876 if (!err && scifdev_alive(cep))
877 goto retry;
878 err = !err ? -ENODEV : 0;
879 if (err)
880 goto scif_accept_error_map;
881 kfree(conreq);
882
883 spin_lock(&cep->lock);
884
885 if (cep->state == SCIFEP_CLOSING) {
886 /*
887 * Remote failed to allocate resources and NAKed the grant.
888 * There is at this point nothing referencing the new end point.
889 */
890 spin_unlock(&cep->lock);
891 scif_teardown_ep(cep);
892 kfree(cep);
893
894 /* If called with the sync flag then go back and wait. */
895 if (flags & SCIF_ACCEPT_SYNC) {
896 spin_lock(&lep->lock);
897 goto retry_connection;
898 }
899 return -EAGAIN;
900 }
901
902 scif_get_port(cep->port.port);
903 *newepd = (scif_epd_t)cep;
904 spin_unlock(&cep->lock);
905 return 0;
906 scif_accept_error_map:
907 scif_anon_inode_fput(cep);
908 scif_accept_error_anon_inode:
909 scif_teardown_ep(cep);
910 scif_accept_error_qpalloc:
911 kfree(cep);
912 scif_accept_error_epalloc:
913 msg.uop = SCIF_CNCT_REJ;
914 msg.dst.node = conreq->msg.src.node;
915 msg.dst.port = conreq->msg.src.port;
916 msg.payload[0] = conreq->msg.payload[0];
917 msg.payload[1] = conreq->msg.payload[1];
918 scif_nodeqp_send(&scif_dev[conreq->msg.src.node], &msg);
919 kfree(conreq);
920 return err;
921 }
922 EXPORT_SYMBOL_GPL(scif_accept);
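
/*
 * Illustrative sketch (not part of this driver) of the accepting side
 * documented above: bind, listen and then block in scif_accept() with
 * SCIF_ACCEPT_SYNC until a peer connects. The port number, backlog and
 * function name are arbitrary assumptions for the example.
 */
static int __maybe_unused example_accept_sketch(void)
{
	struct scif_port_id peer;
	scif_epd_t lep, cep;
	int err;

	lep = scif_open();
	if (!lep)
		return -ENOMEM;

	err = scif_bind(lep, 2000);
	if (err < 0)
		goto out;

	err = scif_listen(lep, 5);
	if (err < 0)
		goto out;

	/* Blocks until a SCIF_CNCT_REQ arrives or a signal is received */
	err = scif_accept(lep, &peer, &cep, SCIF_ACCEPT_SYNC);
	if (err)
		goto out;

	/* cep is now a connected endpoint to peer.node:peer.port */
	scif_close(cep);
out:
	scif_close(lep);
	return err < 0 ? err : 0;
}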
923
924 /*
925 * scif_msg_param_check:
926 * @epd: The end point returned from scif_open()
927 * @len: Length of the message
928 * @flags: blocking or non blocking
929 *
930 * Validate parameters for messaging APIs scif_send(..)/scif_recv(..).
931 */
932 static inline int scif_msg_param_check(scif_epd_t epd, int len, int flags)
933 {
934 int ret = -EINVAL;
935
936 if (len < 0)
937 goto err_ret;
938 if (flags && (!(flags & SCIF_RECV_BLOCK)))
939 goto err_ret;
940 ret = 0;
941 err_ret:
942 return ret;
943 }
944
945 static int _scif_send(scif_epd_t epd, void *msg, int len, int flags)
946 {
947 struct scif_endpt *ep = (struct scif_endpt *)epd;
948 struct scifmsg notif_msg;
949 int curr_xfer_len = 0, sent_len = 0, write_count;
950 int ret = 0;
951 struct scif_qp *qp = ep->qp_info.qp;
952
953 if (flags & SCIF_SEND_BLOCK)
954 might_sleep();
955
956 spin_lock(&ep->lock);
957 while (sent_len != len && SCIFEP_CONNECTED == ep->state) {
958 write_count = scif_rb_space(&qp->outbound_q);
959 if (write_count) {
960 /* Best effort to send as much data as possible */
961 curr_xfer_len = min(len - sent_len, write_count);
962 ret = scif_rb_write(&qp->outbound_q, msg,
963 curr_xfer_len);
964 if (ret < 0)
965 break;
966 /* Success. Update write pointer */
967 scif_rb_commit(&qp->outbound_q);
968 /*
969 * Send a notification to the peer about the
970 * produced data message.
971 */
972 notif_msg.src = ep->port;
973 notif_msg.uop = SCIF_CLIENT_SENT;
974 notif_msg.payload[0] = ep->remote_ep;
975 ret = _scif_nodeqp_send(ep->remote_dev, ¬if_msg);
976 if (ret)
977 break;
978 sent_len += curr_xfer_len;
979 msg = msg + curr_xfer_len;
980 continue;
981 }
982 curr_xfer_len = min(len - sent_len, SCIF_ENDPT_QP_SIZE - 1);
983 /* Not enough RB space. return for the Non Blocking case */
984 if (!(flags & SCIF_SEND_BLOCK))
985 break;
986
987 spin_unlock(&ep->lock);
988 /* Wait for a SCIF_CLIENT_RCVD message in the Blocking case */
989 ret =
990 wait_event_interruptible(ep->sendwq,
991 (SCIFEP_CONNECTED != ep->state) ||
992 (scif_rb_space(&qp->outbound_q) >=
993 curr_xfer_len));
994 spin_lock(&ep->lock);
995 if (ret)
996 break;
997 }
998 if (sent_len)
999 ret = sent_len;
1000 else if (!ret && SCIFEP_CONNECTED != ep->state)
1001 ret = SCIFEP_DISCONNECTED == ep->state ?
1002 -ECONNRESET : -ENOTCONN;
1003 spin_unlock(&ep->lock);
1004 return ret;
1005 }
1006
1007 static int _scif_recv(scif_epd_t epd, void *msg, int len, int flags)
1008 {
1009 int read_size;
1010 struct scif_endpt *ep = (struct scif_endpt *)epd;
1011 struct scifmsg notif_msg;
1012 int curr_recv_len = 0, remaining_len = len, read_count;
1013 int ret = 0;
1014 struct scif_qp *qp = ep->qp_info.qp;
1015
1016 if (flags & SCIF_RECV_BLOCK)
1017 might_sleep();
1018 spin_lock(&ep->lock);
1019 while (remaining_len && (SCIFEP_CONNECTED == ep->state ||
1020 SCIFEP_DISCONNECTED == ep->state)) {
1021 read_count = scif_rb_count(&qp->inbound_q, remaining_len);
1022 if (read_count) {
1023 /*
1024 * Best effort to recv as much data as there
1025 * are bytes to read in the RB, which is particularly
1026 * important for the Non Blocking case.
1027 */
1028 curr_recv_len = min(remaining_len, read_count);
1029 read_size = scif_rb_get_next(&qp->inbound_q,
1030 msg, curr_recv_len);
1031 if (ep->state == SCIFEP_CONNECTED) {
1032 /*
1033 * Update the read pointer only if the endpoint
1034 * is still connected else the read pointer
1035 * might no longer exist since the peer has
1036 * freed resources!
1037 */
1038 scif_rb_update_read_ptr(&qp->inbound_q);
1039 /*
1040 * Send a notification to the peer about the
1041 * consumed data message only if the EP is in
1042 * SCIFEP_CONNECTED state.
1043 */
1044 notif_msg.src = ep->port;
1045 notif_msg.uop = SCIF_CLIENT_RCVD;
1046 notif_msg.payload[0] = ep->remote_ep;
1047 ret = _scif_nodeqp_send(ep->remote_dev,
1048 ¬if_msg);
1049 if (ret)
1050 break;
1051 }
1052 remaining_len -= curr_recv_len;
1053 msg = msg + curr_recv_len;
1054 continue;
1055 }
1056 /*
1057 * Bail out now if the EP is in SCIFEP_DISCONNECTED state else
1058 * we will keep looping forever.
1059 */
1060 if (ep->state == SCIFEP_DISCONNECTED)
1061 break;
1062 /*
1063 * Return in the Non Blocking case if there is no data
1064 * to read in this iteration.
1065 */
1066 if (!(flags & SCIF_RECV_BLOCK))
1067 break;
1068 curr_recv_len = min(remaining_len, SCIF_ENDPT_QP_SIZE - 1);
1069 spin_unlock(&ep->lock);
1070 /*
1071 * Wait for a SCIF_CLIENT_SEND message in the blocking case
1072 * or until other side disconnects.
1073 */
1074 ret =
1075 wait_event_interruptible(ep->recvwq,
1076 SCIFEP_CONNECTED != ep->state ||
1077 scif_rb_count(&qp->inbound_q,
1078 curr_recv_len)
1079 >= curr_recv_len);
1080 spin_lock(&ep->lock);
1081 if (ret)
1082 break;
1083 }
1084 if (len - remaining_len)
1085 ret = len - remaining_len;
1086 else if (!ret && ep->state != SCIFEP_CONNECTED)
1087 ret = ep->state == SCIFEP_DISCONNECTED ?
1088 -ECONNRESET : -ENOTCONN;
1089 spin_unlock(&ep->lock);
1090 return ret;
1091 }
1092
1093 /**
1094 * scif_user_send() - Send data to connection queue
1095 * @epd: The end point returned from scif_open()
1096 * @msg: Address of the data to send
1097 * @len: Length to send
1098 * @flags: blocking or non blocking
1099 *
1100 * This function is called from the driver IOCTL entry point
1101 * only and is a wrapper for _scif_send().
1102 */
1103 int scif_user_send(scif_epd_t epd, void __user *msg, int len, int flags)
1104 {
1105 struct scif_endpt *ep = (struct scif_endpt *)epd;
1106 int err = 0;
1107 int sent_len = 0;
1108 char *tmp;
1109 int loop_len;
1110 int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
1111
1112 dev_dbg(scif_info.mdev.this_device,
1113 "SCIFAPI send (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
1114 if (!len)
1115 return 0;
1116
1117 err = scif_msg_param_check(epd, len, flags);
1118 if (err)
1119 goto send_err;
1120
1121 tmp = kmalloc(chunk_len, GFP_KERNEL);
1122 if (!tmp) {
1123 err = -ENOMEM;
1124 goto send_err;
1125 }
1126 /*
1127 * Grabbing the lock before breaking up the transfer into
1128 * multiple chunks is required to ensure that messages do
1129 * not get fragmented and reordered.
1130 */
1131 mutex_lock(&ep->sendlock);
1132 while (sent_len != len) {
1133 loop_len = len - sent_len;
1134 loop_len = min(chunk_len, loop_len);
1135 if (copy_from_user(tmp, msg, loop_len)) {
1136 err = -EFAULT;
1137 goto send_free_err;
1138 }
1139 err = _scif_send(epd, tmp, loop_len, flags);
1140 if (err < 0)
1141 goto send_free_err;
1142 sent_len += err;
1143 msg += err;
1144 if (err != loop_len)
1145 goto send_free_err;
1146 }
1147 send_free_err:
1148 mutex_unlock(&ep->sendlock);
1149 kfree(tmp);
1150 send_err:
1151 return err < 0 ? err : sent_len;
1152 }
1153
1154 /**
1155 * scif_user_recv() - Receive data from connection queue
1156 * @epd: The end point returned from scif_open()
1157 * @msg: Address to place data
1158 * @len: Length to receive
1159 * @flags: blocking or non blocking
1160 *
1161 * This function is called from the driver IOCTL entry point
1162 * only and is a wrapper for _scif_recv().
1163 */
1164 int scif_user_recv(scif_epd_t epd, void __user *msg, int len, int flags)
1165 {
1166 struct scif_endpt *ep = (struct scif_endpt *)epd;
1167 int err = 0;
1168 int recv_len = 0;
1169 char *tmp;
1170 int loop_len;
1171 int chunk_len = min(len, (1 << (MAX_ORDER + PAGE_SHIFT - 1)));
1172
1173 dev_dbg(scif_info.mdev.this_device,
1174 "SCIFAPI recv (U): ep %p %s\n", ep, scif_ep_states[ep->state]);
1175 if (!len)
1176 return 0;
1177
1178 err = scif_msg_param_check(epd, len, flags);
1179 if (err)
1180 goto recv_err;
1181
1182 tmp = kmalloc(chunk_len, GFP_KERNEL);
1183 if (!tmp) {
1184 err = -ENOMEM;
1185 goto recv_err;
1186 }
1187 /*
1188 * Grabbing the lock before breaking up the transfer into
1189 * multiple chunks is required to ensure that messages do
1190 * not get fragmented and reordered.
1191 */
1192 mutex_lock(&ep->recvlock);
1193 while (recv_len != len) {
1194 loop_len = len - recv_len;
1195 loop_len = min(chunk_len, loop_len);
1196 err = _scif_recv(epd, tmp, loop_len, flags);
1197 if (err < 0)
1198 goto recv_free_err;
1199 if (copy_to_user(msg, tmp, err)) {
1200 err = -EFAULT;
1201 goto recv_free_err;
1202 }
1203 recv_len += err;
1204 msg += err;
1205 if (err != loop_len)
1206 goto recv_free_err;
1207 }
1208 recv_free_err:
1209 mutex_unlock(&ep->recvlock);
1210 kfree(tmp);
1211 recv_err:
1212 return err < 0 ? err : recv_len;
1213 }
1214
1215 /**
1216 * scif_send() - Send data to connection queue
1217 * @epd: The end point returned from scif_open()
1218 * @msg: Address of the data to send
1219 * @len: Length to send
1220 * @flags: blocking or non blocking
1221 *
1222 * This function is called from the kernel mode only and is
1223 * a wrapper for _scif_send().
1224 */
1225 int scif_send(scif_epd_t epd, void *msg, int len, int flags)
1226 {
1227 struct scif_endpt *ep = (struct scif_endpt *)epd;
1228 int ret;
1229
1230 dev_dbg(scif_info.mdev.this_device,
1231 "SCIFAPI send (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
1232 if (!len)
1233 return 0;
1234
1235 ret = scif_msg_param_check(epd, len, flags);
1236 if (ret)
1237 return ret;
1238 if (!ep->remote_dev)
1239 return -ENOTCONN;
1240 /*
1241 * Grab the mutex lock in the blocking case only
1242 * to ensure messages do not get fragmented/reordered.
1243 * The non blocking mode is protected using spin locks
1244 * in _scif_send().
1245 */
1246 if (flags & SCIF_SEND_BLOCK)
1247 mutex_lock(&ep->sendlock);
1248
1249 ret = _scif_send(epd, msg, len, flags);
1250
1251 if (flags & SCIF_SEND_BLOCK)
1252 mutex_unlock(&ep->sendlock);
1253 return ret;
1254 }
1255 EXPORT_SYMBOL_GPL(scif_send);
1256
1257 /**
1258 * scif_recv() - Receive data from connection queue
1259 * @epd: The end point returned from scif_open()
1260 * @msg: Address to place data
1261 * @len: Length to receive
1262 * @flags: blocking or non blocking
1263 *
1264 * This function is called from the kernel mode only and is
1265 * a wrapper for _scif_recv().
1266 */
1267 int scif_recv(scif_epd_t epd, void *msg, int len, int flags)
1268 {
1269 struct scif_endpt *ep = (struct scif_endpt *)epd;
1270 int ret;
1271
1272 dev_dbg(scif_info.mdev.this_device,
1273 "SCIFAPI recv (K): ep %p %s\n", ep, scif_ep_states[ep->state]);
1274 if (!len)
1275 return 0;
1276
1277 ret = scif_msg_param_check(epd, len, flags);
1278 if (ret)
1279 return ret;
1280 /*
1281 * Grab the mutex lock in the blocking case only
1282 * to ensure messages do not get fragmented/reordered.
1283 * The non blocking mode is protected using spin locks
1284 * in _scif_recv().
1285 */
1286 if (flags & SCIF_RECV_BLOCK)
1287 mutex_lock(&ep->recvlock);
1288
1289 ret = _scif_recv(epd, msg, len, flags);
1290
1291 if (flags & SCIF_RECV_BLOCK)
1292 mutex_unlock(&ep->recvlock);
1293
1294 return ret;
1295 }
1296 EXPORT_SYMBOL_GPL(scif_recv);
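
/*
 * Illustrative sketch (not part of this driver): a blocking message exchange
 * over an already connected endpoint using the kernel-mode wrappers above.
 * The buffer contents and function name are arbitrary assumptions.
 */
static int __maybe_unused example_msg_sketch(scif_epd_t epd)
{
	char buf[64] = "ping";
	int ret;

	/* SCIF_SEND_BLOCK: returns only once all bytes have been queued */
	ret = scif_send(epd, buf, sizeof(buf), SCIF_SEND_BLOCK);
	if (ret < 0)
		return ret;

	/* SCIF_RECV_BLOCK: returns only once sizeof(buf) bytes have arrived */
	ret = scif_recv(epd, buf, sizeof(buf), SCIF_RECV_BLOCK);
	if (ret < 0)
		return ret;

	return 0;
}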
1297
1298 static inline void _scif_poll_wait(struct file *f, wait_queue_head_t *wq,
1299 poll_table *p, struct scif_endpt *ep)
1300 {
1301 /*
1302 * Because poll_wait makes a GFP_KERNEL allocation, give up the lock
1303 * and regrab it afterwards. Because the endpoint state might have
1304 * changed while the lock was given up, the state must be checked
1305 * again after re-acquiring the lock. The code in __scif_pollfd(..)
1306 * does this.
1307 */
1308 spin_unlock(&ep->lock);
1309 poll_wait(f, wq, p);
1310 spin_lock(&ep->lock);
1311 }
1312
1313 __poll_t
1314 __scif_pollfd(struct file *f, poll_table *wait, struct scif_endpt *ep)
1315 {
1316 __poll_t mask = 0;
1317
1318 dev_dbg(scif_info.mdev.this_device,
1319 "SCIFAPI pollfd: ep %p %s\n", ep, scif_ep_states[ep->state]);
1320
1321 spin_lock(&ep->lock);
1322
1323 /* Endpoint is waiting for a non-blocking connect to complete */
1324 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1325 _scif_poll_wait(f, &ep->conn_pend_wq, wait, ep);
1326 if (ep->conn_async_state == ASYNC_CONN_INPROGRESS) {
1327 if (ep->state == SCIFEP_CONNECTED ||
1328 ep->state == SCIFEP_DISCONNECTED ||
1329 ep->conn_err)
1330 mask |= EPOLLOUT;
1331 goto exit;
1332 }
1333 }
1334
1335 /* Endpoint is listening for incoming connection requests */
1336 if (ep->state == SCIFEP_LISTENING) {
1337 _scif_poll_wait(f, &ep->conwq, wait, ep);
1338 if (ep->state == SCIFEP_LISTENING) {
1339 if (ep->conreqcnt)
1340 mask |= EPOLLIN;
1341 goto exit;
1342 }
1343 }
1344
1345 /* Endpoint is connected or disconnected */
1346 if (ep->state == SCIFEP_CONNECTED || ep->state == SCIFEP_DISCONNECTED) {
1347 if (poll_requested_events(wait) & EPOLLIN)
1348 _scif_poll_wait(f, &ep->recvwq, wait, ep);
1349 if (poll_requested_events(wait) & EPOLLOUT)
1350 _scif_poll_wait(f, &ep->sendwq, wait, ep);
1351 if (ep->state == SCIFEP_CONNECTED ||
1352 ep->state == SCIFEP_DISCONNECTED) {
1353 /* Data can be read without blocking */
1354 if (scif_rb_count(&ep->qp_info.qp->inbound_q, 1))
1355 mask |= EPOLLIN;
1356 /* Data can be written without blocking */
1357 if (scif_rb_space(&ep->qp_info.qp->outbound_q))
1358 mask |= EPOLLOUT;
1359 /* Return EPOLLHUP if endpoint is disconnected */
1360 if (ep->state == SCIFEP_DISCONNECTED)
1361 mask |= EPOLLHUP;
1362 goto exit;
1363 }
1364 }
1365
1366 /* Return EPOLLERR if the endpoint is in none of the above states */
1367 mask |= EPOLLERR;
1368 exit:
1369 spin_unlock(&ep->lock);
1370 return mask;
1371 }
1372
1373 /**
1374 * scif_poll() - Kernel mode SCIF poll
1375 * @ufds: Array of scif_pollepd structures containing the end points
1376 * and events to poll on
1377 * @nfds: Size of the ufds array
1378 * @timeout_msecs: Timeout in msecs, -ve implies infinite timeout
1379 *
1380 * The code flow in this function is based on do_poll(..) in select.c
1381 *
1382 * Returns the number of endpoints which have pending events or 0 in
1383 * the event of a timeout. If a signal is used for wake up, -EINTR is
1384 * returned.
1385 */
1386 int
1387 scif_poll(struct scif_pollepd *ufds, unsigned int nfds, long timeout_msecs)
1388 {
1389 struct poll_wqueues table;
1390 poll_table *pt;
1391 int i, count = 0, timed_out = timeout_msecs == 0;
1392 __poll_t mask;
1393 u64 timeout = timeout_msecs < 0 ? MAX_SCHEDULE_TIMEOUT
1394 : msecs_to_jiffies(timeout_msecs);
1395
1396 poll_initwait(&table);
1397 pt = &table.pt;
1398 while (1) {
1399 for (i = 0; i < nfds; i++) {
1400 pt->_key = ufds[i].events | EPOLLERR | EPOLLHUP;
1401 mask = __scif_pollfd(ufds[i].epd->anon,
1402 pt, ufds[i].epd);
1403 mask &= ufds[i].events | EPOLLERR | EPOLLHUP;
1404 if (mask) {
1405 count++;
1406 pt->_qproc = NULL;
1407 }
1408 ufds[i].revents = mask;
1409 }
1410 pt->_qproc = NULL;
1411 if (!count) {
1412 count = table.error;
1413 if (signal_pending(current))
1414 count = -EINTR;
1415 }
1416 if (count || timed_out)
1417 break;
1418
1419 if (!schedule_timeout_interruptible(timeout))
1420 timed_out = 1;
1421 }
1422 poll_freewait(&table);
1423 return count;
1424 }
1425 EXPORT_SYMBOL_GPL(scif_poll);
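
/*
 * Illustrative sketch (not part of this driver): kernel-mode poll on a
 * single connected endpoint via scif_poll(). The timeout value and the
 * function name are arbitrary assumptions for the example.
 */
static int __maybe_unused example_poll_sketch(scif_epd_t epd)
{
	struct scif_pollepd pollepd = {
		.epd = epd,
		.events = EPOLLIN,
	};
	int ret;

	/* Wait up to 1000 ms for the endpoint to become readable */
	ret = scif_poll(&pollepd, 1, 1000);
	if (ret < 0)
		return ret;		/* e.g. -EINTR */
	if (!ret)
		return -ETIMEDOUT;	/* timed out, no events */

	/* EPOLLIN in revents => data can be read without blocking */
	return (pollepd.revents & EPOLLIN) ? 0 : -EAGAIN;
}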
1426
1427 int scif_get_node_ids(u16 *nodes, int len, u16 *self)
1428 {
1429 int online = 0;
1430 int offset = 0;
1431 int node;
1432
1433 if (!scif_is_mgmt_node())
1434 scif_get_node_info();
1435
1436 *self = scif_info.nodeid;
1437 mutex_lock(&scif_info.conflock);
1438 len = min_t(int, len, scif_info.total);
1439 for (node = 0; node <= scif_info.maxid; node++) {
1440 if (_scifdev_alive(&scif_dev[node])) {
1441 online++;
1442 if (offset < len)
1443 nodes[offset++] = node;
1444 }
1445 }
1446 dev_dbg(scif_info.mdev.this_device,
1447 "SCIFAPI get_node_ids total %d online %d filled in %d nodes\n",
1448 scif_info.total, online, offset);
1449 mutex_unlock(&scif_info.conflock);
1450
1451 return online;
1452 }
1453 EXPORT_SYMBOL_GPL(scif_get_node_ids);
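
/*
 * Illustrative sketch (not part of this driver): enumerating SCIF nodes.
 * scif_get_node_ids() fills in at most "len" node ids but returns the total
 * number of nodes online, which may exceed "len". The array size and the
 * function name are arbitrary assumptions for the example.
 */
static void __maybe_unused example_node_ids_sketch(void)
{
	u16 nodes[8];
	u16 self;
	int online;

	online = scif_get_node_ids(nodes, ARRAY_SIZE(nodes), &self);
	pr_debug("example: self node %u, %d node(s) online\n", self, online);
}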
1454
1455 static int scif_add_client_dev(struct device *dev, struct subsys_interface *si)
1456 {
1457 struct scif_client *client =
1458 container_of(si, struct scif_client, si);
1459 struct scif_peer_dev *spdev =
1460 container_of(dev, struct scif_peer_dev, dev);
1461
1462 if (client->probe)
1463 client->probe(spdev);
1464 return 0;
1465 }
1466
1467 static void scif_remove_client_dev(struct device *dev,
1468 struct subsys_interface *si)
1469 {
1470 struct scif_client *client =
1471 container_of(si, struct scif_client, si);
1472 struct scif_peer_dev *spdev =
1473 container_of(dev, struct scif_peer_dev, dev);
1474
1475 if (client->remove)
1476 client->remove(spdev);
1477 }
1478
1479 void scif_client_unregister(struct scif_client *client)
1480 {
1481 subsys_interface_unregister(&client->si);
1482 }
1483 EXPORT_SYMBOL_GPL(scif_client_unregister);
1484
1485 int scif_client_register(struct scif_client *client)
1486 {
1487 struct subsys_interface *si = &client->si;
1488
1489 si->name = client->name;
1490 si->subsys = &scif_peer_bus;
1491 si->add_dev = scif_add_client_dev;
1492 si->remove_dev = scif_remove_client_dev;
1493
1494 return subsys_interface_register(&client->si);
1495 }
1496 EXPORT_SYMBOL_GPL(scif_client_register);
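
/*
 * Illustrative sketch (not part of this driver): a minimal SCIF client.
 * The probe/remove callbacks run when SCIF peer devices are added to or
 * removed from the peer bus, as wired up by scif_add_client_dev() and
 * scif_remove_client_dev() above. All names below are assumptions; in a
 * real client module the init/exit functions would be hooked up with
 * module_init()/module_exit().
 */
static void example_client_probe(struct scif_peer_dev *spdev)
{
	dev_dbg(&spdev->dev, "example client: peer device added\n");
}

static void example_client_remove(struct scif_peer_dev *spdev)
{
	dev_dbg(&spdev->dev, "example client: peer device removed\n");
}

static struct scif_client example_client = {
	.name = "example_scif_client",
	.probe = example_client_probe,
	.remove = example_client_remove,
};

static int __maybe_unused example_client_init(void)
{
	return scif_client_register(&example_client);
}

static void __maybe_unused example_client_exit(void)
{
	scif_client_unregister(&example_client);
}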
1497