1 /* 2 * Intel MIC Platform Software Stack (MPSS) 3 * 4 * This file is provided under a dual BSD/GPLv2 license. When using or 5 * redistributing this file, you may do so under either license. 6 * 7 * GPL LICENSE SUMMARY 8 * 9 * Copyright(c) 2014 Intel Corporation. 10 * 11 * This program is free software; you can redistribute it and/or modify 12 * it under the terms of version 2 of the GNU General Public License as 13 * published by the Free Software Foundation. 14 * 15 * This program is distributed in the hope that it will be useful, but 16 * WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 * General Public License for more details. 19 * 20 * BSD LICENSE 21 * 22 * Copyright(c) 2014 Intel Corporation. 23 * 24 * Redistribution and use in source and binary forms, with or without 25 * modification, are permitted provided that the following conditions 26 * are met: 27 * 28 * * Redistributions of source code must retain the above copyright 29 * notice, this list of conditions and the following disclaimer. 30 * * Redistributions in binary form must reproduce the above copyright 31 * notice, this list of conditions and the following disclaimer in 32 * the documentation and/or other materials provided with the 33 * distribution. 34 * * Neither the name of Intel Corporation nor the names of its 35 * contributors may be used to endorse or promote products derived 36 * from this software without specific prior written permission. 37 * 38 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 39 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 40 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 41 * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 42 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 43 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 44 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 45 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 46 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 47 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 48 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 49 * 50 * Intel SCIF driver. 51 * 52 */ 53 #ifndef __SCIF_H__ 54 #define __SCIF_H__ 55 56 #include <linux/types.h> 57 #include <linux/poll.h> 58 #include <linux/device.h> 59 #include <linux/scif_ioctl.h> 60 61 #define SCIF_ACCEPT_SYNC 1 62 #define SCIF_SEND_BLOCK 1 63 #define SCIF_RECV_BLOCK 1 64 65 enum { 66 SCIF_PROT_READ = (1 << 0), 67 SCIF_PROT_WRITE = (1 << 1) 68 }; 69 70 enum { 71 SCIF_MAP_FIXED = 0x10, 72 SCIF_MAP_KERNEL = 0x20, 73 }; 74 75 enum { 76 SCIF_FENCE_INIT_SELF = (1 << 0), 77 SCIF_FENCE_INIT_PEER = (1 << 1), 78 SCIF_SIGNAL_LOCAL = (1 << 4), 79 SCIF_SIGNAL_REMOTE = (1 << 5) 80 }; 81 82 enum { 83 SCIF_RMA_USECPU = (1 << 0), 84 SCIF_RMA_USECACHE = (1 << 1), 85 SCIF_RMA_SYNC = (1 << 2), 86 SCIF_RMA_ORDERED = (1 << 3) 87 }; 88 89 /* End of SCIF Admin Reserved Ports */ 90 #define SCIF_ADMIN_PORT_END 1024 91 92 /* End of SCIF Reserved Ports */ 93 #define SCIF_PORT_RSVD 1088 94 95 typedef struct scif_endpt *scif_epd_t; 96 typedef struct scif_pinned_pages *scif_pinned_pages_t; 97 98 /** 99 * struct scif_range - SCIF registered range used in kernel mode 100 * @cookie: cookie used internally by SCIF 101 * @nr_pages: number of pages of PAGE_SIZE 102 * @prot_flags: R/W protection 103 * @phys_addr: Array of bus addresses 104 * @va: Array of kernel virtual addresses backed by the pages in the phys_addr 105 * array. The va is populated only when called on the host for a remote 106 * SCIF connection on MIC. 
This is required to support the use case of DMA 107 * between MIC and another device which is not a SCIF node e.g., an IB or 108 * ethernet NIC. 109 */ 110 struct scif_range { 111 void *cookie; 112 int nr_pages; 113 int prot_flags; 114 dma_addr_t *phys_addr; 115 void __iomem **va; 116 }; 117 118 /** 119 * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll 120 * @epd: SCIF endpoint 121 * @events: requested events 122 * @revents: returned events 123 */ 124 struct scif_pollepd { 125 scif_epd_t epd; 126 __poll_t events; 127 __poll_t revents; 128 }; 129 130 /** 131 * scif_peer_dev - representation of a peer SCIF device 132 * 133 * Peer devices show up as PCIe devices for the mgmt node but not the cards. 134 * The mgmt node discovers all the cards on the PCIe bus and informs the other 135 * cards about their peers. Upon notification of a peer a node adds a peer 136 * device to the peer bus to maintain symmetry in the way devices are 137 * discovered across all nodes in the SCIF network. 138 * 139 * @dev: underlying device 140 * @dnode - The destination node which this device will communicate with. 
141 */ 142 struct scif_peer_dev { 143 struct device dev; 144 u8 dnode; 145 }; 146 147 /** 148 * scif_client - representation of a SCIF client 149 * @name: client name 150 * @probe - client method called when a peer device is registered 151 * @remove - client method called when a peer device is unregistered 152 * @si - subsys_interface used internally for implementing SCIF clients 153 */ 154 struct scif_client { 155 const char *name; 156 void (*probe)(struct scif_peer_dev *spdev); 157 void (*remove)(struct scif_peer_dev *spdev); 158 struct subsys_interface si; 159 }; 160 161 #define SCIF_OPEN_FAILED ((scif_epd_t)-1) 162 #define SCIF_REGISTER_FAILED ((off_t)-1) 163 #define SCIF_MMAP_FAILED ((void *)-1) 164 165 /** 166 * scif_open() - Create an endpoint 167 * 168 * Return: 169 * Upon successful completion, scif_open() returns an endpoint descriptor to 170 * be used in subsequent SCIF functions calls to refer to that endpoint; 171 * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is 172 * returned and errno is set to indicate the error; in kernel mode a NULL 173 * scif_epd_t is returned. 174 * 175 * Errors: 176 * ENOMEM - Insufficient kernel memory was available 177 */ 178 scif_epd_t scif_open(void); 179 180 /** 181 * scif_bind() - Bind an endpoint to a port 182 * @epd: endpoint descriptor 183 * @pn: port number 184 * 185 * scif_bind() binds endpoint epd to port pn, where pn is a port number on the 186 * local node. If pn is zero, a port number greater than or equal to 187 * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to 188 * exactly one local port. Ports less than 1024 when requested can only be bound 189 * by system (or root) processes or by processes executed by privileged users. 
190 * 191 * Return: 192 * Upon successful completion, scif_bind() returns the port number to which epd 193 * is bound; otherwise in user mode -1 is returned and errno is set to 194 * indicate the error; in kernel mode the negative of one of the following 195 * errors is returned. 196 * 197 * Errors: 198 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 199 * EINVAL - the endpoint or the port is already bound 200 * EISCONN - The endpoint is already connected 201 * ENOSPC - No port number available for assignment 202 * EACCES - The port requested is protected and the user is not the superuser 203 */ 204 int scif_bind(scif_epd_t epd, u16 pn); 205 206 /** 207 * scif_listen() - Listen for connections on an endpoint 208 * @epd: endpoint descriptor 209 * @backlog: maximum pending connection requests 210 * 211 * scif_listen() marks the endpoint epd as a listening endpoint - that is, as 212 * an endpoint that will be used to accept incoming connection requests. Once 213 * so marked, the endpoint is said to be in the listening state and may not be 214 * used as the endpoint of a connection. 215 * 216 * The endpoint, epd, must have been bound to a port. 217 * 218 * The backlog argument defines the maximum length to which the queue of 219 * pending connections for epd may grow. If a connection request arrives when 220 * the queue is full, the client may receive an error with an indication that 221 * the connection was refused. 222 * 223 * Return: 224 * Upon successful completion, scif_listen() returns 0; otherwise in user mode 225 * -1 is returned and errno is set to indicate the error; in kernel mode the 226 * negative of one of the following errors is returned. 
227 * 228 * Errors: 229 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 230 * EINVAL - the endpoint is not bound to a port 231 * EISCONN - The endpoint is already connected or listening 232 */ 233 int scif_listen(scif_epd_t epd, int backlog); 234 235 /** 236 * scif_connect() - Initiate a connection on a port 237 * @epd: endpoint descriptor 238 * @dst: global id of port to which to connect 239 * 240 * The scif_connect() function requests the connection of endpoint epd to remote 241 * port dst. If the connection is successful, a peer endpoint, bound to dst, is 242 * created on node dst.node. On successful return, the connection is complete. 243 * 244 * If the endpoint epd has not already been bound to a port, scif_connect() 245 * will bind it to an unused local port. 246 * 247 * A connection is terminated when an endpoint of the connection is closed, 248 * either explicitly by scif_close(), or when a process that owns one of the 249 * endpoints of the connection is terminated. 250 * 251 * In user space, scif_connect() supports an asynchronous connection mode 252 * if the application has set the O_NONBLOCK flag on the endpoint via the 253 * fcntl() system call. Setting this flag will result in the calling process 254 * not to wait during scif_connect(). 255 * 256 * Return: 257 * Upon successful completion, scif_connect() returns the port ID to which the 258 * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is 259 * set to indicate the error; in kernel mode the negative of one of the 260 * following errors is returned. 
261 * 262 * Errors: 263 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 264 * ECONNREFUSED - The destination was not listening for connections or refused 265 * the connection request 266 * EINVAL - dst.port is not a valid port ID 267 * EISCONN - The endpoint is already connected 268 * ENOMEM - No buffer space is available 269 * ENODEV - The destination node does not exist, or the node is lost or existed, 270 * but is not currently in the network since it may have crashed 271 * ENOSPC - No port number available for assignment 272 * EOPNOTSUPP - The endpoint is listening and cannot be connected 273 */ 274 int scif_connect(scif_epd_t epd, struct scif_port_id *dst); 275 276 /** 277 * scif_accept() - Accept a connection on an endpoint 278 * @epd: endpoint descriptor 279 * @peer: global id of port to which connected 280 * @newepd: new connected endpoint descriptor 281 * @flags: flags 282 * 283 * The scif_accept() call extracts the first connection request from the queue 284 * of pending connections for the port on which epd is listening. scif_accept() 285 * creates a new endpoint, bound to the same port as epd, and allocates a new 286 * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new 287 * endpoint is connected to the endpoint through which the connection was 288 * requested. epd is unaffected by this call, and remains in the listening 289 * state. 290 * 291 * On successful return, peer holds the global port identifier (node id and 292 * local port number) of the port which requested the connection. 293 * 294 * A connection is terminated when an endpoint of the connection is closed, 295 * either explicitly by scif_close(), or when a process that owns one of the 296 * endpoints of the connection is terminated. 297 * 298 * The number of connections that can (subsequently) be accepted on epd is only 299 * limited by system resources (memory). 
300 * 301 * The flags argument is formed by OR'ing together zero or more of the 302 * following values. 303 * SCIF_ACCEPT_SYNC - block until a connection request is presented. If 304 * SCIF_ACCEPT_SYNC is not in flags, and no pending 305 * connections are present on the queue, scif_accept() 306 * fails with an EAGAIN error 307 * 308 * In user mode, the select() and poll() functions can be used to determine 309 * when there is a connection request. In kernel mode, the scif_poll() 310 * function may be used for this purpose. A readable event will be delivered 311 * when a connection is requested. 312 * 313 * Return: 314 * Upon successful completion, scif_accept() returns 0; otherwise in user mode 315 * -1 is returned and errno is set to indicate the error; in kernel mode the 316 * negative of one of the following errors is returned. 317 * 318 * Errors: 319 * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be 320 * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete 321 * its connection request 322 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 323 * EINTR - Interrupted function 324 * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is 325 * NULL, or newepd is NULL 326 * ENODEV - The requesting node is lost or existed, but is not currently in the 327 * network since it may have crashed 328 * ENOMEM - Not enough space 329 * ENOENT - Secondary part of epd registration failed 330 */ 331 int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t 332 *newepd, int flags); 333 334 /** 335 * scif_close() - Close an endpoint 336 * @epd: endpoint descriptor 337 * 338 * scif_close() closes an endpoint and performs necessary teardown of 339 * facilities associated with that endpoint. 340 * 341 * If epd is a listening endpoint then it will no longer accept connection 342 * requests on the port to which it is bound. Any pending connection requests 343 * are rejected. 
344 * 345 * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs 346 * which are in-process through epd or its peer endpoint will complete before 347 * scif_close() returns. Registered windows of the local and peer endpoints are 348 * released as if scif_unregister() was called against each window. 349 * 350 * Closing a SCIF endpoint does not affect local registered memory mapped by 351 * a SCIF endpoint on a remote node. The local memory remains mapped by the peer 352 * SCIF endpoint explicitly removed by calling munmap(..) by the peer. 353 * 354 * If the peer endpoint's receive queue is not empty at the time that epd is 355 * closed, then the peer endpoint can be passed as the endpoint parameter to 356 * scif_recv() until the receive queue is empty. 357 * 358 * epd is freed and may no longer be accessed. 359 * 360 * Return: 361 * Upon successful completion, scif_close() returns 0; otherwise in user mode 362 * -1 is returned and errno is set to indicate the error; in kernel mode the 363 * negative of one of the following errors is returned. 364 * 365 * Errors: 366 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 367 */ 368 int scif_close(scif_epd_t epd); 369 370 /** 371 * scif_send() - Send a message 372 * @epd: endpoint descriptor 373 * @msg: message buffer address 374 * @len: message length 375 * @flags: blocking mode flags 376 * 377 * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data 378 * are copied from memory starting at address msg. On successful execution the 379 * return value of scif_send() is the number of bytes that were sent, and is 380 * zero if no bytes were sent because len was zero. scif_send() may be called 381 * only when the endpoint is in a connected state. 382 * 383 * If a scif_send() call is non-blocking, then it sends only those bytes which 384 * can be sent without waiting, up to a maximum of len bytes. 
385 * 386 * If a scif_send() call is blocking, then it normally returns after sending 387 * all len bytes. If a blocking call is interrupted or the connection is 388 * reset, the call is considered successful if some bytes were sent or len is 389 * zero, otherwise the call is considered unsuccessful. 390 * 391 * In user mode, the select() and poll() functions can be used to determine 392 * when the send queue is not full. In kernel mode, the scif_poll() function 393 * may be used for this purpose. 394 * 395 * It is recommended that scif_send()/scif_recv() only be used for short 396 * control-type message communication between SCIF endpoints. The SCIF RMA 397 * APIs are expected to provide better performance for transfer sizes of 398 * 1024 bytes or longer for the current MIC hardware and software 399 * implementation. 400 * 401 * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK 402 * is passed as the flags argument. 403 * 404 * Return: 405 * Upon successful completion, scif_send() returns the number of bytes sent; 406 * otherwise in user mode -1 is returned and errno is set to indicate the 407 * error; in kernel mode the negative of one of the following errors is 408 * returned. 409 * 410 * Errors: 411 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 412 * ECONNRESET - Connection reset by peer 413 * EINVAL - flags is invalid, or len is negative 414 * ENODEV - The remote node is lost or existed, but is not currently in the 415 * network since it may have crashed 416 * ENOMEM - Not enough space 417 * ENOTCONN - The endpoint is not connected 418 */ 419 int scif_send(scif_epd_t epd, void *msg, int len, int flags); 420 421 /** 422 * scif_recv() - Receive a message 423 * @epd: endpoint descriptor 424 * @msg: message buffer address 425 * @len: message buffer length 426 * @flags: blocking mode flags 427 * 428 * scif_recv() receives data from the peer of endpoint epd. 
Up to len bytes of 429 * data are copied to memory starting at address msg. On successful execution 430 * the return value of scif_recv() is the number of bytes that were received, 431 * and is zero if no bytes were received because len was zero. scif_recv() may 432 * be called only when the endpoint is in a connected state. 433 * 434 * If a scif_recv() call is non-blocking, then it receives only those bytes 435 * which can be received without waiting, up to a maximum of len bytes. 436 * 437 * If a scif_recv() call is blocking, then it normally returns after receiving 438 * all len bytes. If the blocking call was interrupted due to a disconnection, 439 * subsequent calls to scif_recv() will copy all bytes received upto the point 440 * of disconnection. 441 * 442 * In user mode, the select() and poll() functions can be used to determine 443 * when data is available to be received. In kernel mode, the scif_poll() 444 * function may be used for this purpose. 445 * 446 * It is recommended that scif_send()/scif_recv() only be used for short 447 * control-type message communication between SCIF endpoints. The SCIF RMA 448 * APIs are expected to provide better performance for transfer sizes of 449 * 1024 bytes or longer for the current MIC hardware and software 450 * implementation. 451 * 452 * scif_recv() will block until the entire message is received if 453 * SCIF_RECV_BLOCK is passed as the flags argument. 454 * 455 * Return: 456 * Upon successful completion, scif_recv() returns the number of bytes 457 * received; otherwise in user mode -1 is returned and errno is set to 458 * indicate the error; in kernel mode the negative of one of the following 459 * errors is returned. 
460 * 461 * Errors: 462 * EAGAIN - The destination node is returning from a low power state 463 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 464 * ECONNRESET - Connection reset by peer 465 * EINVAL - flags is invalid, or len is negative 466 * ENODEV - The remote node is lost or existed, but is not currently in the 467 * network since it may have crashed 468 * ENOMEM - Not enough space 469 * ENOTCONN - The endpoint is not connected 470 */ 471 int scif_recv(scif_epd_t epd, void *msg, int len, int flags); 472 473 /** 474 * scif_register() - Mark a memory region for remote access. 475 * @epd: endpoint descriptor 476 * @addr: starting virtual address 477 * @len: length of range 478 * @offset: offset of window 479 * @prot_flags: read/write protection flags 480 * @map_flags: mapping flags 481 * 482 * The scif_register() function opens a window, a range of whole pages of the 483 * registered address space of the endpoint epd, starting at offset po and 484 * continuing for len bytes. The value of po, further described below, is a 485 * function of the parameters offset and len, and the value of map_flags. Each 486 * page of the window represents the physical memory page which backs the 487 * corresponding page of the range of virtual address pages starting at addr 488 * and continuing for len bytes. addr and len are constrained to be multiples 489 * of the page size. A successful scif_register() call returns po. 490 * 491 * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset 492 * exactly, and offset is constrained to be a multiple of the page size. The 493 * mapping established by scif_register() will not replace any existing 494 * registration; an error is returned if any page within the range [offset, 495 * offset + len - 1] intersects an existing window. 496 * 497 * When SCIF_MAP_FIXED is not set, the implementation uses offset in an 498 * implementation-defined manner to arrive at po. 
The po value so chosen will 499 * be an area of the registered address space that the implementation deems 500 * suitable for a mapping of len bytes. An offset value of 0 is interpreted as 501 * granting the implementation complete freedom in selecting po, subject to 502 * constraints described below. A non-zero value of offset is taken to be a 503 * suggestion of an offset near which the mapping should be placed. When the 504 * implementation selects a value for po, it does not replace any extant 505 * window. In all cases, po will be a multiple of the page size. 506 * 507 * The physical pages which are so represented by a window are available for 508 * access in calls to mmap(), scif_readfrom(), scif_writeto(), 509 * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the 510 * physical pages represented by the window will not be reused by the memory 511 * subsystem for any other purpose. Note that the same physical page may be 512 * represented by multiple windows. 513 * 514 * Subsequent operations which change the memory pages to which virtual 515 * addresses are mapped (such as mmap(), munmap()) have no effect on 516 * existing window. 517 * 518 * If the process will fork(), it is recommended that the registered 519 * virtual address range be marked with MADV_DONTFORK. Doing so will prevent 520 * problems due to copy-on-write semantics. 521 * 522 * The prot_flags argument is formed by OR'ing together one or more of the 523 * following values. 524 * SCIF_PROT_READ - allow read operations from the window 525 * SCIF_PROT_WRITE - allow write operations to the window 526 * 527 * Return: 528 * Upon successful completion, scif_register() returns the offset at which the 529 * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that 530 * is (off_t *)-1) is returned and errno is set to indicate the error; in 531 * kernel mode the negative of one of the following errors is returned. 
532 * 533 * Errors: 534 * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range 535 * [offset, offset + len -1] are already registered 536 * EAGAIN - The mapping could not be performed due to lack of resources 537 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 538 * ECONNRESET - Connection reset by peer 539 * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is 540 * set in flags, and offset is not a multiple of the page size, or addr is not a 541 * multiple of the page size, or len is not a multiple of the page size, or is 542 * 0, or offset is negative 543 * ENODEV - The remote node is lost or existed, but is not currently in the 544 * network since it may have crashed 545 * ENOMEM - Not enough space 546 * ENOTCONN -The endpoint is not connected 547 */ 548 off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset, 549 int prot_flags, int map_flags); 550 551 /** 552 * scif_unregister() - Mark a memory region for remote access. 553 * @epd: endpoint descriptor 554 * @offset: start of range to unregister 555 * @len: length of range to unregister 556 * 557 * The scif_unregister() function closes those previously registered windows 558 * which are entirely within the range [offset, offset + len - 1]. It is an 559 * error to specify a range which intersects only a subrange of a window. 560 * 561 * On a successful return, pages within the window may no longer be specified 562 * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(), 563 * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window, 564 * however, continues to exist until all previous references against it are 565 * removed. A window is referenced if there is a mapping to it created by 566 * mmap(), or if scif_get_pages() was called against the window 567 * (and the pages have not been returned via scif_put_pages()). 
A window is 568 * also referenced while an RMA, in which some range of the window is a source 569 * or destination, is in progress. Finally a window is referenced while some 570 * offset in that window was specified to scif_fence_signal(), and the RMAs 571 * marked by that call to scif_fence_signal() have not completed. While a 572 * window is in this state, its registered address space pages are not 573 * available for use in a new registered window. 574 * 575 * When all such references to the window have been removed, its references to 576 * all the physical pages which it represents are removed. Similarly, the 577 * registered address space pages of the window become available for 578 * registration in a new window. 579 * 580 * Return: 581 * Upon successful completion, scif_unregister() returns 0; otherwise in user 582 * mode -1 is returned and errno is set to indicate the error; in kernel mode 583 * the negative of one of the following errors is returned. In the event of an 584 * error, no windows are unregistered. 
585 * 586 * Errors: 587 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 588 * ECONNRESET - Connection reset by peer 589 * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a 590 * window, or offset is negative 591 * ENODEV - The remote node is lost or existed, but is not currently in the 592 * network since it may have crashed 593 * ENOTCONN - The endpoint is not connected 594 * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the 595 * registered address space of epd 596 */ 597 int scif_unregister(scif_epd_t epd, off_t offset, size_t len); 598 599 /** 600 * scif_readfrom() - Copy from a remote address space 601 * @epd: endpoint descriptor 602 * @loffset: offset in local registered address space to 603 * which to copy 604 * @len: length of range to copy 605 * @roffset: offset in remote registered address space 606 * from which to copy 607 * @rma_flags: transfer mode flags 608 * 609 * scif_readfrom() copies len bytes from the remote registered address space of 610 * the peer of endpoint epd, starting at the offset roffset to the local 611 * registered address space of epd, starting at the offset loffset. 612 * 613 * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, 614 * roffset + len - 1] must be within some registered window or windows of the 615 * local and remote nodes. A range may intersect multiple registered windows, 616 * but only if those windows are contiguous in the registered address space. 617 * 618 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using 619 * programmed read/writes. Otherwise the data is copied using DMA. If rma_- 620 * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the 621 * transfer is complete. Otherwise, the transfer may be performed asynchron- 622 * ously. The order in which any two asynchronous RMA operations complete 623 * is non-deterministic. 
The synchronization functions, scif_fence_mark()/ 624 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to 625 * the completion of asynchronous RMA operations on the same endpoint. 626 * 627 * The DMA transfer of individual bytes is not guaranteed to complete in 628 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last 629 * cacheline or partial cacheline of the source range will become visible on 630 * the destination node after all other transferred data in the source 631 * range has become visible on the destination node. 632 * 633 * The optimal DMA performance will likely be realized if both 634 * loffset and roffset are cacheline aligned (are a multiple of 64). Lower 635 * performance will likely be realized if loffset and roffset are not 636 * cacheline aligned but are separated by some multiple of 64. The lowest level 637 * of performance is likely if loffset and roffset are not separated by a 638 * multiple of 64. 639 * 640 * The rma_flags argument is formed by ORing together zero or more of the 641 * following values. 642 * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA 643 * engine. 644 * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the 645 * transfer has completed. Passing this flag results in the 646 * current implementation busy waiting and consuming CPU cycles 647 * while the DMA transfer is in progress for best performance by 648 * avoiding the interrupt latency. 649 * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of 650 * the source range becomes visible on the destination node 651 * after all other transferred data in the source range has 652 * become visible on the destination 653 * 654 * Return: 655 * Upon successful completion, scif_readfrom() returns 0; otherwise in user 656 * mode -1 is returned and errno is set to indicate the error; in kernel mode 657 * the negative of one of the following errors is returned. 
658 * 659 * Errors: 660 * EACCESS - Attempt to write to a read-only range 661 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 662 * ECONNRESET - Connection reset by peer 663 * EINVAL - rma_flags is invalid 664 * ENODEV - The remote node is lost or existed, but is not currently in the 665 * network since it may have crashed 666 * ENOTCONN - The endpoint is not connected 667 * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered 668 * address space of epd, or, The range [roffset, roffset + len - 1] is invalid 669 * for the registered address space of the peer of epd, or loffset or roffset 670 * is negative 671 */ 672 int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t 673 roffset, int rma_flags); 674 675 /** 676 * scif_writeto() - Copy to a remote address space 677 * @epd: endpoint descriptor 678 * @loffset: offset in local registered address space 679 * from which to copy 680 * @len: length of range to copy 681 * @roffset: offset in remote registered address space to 682 * which to copy 683 * @rma_flags: transfer mode flags 684 * 685 * scif_writeto() copies len bytes from the local registered address space of 686 * epd, starting at the offset loffset to the remote registered address space 687 * of the peer of endpoint epd, starting at the offset roffset. 688 * 689 * Each of the specified ranges [loffset, loffset + len - 1] and [roffset, 690 * roffset + len - 1] must be within some registered window or windows of the 691 * local and remote nodes. A range may intersect multiple registered windows, 692 * but only if those windows are contiguous in the registered address space. 693 * 694 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using 695 * programmed read/writes. Otherwise the data is copied using DMA. If rma_- 696 * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the 697 * transfer is complete. Otherwise, the transfer may be performed asynchron- 698 * ously. 
The order in which any two asynchronous RMA operations complete 699 * is non-deterministic. The synchronization functions, scif_fence_mark()/ 700 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to 701 * the completion of asynchronous RMA operations on the same endpoint. 702 * 703 * The DMA transfer of individual bytes is not guaranteed to complete in 704 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last 705 * cacheline or partial cacheline of the source range will become visible on 706 * the destination node after all other transferred data in the source 707 * range has become visible on the destination node. 708 * 709 * The optimal DMA performance will likely be realized if both 710 * loffset and roffset are cacheline aligned (are a multiple of 64). Lower 711 * performance will likely be realized if loffset and roffset are not cacheline 712 * aligned but are separated by some multiple of 64. The lowest level of 713 * performance is likely if loffset and roffset are not separated by a multiple 714 * of 64. 715 * 716 * The rma_flags argument is formed by ORing together zero or more of the 717 * following values. 718 * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA 719 * engine. 720 * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the 721 * transfer has completed. Passing this flag results in the 722 * current implementation busy waiting and consuming CPU cycles 723 * while the DMA transfer is in progress for best performance by 724 * avoiding the interrupt latency. 
725 * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of 726 * the source range becomes visible on the destination node 727 * after all other transferred data in the source range has 728 * become visible on the destination 729 * 730 * Return: 731 * Upon successful completion, scif_writeto() returns 0; otherwise in user 732 * mode -1 is returned and errno is set to indicate the error; in kernel mode 733 * the negative of one of the following errors is returned. 734 * 735 * Errors: 736 * EACCES - Attempt to write to a read-only range 737 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 738 * ECONNRESET - Connection reset by peer 739 * EINVAL - rma_flags is invalid 740 * ENODEV - The remote node is lost or existed, but is not currently in the 741 * network since it may have crashed 742 * ENOTCONN - The endpoint is not connected 743 * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered 744 * address space of epd, or the range [roffset, roffset + len - 1] is invalid 745 * for the registered address space of the peer of epd, or loffset or roffset 746 * is negative 747 */ 748 int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t 749 roffset, int rma_flags); 750 751 /** 752 * scif_vreadfrom() - Copy from a remote address space 753 * @epd: endpoint descriptor 754 * @addr: address to which to copy 755 * @len: length of range to copy 756 * @roffset: offset in remote registered address space 757 * from which to copy 758 * @rma_flags: transfer mode flags 759 * 760 * scif_vreadfrom() copies len bytes from the remote registered address 761 * space of the peer of endpoint epd, starting at the offset roffset, to local 762 * memory, starting at addr. 763 * 764 * The specified range [roffset, roffset + len - 1] must be within some 765 * registered window or windows of the remote nodes.
The range may 766 * intersect multiple registered windows, but only if those windows are 767 * contiguous in the registered address space. 768 * 769 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using 770 * programmed read/writes. Otherwise the data is copied using DMA. If rma_- 771 * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the 772 * transfer is complete. Otherwise, the transfer may be performed asynchron- 773 * ously. The order in which any two asynchronous RMA operations complete 774 * is non-deterministic. The synchronization functions, scif_fence_mark()/ 775 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to 776 * the completion of asynchronous RMA operations on the same endpoint. 777 * 778 * The DMA transfer of individual bytes is not guaranteed to complete in 779 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last 780 * cacheline or partial cacheline of the source range will become visible on 781 * the destination node after all other transferred data in the source 782 * range has become visible on the destination node. 783 * 784 * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back 785 * the specified local memory range may remain in a pinned state even after 786 * the specified transfer completes. This may reduce overhead if some or all of 787 * the same virtual address range is referenced in a subsequent call of 788 * scif_vreadfrom() or scif_vwriteto(). 789 * 790 * The optimal DMA performance will likely be realized if both 791 * addr and roffset are cacheline aligned (are a multiple of 64). Lower 792 * performance will likely be realized if addr and roffset are not 793 * cacheline aligned but are separated by some multiple of 64. The lowest level 794 * of performance is likely if addr and roffset are not separated by a 795 * multiple of 64.
796 * 797 * The rma_flags argument is formed by ORing together zero or more of the 798 * following values. 799 * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA 800 * engine. 801 * SCIF_RMA_USECACHE - enable registration caching 802 * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the 803 * transfer has completed. Passing this flag results in the 804 * current implementation busy waiting and consuming CPU cycles 805 * while the DMA transfer is in progress for best performance by 806 * avoiding the interrupt latency. 807 * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of 808 * the source range becomes visible on the destination node 809 * after all other transferred data in the source range has 810 * become visible on the destination 811 * 812 * Return: 813 * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user 814 * mode -1 is returned and errno is set to indicate the error; in kernel mode 815 * the negative of one of the following errors is returned. 
816 * 817 * Errors: 818 * EACCES - Attempt to write to a read-only range 819 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 820 * ECONNRESET - Connection reset by peer 821 * EINVAL - rma_flags is invalid 822 * ENODEV - The remote node is lost or existed, but is not currently in the 823 * network since it may have crashed 824 * ENOTCONN - The endpoint is not connected 825 * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the 826 * registered address space of the peer of epd 827 */ 828 int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, 829 int rma_flags); 830 831 /** 832 * scif_vwriteto() - Copy to a remote address space 833 * @epd: endpoint descriptor 834 * @addr: address from which to copy 835 * @len: length of range to copy 836 * @roffset: offset in remote registered address space to 837 * which to copy 838 * @rma_flags: transfer mode flags 839 * 840 * scif_vwriteto() copies len bytes from the local memory, starting at addr, to 841 * the remote registered address space of the peer of endpoint epd, starting at 842 * the offset roffset. 843 * 844 * The specified range [roffset, roffset + len - 1] must be within some 845 * registered window or windows of the remote nodes. The range may intersect 846 * multiple registered windows, but only if those windows are contiguous in the 847 * registered address space. 848 * 849 * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using 850 * programmed read/writes. Otherwise the data is copied using DMA. If rma_- 851 * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the 852 * transfer is complete. Otherwise, the transfer may be performed asynchron- 853 * ously. The order in which any two asynchronous RMA operations complete 854 * is non-deterministic.
The synchronization functions, scif_fence_mark()/ 855 * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to 856 * the completion of asynchronous RMA operations on the same endpoint. 857 * 858 * The DMA transfer of individual bytes is not guaranteed to complete in 859 * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last 860 * cacheline or partial cacheline of the source range will become visible on 861 * the destination node after all other transferred data in the source 862 * range has become visible on the destination node. 863 * 864 * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back 865 * the specified local memory range may remain in a pinned state even after 866 * the specified transfer completes. This may reduce overhead if some or all of 867 * the same virtual address range is referenced in a subsequent call of 868 * scif_vreadfrom() or scif_vwriteto(). 869 * 870 * The optimal DMA performance will likely be realized if both 871 * addr and roffset are cacheline aligned (are a multiple of 64). Lower 872 * performance will likely be realized if addr and roffset are not cacheline 873 * aligned but are separated by some multiple of 64. The lowest level of 874 * performance is likely if addr and roffset are not separated by a multiple of 875 * 64. 876 * 877 * The rma_flags argument is formed by ORing together zero or more of the 878 * following values. 879 * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA 880 * engine. 881 * SCIF_RMA_USECACHE - allow registration caching 882 * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the 883 * transfer has completed. Passing this flag results in the 884 * current implementation busy waiting and consuming CPU cycles 885 * while the DMA transfer is in progress for best performance by 886 * avoiding the interrupt latency.
887 * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of 888 * the source range becomes visible on the destination node 889 * after all other transferred data in the source range has 890 * become visible on the destination 891 * 892 * Return: 893 * Upon successful completion, scif_vwriteto() returns 0; otherwise in user 894 * mode -1 is returned and errno is set to indicate the error; in kernel mode 895 * the negative of one of the following errors is returned. 896 * 897 * Errors: 898 * EACCES - Attempt to write to a read-only range 899 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 900 * ECONNRESET - Connection reset by peer 901 * EINVAL - rma_flags is invalid 902 * ENODEV - The remote node is lost or existed, but is not currently in the 903 * network since it may have crashed 904 * ENOTCONN - The endpoint is not connected 905 * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the 906 * registered address space of the peer of epd 907 */ 908 int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset, 909 int rma_flags); 910 911 /** 912 * scif_fence_mark() - Mark previously issued RMAs 913 * @epd: endpoint descriptor 914 * @flags: control flags 915 * @mark: marked value returned as output. 916 * 917 * scif_fence_mark() returns after marking the current set of all uncompleted 918 * RMAs initiated through the endpoint epd or the current set of all 919 * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are 920 * marked with a value returned at mark. The application may subsequently call 921 * scif_fence_wait(), passing the value returned at mark, to await completion 922 * of all RMAs so marked. 923 * 924 * The flags argument has exactly one of the following values.
925 * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint 926 * epd are marked 927 * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer 928 * of endpoint epd are marked 929 * 930 * Return: 931 * Upon successful completion, scif_fence_mark() returns 0; otherwise in user 932 * mode -1 is returned and errno is set to indicate the error; in kernel mode 933 * the negative of one of the following errors is returned. 934 * 935 * Errors: 936 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 937 * ECONNRESET - Connection reset by peer 938 * EINVAL - flags is invalid 939 * ENODEV - The remote node is lost or existed, but is not currently in the 940 * network since it may have crashed 941 * ENOTCONN - The endpoint is not connected 942 * ENOMEM - Insufficient kernel memory was available 943 */ 944 int scif_fence_mark(scif_epd_t epd, int flags, int *mark); 945 946 /** 947 * scif_fence_wait() - Wait for completion of marked RMAs 948 * @epd: endpoint descriptor 949 * @mark: mark request 950 * 951 * scif_fence_wait() returns after all RMAs marked with mark have completed. 952 * The value passed in mark must have been obtained in a previous call to 953 * scif_fence_mark(). 954 * 955 * Return: 956 * Upon successful completion, scif_fence_wait() returns 0; otherwise in user 957 * mode -1 is returned and errno is set to indicate the error; in kernel mode 958 * the negative of one of the following errors is returned. 
959 * 960 * Errors: 961 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 962 * ECONNRESET - Connection reset by peer 963 * ENODEV - The remote node is lost or existed, but is not currently in the 964 * network since it may have crashed 965 * ENOTCONN - The endpoint is not connected 966 * ENOMEM - Insufficient kernel memory was available 967 */ 968 int scif_fence_wait(scif_epd_t epd, int mark); 969 970 /** 971 * scif_fence_signal() - Request a memory update on completion of RMAs 972 * @epd: endpoint descriptor 973 * @loff: local offset 974 * @lval: local value to write to loff 975 * @roff: remote offset 976 * @rval: remote value to write to roff 977 * @flags: flags 978 * 979 * scif_fence_signal() returns after marking the current set of all uncompleted 980 * RMAs initiated through the endpoint epd or marking the current set of all 981 * uncompleted RMAs initiated through the peer of endpoint epd. 982 * 983 * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the 984 * marked set, lval is written to memory at the address corresponding to offset 985 * loff in the local registered address space of epd. loff must be within a 986 * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion 987 * of the RMAs in the marked set, rval is written to memory at the address 988 * corresponding to offset roff in the remote registered address space of epd. 989 * roff must be within a remote registered window of the peer of epd. Note 990 * that any specified offset must be DWORD (4 byte / 32 bit) aligned. 991 * 992 * The flags argument is formed by OR'ing together the following. 993 * Exactly one of the following values. 994 * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint 995 * epd are marked 996 * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer 997 * of endpoint epd are marked 998 * One or more of the following values.
999 * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to 1000 * memory at the address corresponding to offset loff in the local 1001 * registered address space of epd. 1002 * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to 1003 * memory at the address corresponding to offset roff in the remote 1004 * registered address space of epd. 1005 * 1006 * Return: 1007 * Upon successful completion, scif_fence_signal() returns 0; otherwise in 1008 * user mode -1 is returned and errno is set to indicate the error; in kernel 1009 * mode the negative of one of the following errors is returned. 1010 * 1011 * Errors: 1012 * EBADF, ENOTTY - epd is not a valid endpoint descriptor 1013 * ECONNRESET - Connection reset by peer 1014 * EINVAL - flags is invalid, or loff or roff are not DWORD aligned 1015 * ENODEV - The remote node is lost or existed, but is not currently in the 1016 * network since it may have crashed 1017 * ENOTCONN - The endpoint is not connected 1018 * ENXIO - loff is invalid for the registered address space of epd, or roff is invalid 1019 * for the registered address space of the peer of epd 1020 */ 1021 int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, 1022 u64 rval, int flags); 1023 1024 /** 1025 * scif_get_node_ids() - Return information about online nodes 1026 * @nodes: array in which to return online node IDs 1027 * @len: number of entries in the nodes array 1028 * @self: address to place the node ID of the local node 1029 * 1030 * scif_get_node_ids() fills in the nodes array with up to len node IDs of the 1031 * nodes in the SCIF network. If there is not enough space in nodes, as 1032 * indicated by the len parameter, only len node IDs are returned in nodes. The 1033 * return value of scif_get_node_ids() is the total number of nodes currently in 1034 * the SCIF network.
By checking the return value against the len parameter, 1035 * the user may determine if enough space for nodes was allocated. 1036 * 1037 * The node ID of the local node is returned at self. 1038 * 1039 * Return: 1040 * Upon successful completion, scif_get_node_ids() returns the actual number of 1041 * online nodes in the SCIF network including 'self'; otherwise in user mode 1042 * -1 is returned and errno is set to indicate the error; in kernel mode no 1043 * errors are returned. 1044 */ 1045 int scif_get_node_ids(u16 *nodes, int len, u16 *self); 1046 1047 /** 1048 * scif_pin_pages() - Pin a set of pages 1049 * @addr: Virtual address of range to pin 1050 * @len: Length of range to pin 1051 * @prot_flags: Page protection flags 1052 * @map_flags: Page classification flags 1053 * @pinned_pages: Handle to pinned pages 1054 * 1055 * scif_pin_pages() pins (locks in physical memory) the physical pages which 1056 * back the range of virtual address pages starting at addr and continuing for 1057 * len bytes. addr and len are constrained to be multiples of the page size. A 1058 * successful scif_pin_pages() call returns a handle to pinned_pages which may 1059 * be used in subsequent calls to scif_register_pinned_pages(). 1060 * 1061 * The pages will remain pinned as long as there is a reference against the 1062 * scif_pinned_pages_t value returned by scif_pin_pages() and until 1063 * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A 1064 * reference is added to a scif_pinned_pages_t value each time a window is 1065 * created by calling scif_register_pinned_pages() and passing the 1066 * scif_pinned_pages_t value. A reference is removed from a 1067 * scif_pinned_pages_t value each time such a window is deleted. 1068 * 1069 * Subsequent operations which change the memory pages to which virtual 1070 * addresses are mapped (such as mmap(), munmap()) have no effect on the 1071 * scif_pinned_pages_t value or windows created against it. 
1072 * 1073 * If the process will fork(), it is recommended that the registered 1074 * virtual address range be marked with MADV_DONTFORK. Doing so will prevent 1075 * problems due to copy-on-write semantics. 1076 * 1077 * The prot_flags argument is formed by OR'ing together one or more of the 1078 * following values. 1079 * SCIF_PROT_READ - allow read operations against the pages 1080 * SCIF_PROT_WRITE - allow write operations against the pages 1081 * The map_flags argument can be set as SCIF_MAP_KERNEL to interpret addr as a 1082 * kernel space address. By default, addr is interpreted as a user space 1083 * address. 1084 * 1085 * Return: 1086 * Upon successful completion, scif_pin_pages() returns 0; otherwise the 1087 * negative of one of the following errors is returned. 1088 * 1089 * Errors: 1090 * EINVAL - prot_flags is invalid, map_flags is invalid, or offset is negative 1091 * ENOMEM - Not enough space 1092 */ 1093 int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags, 1094 scif_pinned_pages_t *pinned_pages); 1095 1096 /** 1097 * scif_unpin_pages() - Unpin a set of pages 1098 * @pinned_pages: Handle to pinned pages to be unpinned 1099 * 1100 * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new 1101 * windows against pinned_pages. The physical pages represented by pinned_pages 1102 * will remain pinned until all windows previously registered against 1103 * pinned_pages are deleted (the window is scif_unregister()'d and all 1104 * references to the window are removed; see scif_unregister()). 1105 * 1106 * pinned_pages must have been obtained from a previous call to scif_pin_pages(). 1107 * After calling scif_unpin_pages(), it is an error to pass pinned_pages to 1108 * scif_register_pinned_pages(). 1109 * 1110 * Return: 1111 * Upon successful completion, scif_unpin_pages() returns 0; otherwise the 1112 * negative of one of the following errors is returned.
1113 * 1114 * Errors: 1115 * EINVAL - pinned_pages is not valid 1116 */ 1117 int scif_unpin_pages(scif_pinned_pages_t pinned_pages); 1118 1119 /** 1120 * scif_register_pinned_pages() - Mark a memory region for remote access. 1121 * @epd: endpoint descriptor 1122 * @pinned_pages: Handle to pinned pages 1123 * @offset: Registered address space offset 1124 * @map_flags: Flags which control where pages are mapped 1125 * 1126 * The scif_register_pinned_pages() function opens a window, a range of whole 1127 * pages of the registered address space of the endpoint epd, starting at 1128 * offset po. The value of po, further described below, is a function of the 1129 * parameters offset and pinned_pages, and the value of map_flags. Each page of 1130 * the window represents a corresponding physical memory page of the range 1131 * represented by pinned_pages; the length of the window is the same as the 1132 * length of range represented by pinned_pages. A successful 1133 * scif_register_pinned_pages() call returns po as the return value. 1134 * 1135 * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset 1136 * exactly, and offset is constrained to be a multiple of the page size. The 1137 * mapping established by scif_register_pinned_pages() will not replace any 1138 * existing registration; an error is returned if any page of the new window 1139 * would intersect an existing window. 1140 * 1141 * When SCIF_MAP_FIXED is not set, the implementation uses offset in an 1142 * implementation-defined manner to arrive at po. The po so chosen will be an 1143 * area of the registered address space that the implementation deems suitable 1144 * for a mapping of the required size. An offset value of 0 is interpreted as 1145 * granting the implementation complete freedom in selecting po, subject to 1146 * constraints described below. A non-zero value of offset is taken to be a 1147 * suggestion of an offset near which the mapping should be placed. 
When the 1148 * implementation selects a value for po, it does not replace any extant 1149 * window. In all cases, po will be a multiple of the page size. 1150 * 1151 * The physical pages which are so represented by a window are available for 1152 * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(), 1153 * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the 1154 * physical pages represented by the window will not be reused by the memory 1155 * subsystem for any other purpose. Note that the same physical page may be 1156 * represented by multiple windows. 1157 * 1158 * Windows created by scif_register_pinned_pages() are unregistered by 1159 * scif_unregister(). 1160 * 1161 * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a 1162 * fixed offset. 1163 * 1164 * Return: 1165 * Upon successful completion, scif_register_pinned_pages() returns the offset 1166 * at which the mapping was placed (po); otherwise the negative of one of the 1167 * following errors is returned. 
1168 * 1169 * Errors: 1170 * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window 1171 * would intersect an existing window 1172 * EAGAIN - The mapping could not be performed due to lack of resources 1173 * ECONNRESET - Connection reset by peer 1174 * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and 1175 * offset is not a multiple of the page size, or offset is negative 1176 * ENODEV - The remote node is lost or existed, but is not currently in the 1177 * network since it may have crashed 1178 * ENOMEM - Not enough space 1179 * ENOTCONN - The endpoint is not connected 1180 */ 1181 off_t scif_register_pinned_pages(scif_epd_t epd, 1182 scif_pinned_pages_t pinned_pages, 1183 off_t offset, int map_flags); 1184 1185 /** 1186 * scif_get_pages() - Add references to remote registered pages 1187 * @epd: endpoint descriptor 1188 * @offset: remote registered offset 1189 * @len: length of range of pages 1190 * @pages: returned scif_range structure 1191 * 1192 * scif_get_pages() returns the addresses of the physical pages represented by 1193 * those pages of the registered address space of the peer of epd, starting at 1194 * offset and continuing for len bytes. offset and len are constrained to be 1195 * multiples of the page size. 1196 * 1197 * All of the pages in the specified range [offset, offset + len - 1] must be 1198 * within a single window of the registered address space of the peer of epd. 1199 * 1200 * The addresses are returned as a virtually contiguous array pointed to by the 1201 * phys_addr component of the scif_range structure whose address is returned in 1202 * pages. The nr_pages component of scif_range is the length of the array. The 1203 * prot_flags component of scif_range holds the protection flag value passed 1204 * when the pages were registered. 
1205 * 1206 * Each physical page whose address is returned by scif_get_pages() remains 1207 * available and will not be released for reuse until the scif_range structure 1208 * is returned in a call to scif_put_pages(). The scif_range structure returned 1209 * by scif_get_pages() must be unmodified. 1210 * 1211 * It is an error to call scif_close() on an endpoint on which a scif_range 1212 * structure of that endpoint has not been returned to scif_put_pages(). 1213 * 1214 * Return: 1215 * Upon successful completion, scif_get_pages() returns 0; otherwise the 1216 * negative of one of the following errors is returned. 1217 * Errors: 1218 * ECONNRESET - Connection reset by peer. 1219 * EINVAL - offset is not a multiple of the page size, or offset is negative, or 1220 * len is not a multiple of the page size 1221 * ENODEV - The remote node is lost or existed, but is not currently in the 1222 * network since it may have crashed 1223 * ENOTCONN - The endpoint is not connected 1224 * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid 1225 * for the registered address space of the peer of epd 1226 */ 1227 int scif_get_pages(scif_epd_t epd, off_t offset, size_t len, 1228 struct scif_range **pages); 1229 1230 /** 1231 * scif_put_pages() - Remove references from remote registered pages 1232 * @pages: pages to be returned 1233 * 1234 * scif_put_pages() releases a scif_range structure previously obtained by 1235 * calling scif_get_pages(). The physical pages represented by pages may 1236 * be reused when the window which represented those pages is unregistered. 1237 * Therefore, those pages must not be accessed after calling scif_put_pages(). 1238 * 1239 * Return: 1240 * Upon successful completion, scif_put_pages() returns 0; otherwise the 1241 * negative of one of the following errors is returned.
1242 * Errors: 1243 * EINVAL - pages does not point to a valid scif_range structure, or 1244 * the scif_range structure pointed to by pages was already returned 1245 * ENODEV - The remote node is lost or existed, but is not currently in the 1246 * network since it may have crashed 1247 * ENOTCONN - The endpoint is not connected 1248 */ 1249 int scif_put_pages(struct scif_range *pages); 1250 1251 /** 1252 * scif_poll() - Wait for some event on an endpoint 1253 * @epds: Array of endpoint descriptors 1254 * @nepds: Length of epds 1255 * @timeout: Upper limit on time for which scif_poll() will block 1256 * 1257 * scif_poll() waits for one of a set of endpoints to become ready to perform 1258 * an I/O operation. 1259 * 1260 * The epds argument specifies the endpoint descriptors to be examined and the 1261 * events of interest for each endpoint descriptor. epds is a pointer to an 1262 * array with one member for each open endpoint descriptor of interest. 1263 * 1264 * The number of items in the epds array is specified in nepds. The epd field 1265 * of scif_pollepd is an endpoint descriptor of an open endpoint. The field 1266 * events is a bitmask specifying the events which the application is 1267 * interested in. The field revents is an output parameter, filled by the 1268 * kernel with the events that actually occurred. The bits returned in revents 1269 * can include any of those specified in events, or one of the values EPOLLERR, 1270 * EPOLLHUP, or EPOLLNVAL. (These three bits are meaningless in the events 1271 * field, and will be set in the revents field whenever the corresponding 1272 * condition is true.) 1273 * 1274 * If none of the events requested (and no error) has occurred for any of the 1275 * endpoint descriptors, then scif_poll() blocks until one of the events occurs. 1276 * 1277 * The timeout argument specifies an upper limit on the time for which 1278 * scif_poll() will block, in milliseconds. 
Specifying a negative value in 1279 * timeout means an infinite timeout. 1280 * 1281 * The following bits may be set in events and returned in revents. 1282 * EPOLLIN - Data may be received without blocking. For a connected 1283 * endpoint, this means that scif_recv() may be called without blocking. For a 1284 * listening endpoint, this means that scif_accept() may be called without 1285 * blocking. 1286 * EPOLLOUT - Data may be sent without blocking. For a connected endpoint, this 1287 * means that scif_send() may be called without blocking. EPOLLOUT may also be 1288 * used to block waiting for a non-blocking connect to complete. This bit value 1289 * has no meaning for a listening endpoint and is ignored if specified. 1290 * 1291 * The following bits are only returned in revents, and are ignored if set in 1292 * events. 1293 * EPOLLERR - An error occurred on the endpoint 1294 * EPOLLHUP - The connection to the peer endpoint was disconnected 1295 * EPOLLNVAL - The specified endpoint descriptor is invalid. 1296 * 1297 * Return: 1298 * Upon successful completion, scif_poll() returns a non-negative value. A 1299 * positive value indicates the total number of endpoint descriptors that have 1300 * been selected (that is, endpoint descriptors for which the revents member is 1301 * non-zero). A value of 0 indicates that the call timed out and no endpoint 1302 * descriptors have been selected. Otherwise in user mode -1 is returned and 1303 * errno is set to indicate the error; in kernel mode the negative of one of 1304 * the following errors is returned. 
1305 * 1306 * Errors: 1307 * EINTR - A signal occurred before any requested event 1308 * EINVAL - The nepds argument is greater than {OPEN_MAX} 1309 * ENOMEM - There was no space to allocate file descriptor tables 1310 */ 1311 int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout); 1312 1313 /** 1314 * scif_client_register() - Register a SCIF client 1315 * @client: client to be registered 1316 * 1317 * scif_client_register() registers a SCIF client. The probe() method 1318 * of the client is called when SCIF peer devices come online and the 1319 * remove() method is called when the peer devices disappear. 1320 * 1321 * Return: 1322 * Upon successful completion, scif_client_register() returns a non-negative 1323 * value. Otherwise the return value is the same as subsys_interface_register() 1324 * in the kernel. 1325 */ 1326 int scif_client_register(struct scif_client *client); 1327 1328 /** 1329 * scif_client_unregister() - Unregister a SCIF client 1330 * @client: client to be unregistered 1331 * 1332 * scif_client_unregister() unregisters a SCIF client. 1333 * 1334 * Return: 1335 * None 1336 */ 1337 void scif_client_unregister(struct scif_client *client); 1338 1339 #endif /* __SCIF_H__ */ 1340