| xf.li | bdd93d5 | 2023-05-12 07:10:14 -0700 | [diff] [blame] | 1 | /* Guts of both `select' and `poll' for Hurd. | 
|  | 2 | Copyright (C) 1991-2016 Free Software Foundation, Inc. | 
|  | 3 | This file is part of the GNU C Library. | 
|  | 4 |  | 
|  | 5 | The GNU C Library is free software; you can redistribute it and/or | 
|  | 6 | modify it under the terms of the GNU Lesser General Public | 
|  | 7 | License as published by the Free Software Foundation; either | 
|  | 8 | version 2.1 of the License, or (at your option) any later version. | 
|  | 9 |  | 
|  | 10 | The GNU C Library is distributed in the hope that it will be useful, | 
|  | 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU | 
|  | 13 | Lesser General Public License for more details. | 
|  | 14 |  | 
|  | 15 | You should have received a copy of the GNU Lesser General Public | 
|  | 16 | License along with the GNU C Library; if not, see | 
|  | 17 | <http://www.gnu.org/licenses/>.  */ | 
|  | 18 |  | 
|  | 19 | #include <sys/types.h> | 
|  | 20 | #include <sys/poll.h> | 
|  | 21 | #include <hurd.h> | 
|  | 22 | #include <hurd/fd.h> | 
|  | 23 | #include <stdlib.h> | 
|  | 24 | #include <string.h> | 
|  | 25 | #include <assert.h> | 
|  | 26 | #include <stdint.h> | 
|  | 27 |  | 
|  | 28 | /* All user select types.  */ | 
|  | 29 | #define SELECT_ALL (SELECT_READ | SELECT_WRITE | SELECT_URG) | 
|  | 30 |  | 
|  | 31 | /* Used to record that a particular select rpc returned.  Must be distinct | 
|  | 32 | from SELECT_ALL (which better not have the high bit set).  */ | 
|  | 33 | #define SELECT_RETURNED ((SELECT_ALL << 1) & ~SELECT_ALL) | 
|  | 34 |  | 
|  | 35 | /* Check the first NFDS descriptors either in POLLFDS (if nonnnull) or in | 
|  | 36 | each of READFDS, WRITEFDS, EXCEPTFDS that is nonnull.  If TIMEOUT is not | 
|  | 37 | NULL, time out after waiting the interval specified therein.  Returns | 
|  | 38 | the number of ready descriptors, or -1 for errors.  */ | 
|  | 39 | int | 
|  | 40 | _hurd_select (int nfds, | 
|  | 41 | struct pollfd *pollfds, | 
|  | 42 | fd_set *readfds, fd_set *writefds, fd_set *exceptfds, | 
|  | 43 | const struct timespec *timeout, const sigset_t *sigmask) | 
|  | 44 | { | 
|  | 45 | int i; | 
|  | 46 | mach_port_t portset; | 
|  | 47 | int got; | 
|  | 48 | error_t err; | 
|  | 49 | fd_set rfds, wfds, xfds; | 
|  | 50 | int firstfd, lastfd; | 
|  | 51 | mach_msg_timeout_t to = 0; | 
|  | 52 | struct | 
|  | 53 | { | 
|  | 54 | struct hurd_userlink ulink; | 
|  | 55 | struct hurd_fd *cell; | 
|  | 56 | mach_port_t io_port; | 
|  | 57 | int type; | 
|  | 58 | mach_port_t reply_port; | 
|  | 59 | } d[nfds]; | 
|  | 60 | sigset_t oset; | 
|  | 61 |  | 
|  | 62 | union typeword		/* Use this to avoid unkosher casts.  */ | 
|  | 63 | { | 
|  | 64 | mach_msg_type_t type; | 
|  | 65 | uint32_t word; | 
|  | 66 | }; | 
|  | 67 | assert (sizeof (union typeword) == sizeof (mach_msg_type_t)); | 
|  | 68 | assert (sizeof (uint32_t) == sizeof (mach_msg_type_t)); | 
|  | 69 |  | 
|  | 70 | if (nfds < 0 || (pollfds == NULL && nfds > FD_SETSIZE)) | 
|  | 71 | { | 
|  | 72 | errno = EINVAL; | 
|  | 73 | return -1; | 
|  | 74 | } | 
|  | 75 |  | 
|  | 76 | if (timeout != NULL) | 
|  | 77 | { | 
|  | 78 | if (timeout->tv_sec < 0 || timeout->tv_nsec < 0) | 
|  | 79 | { | 
|  | 80 | errno = EINVAL; | 
|  | 81 | return -1; | 
|  | 82 | } | 
|  | 83 |  | 
|  | 84 | to = (timeout->tv_sec * 1000 + | 
|  | 85 | (timeout->tv_nsec + 999999) / 1000000); | 
|  | 86 | } | 
|  | 87 |  | 
|  | 88 | if (sigmask && __sigprocmask (SIG_SETMASK, sigmask, &oset)) | 
|  | 89 | return -1; | 
|  | 90 |  | 
|  | 91 | if (pollfds) | 
|  | 92 | { | 
|  | 93 | /* Collect interesting descriptors from the user's `pollfd' array. | 
|  | 94 | We do a first pass that reads the user's array before taking | 
|  | 95 | any locks.  The second pass then only touches our own stack, | 
|  | 96 | and gets the port references.  */ | 
|  | 97 |  | 
|  | 98 | for (i = 0; i < nfds; ++i) | 
|  | 99 | if (pollfds[i].fd >= 0) | 
|  | 100 | { | 
|  | 101 | int type = 0; | 
|  | 102 | if (pollfds[i].events & POLLIN) | 
|  | 103 | type |= SELECT_READ; | 
|  | 104 | if (pollfds[i].events & POLLOUT) | 
|  | 105 | type |= SELECT_WRITE; | 
|  | 106 | if (pollfds[i].events & POLLPRI) | 
|  | 107 | type |= SELECT_URG; | 
|  | 108 |  | 
|  | 109 | d[i].io_port = pollfds[i].fd; | 
|  | 110 | d[i].type = type; | 
|  | 111 | } | 
|  | 112 | else | 
|  | 113 | d[i].type = 0; | 
|  | 114 |  | 
|  | 115 | HURD_CRITICAL_BEGIN; | 
|  | 116 | __mutex_lock (&_hurd_dtable_lock); | 
|  | 117 |  | 
|  | 118 | for (i = 0; i < nfds; ++i) | 
|  | 119 | if (d[i].type != 0) | 
|  | 120 | { | 
|  | 121 | const int fd = (int) d[i].io_port; | 
|  | 122 |  | 
|  | 123 | if (fd < _hurd_dtablesize) | 
|  | 124 | { | 
|  | 125 | d[i].cell = _hurd_dtable[fd]; | 
|  | 126 | d[i].io_port = _hurd_port_get (&d[i].cell->port, &d[i].ulink); | 
|  | 127 | if (d[i].io_port != MACH_PORT_NULL) | 
|  | 128 | continue; | 
|  | 129 | } | 
|  | 130 |  | 
|  | 131 | /* If one descriptor is bogus, we fail completely.  */ | 
|  | 132 | while (i-- > 0) | 
|  | 133 | if (d[i].type != 0) | 
|  | 134 | _hurd_port_free (&d[i].cell->port, | 
|  | 135 | &d[i].ulink, d[i].io_port); | 
|  | 136 | break; | 
|  | 137 | } | 
|  | 138 |  | 
|  | 139 | __mutex_unlock (&_hurd_dtable_lock); | 
|  | 140 | HURD_CRITICAL_END; | 
|  | 141 |  | 
|  | 142 | if (i < nfds) | 
|  | 143 | { | 
|  | 144 | if (sigmask) | 
|  | 145 | __sigprocmask (SIG_SETMASK, &oset, NULL); | 
|  | 146 | errno = EBADF; | 
|  | 147 | return -1; | 
|  | 148 | } | 
|  | 149 |  | 
|  | 150 | lastfd = i - 1; | 
|  | 151 | firstfd = i == 0 ? lastfd : 0; | 
|  | 152 | } | 
|  | 153 | else | 
|  | 154 | { | 
|  | 155 | /* Collect interested descriptors from the user's fd_set arguments. | 
|  | 156 | Use local copies so we can't crash from user bogosity.  */ | 
|  | 157 |  | 
|  | 158 | if (readfds == NULL) | 
|  | 159 | FD_ZERO (&rfds); | 
|  | 160 | else | 
|  | 161 | rfds = *readfds; | 
|  | 162 | if (writefds == NULL) | 
|  | 163 | FD_ZERO (&wfds); | 
|  | 164 | else | 
|  | 165 | wfds = *writefds; | 
|  | 166 | if (exceptfds == NULL) | 
|  | 167 | FD_ZERO (&xfds); | 
|  | 168 | else | 
|  | 169 | xfds = *exceptfds; | 
|  | 170 |  | 
|  | 171 | HURD_CRITICAL_BEGIN; | 
|  | 172 | __mutex_lock (&_hurd_dtable_lock); | 
|  | 173 |  | 
|  | 174 | if (nfds > _hurd_dtablesize) | 
|  | 175 | nfds = _hurd_dtablesize; | 
|  | 176 |  | 
|  | 177 | /* Collect the ports for interesting FDs.  */ | 
|  | 178 | firstfd = lastfd = -1; | 
|  | 179 | for (i = 0; i < nfds; ++i) | 
|  | 180 | { | 
|  | 181 | int type = 0; | 
|  | 182 | if (readfds != NULL && FD_ISSET (i, &rfds)) | 
|  | 183 | type |= SELECT_READ; | 
|  | 184 | if (writefds != NULL && FD_ISSET (i, &wfds)) | 
|  | 185 | type |= SELECT_WRITE; | 
|  | 186 | if (exceptfds != NULL && FD_ISSET (i, &xfds)) | 
|  | 187 | type |= SELECT_URG; | 
|  | 188 | d[i].type = type; | 
|  | 189 | if (type) | 
|  | 190 | { | 
|  | 191 | d[i].cell = _hurd_dtable[i]; | 
|  | 192 | d[i].io_port = _hurd_port_get (&d[i].cell->port, &d[i].ulink); | 
|  | 193 | if (d[i].io_port == MACH_PORT_NULL) | 
|  | 194 | { | 
|  | 195 | /* If one descriptor is bogus, we fail completely.  */ | 
|  | 196 | while (i-- > 0) | 
|  | 197 | if (d[i].type != 0) | 
|  | 198 | _hurd_port_free (&d[i].cell->port, &d[i].ulink, | 
|  | 199 | d[i].io_port); | 
|  | 200 | break; | 
|  | 201 | } | 
|  | 202 | lastfd = i; | 
|  | 203 | if (firstfd == -1) | 
|  | 204 | firstfd = i; | 
|  | 205 | } | 
|  | 206 | } | 
|  | 207 |  | 
|  | 208 | __mutex_unlock (&_hurd_dtable_lock); | 
|  | 209 | HURD_CRITICAL_END; | 
|  | 210 |  | 
|  | 211 | if (i < nfds) | 
|  | 212 | { | 
|  | 213 | if (sigmask) | 
|  | 214 | __sigprocmask (SIG_SETMASK, &oset, NULL); | 
|  | 215 | errno = EBADF; | 
|  | 216 | return -1; | 
|  | 217 | } | 
|  | 218 | } | 
|  | 219 |  | 
|  | 220 |  | 
|  | 221 | err = 0; | 
|  | 222 | got = 0; | 
|  | 223 |  | 
|  | 224 | /* Send them all io_select request messages.  */ | 
|  | 225 |  | 
|  | 226 | if (firstfd == -1) | 
|  | 227 | /* But not if there were no ports to deal with at all. | 
|  | 228 | We are just a pure timeout.  */ | 
|  | 229 | portset = __mach_reply_port (); | 
|  | 230 | else | 
|  | 231 | { | 
|  | 232 | portset = MACH_PORT_NULL; | 
|  | 233 |  | 
|  | 234 | for (i = firstfd; i <= lastfd; ++i) | 
|  | 235 | if (d[i].type) | 
|  | 236 | { | 
|  | 237 | int type = d[i].type; | 
|  | 238 | d[i].reply_port = __mach_reply_port (); | 
|  | 239 | err = __io_select (d[i].io_port, d[i].reply_port, | 
|  | 240 | /* Poll only if there's a single descriptor.  */ | 
|  | 241 | (firstfd == lastfd) ? to : 0, | 
|  | 242 | &type); | 
|  | 243 | switch (err) | 
|  | 244 | { | 
|  | 245 | case MACH_RCV_TIMED_OUT: | 
|  | 246 | /* No immediate response.  This is normal.  */ | 
|  | 247 | err = 0; | 
|  | 248 | if (firstfd == lastfd) | 
|  | 249 | /* When there's a single descriptor, we don't need a | 
|  | 250 | portset, so just pretend we have one, but really | 
|  | 251 | use the single reply port.  */ | 
|  | 252 | portset = d[i].reply_port; | 
|  | 253 | else if (got == 0) | 
|  | 254 | /* We've got multiple reply ports, so we need a port set to | 
|  | 255 | multiplex them.  */ | 
|  | 256 | { | 
|  | 257 | /* We will wait again for a reply later.  */ | 
|  | 258 | if (portset == MACH_PORT_NULL) | 
|  | 259 | /* Create the portset to receive all the replies on.  */ | 
|  | 260 | err = __mach_port_allocate (__mach_task_self (), | 
|  | 261 | MACH_PORT_RIGHT_PORT_SET, | 
|  | 262 | &portset); | 
|  | 263 | if (! err) | 
|  | 264 | /* Put this reply port in the port set.  */ | 
|  | 265 | __mach_port_move_member (__mach_task_self (), | 
|  | 266 | d[i].reply_port, portset); | 
|  | 267 | } | 
|  | 268 | break; | 
|  | 269 |  | 
|  | 270 | default: | 
|  | 271 | /* No other error should happen.  Callers of select | 
|  | 272 | don't expect to see errors, so we simulate | 
|  | 273 | readiness of the erring object and the next call | 
|  | 274 | hopefully will get the error again.  */ | 
|  | 275 | type = SELECT_ALL; | 
|  | 276 | /* FALLTHROUGH */ | 
|  | 277 |  | 
|  | 278 | case 0: | 
|  | 279 | /* We got an answer.  */ | 
|  | 280 | if ((type & SELECT_ALL) == 0) | 
|  | 281 | /* Bogus answer; treat like an error, as a fake positive.  */ | 
|  | 282 | type = SELECT_ALL; | 
|  | 283 |  | 
|  | 284 | /* This port is already ready already.  */ | 
|  | 285 | d[i].type &= type; | 
|  | 286 | d[i].type |= SELECT_RETURNED; | 
|  | 287 | ++got; | 
|  | 288 | break; | 
|  | 289 | } | 
|  | 290 | _hurd_port_free (&d[i].cell->port, &d[i].ulink, d[i].io_port); | 
|  | 291 | } | 
|  | 292 | } | 
|  | 293 |  | 
|  | 294 | /* Now wait for reply messages.  */ | 
|  | 295 | if (!err && got == 0) | 
|  | 296 | { | 
|  | 297 | /* Now wait for io_select_reply messages on PORT, | 
|  | 298 | timing out as appropriate.  */ | 
|  | 299 |  | 
|  | 300 | union | 
|  | 301 | { | 
|  | 302 | mach_msg_header_t head; | 
|  | 303 | #ifdef MACH_MSG_TRAILER_MINIMUM_SIZE | 
|  | 304 | struct | 
|  | 305 | { | 
|  | 306 | mach_msg_header_t head; | 
|  | 307 | NDR_record_t ndr; | 
|  | 308 | error_t err; | 
|  | 309 | } error; | 
|  | 310 | struct | 
|  | 311 | { | 
|  | 312 | mach_msg_header_t head; | 
|  | 313 | NDR_record_t ndr; | 
|  | 314 | error_t err; | 
|  | 315 | int result; | 
|  | 316 | mach_msg_trailer_t trailer; | 
|  | 317 | } success; | 
|  | 318 | #else | 
|  | 319 | struct | 
|  | 320 | { | 
|  | 321 | mach_msg_header_t head; | 
|  | 322 | union typeword err_type; | 
|  | 323 | error_t err; | 
|  | 324 | } error; | 
|  | 325 | struct | 
|  | 326 | { | 
|  | 327 | mach_msg_header_t head; | 
|  | 328 | union typeword err_type; | 
|  | 329 | error_t err; | 
|  | 330 | union typeword result_type; | 
|  | 331 | int result; | 
|  | 332 | } success; | 
|  | 333 | #endif | 
|  | 334 | } msg; | 
|  | 335 | mach_msg_option_t options = (timeout == NULL ? 0 : MACH_RCV_TIMEOUT); | 
|  | 336 | error_t msgerr; | 
|  | 337 | while ((msgerr = __mach_msg (&msg.head, | 
|  | 338 | MACH_RCV_MSG | MACH_RCV_INTERRUPT | options, | 
|  | 339 | 0, sizeof msg, portset, to, | 
|  | 340 | MACH_PORT_NULL)) == MACH_MSG_SUCCESS) | 
|  | 341 | { | 
|  | 342 | /* We got a message.  Decode it.  */ | 
|  | 343 | #define IO_SELECT_REPLY_MSGID (21012 + 100) /* XXX */ | 
|  | 344 | #ifdef MACH_MSG_TYPE_BIT | 
|  | 345 | const union typeword inttype = | 
|  | 346 | { type: | 
|  | 347 | { MACH_MSG_TYPE_INTEGER_T, sizeof (integer_t) * 8, 1, 1, 0, 0 } | 
|  | 348 | }; | 
|  | 349 | #endif | 
|  | 350 | if (msg.head.msgh_id == IO_SELECT_REPLY_MSGID && | 
|  | 351 | msg.head.msgh_size >= sizeof msg.error && | 
|  | 352 | !(msg.head.msgh_bits & MACH_MSGH_BITS_COMPLEX) && | 
|  | 353 | #ifdef MACH_MSG_TYPE_BIT | 
|  | 354 | msg.error.err_type.word == inttype.word | 
|  | 355 | #endif | 
|  | 356 | ) | 
|  | 357 | { | 
|  | 358 | /* This is a properly formatted message so far. | 
|  | 359 | See if it is a success or a failure.  */ | 
|  | 360 | if (msg.error.err == EINTR && | 
|  | 361 | msg.head.msgh_size == sizeof msg.error) | 
|  | 362 | { | 
|  | 363 | /* EINTR response; poll for further responses | 
|  | 364 | and then return quickly.  */ | 
|  | 365 | err = EINTR; | 
|  | 366 | goto poll; | 
|  | 367 | } | 
|  | 368 | if (msg.error.err || | 
|  | 369 | msg.head.msgh_size != sizeof msg.success || | 
|  | 370 | #ifdef MACH_MSG_TYPE_BIT | 
|  | 371 | msg.success.result_type.word != inttype.word || | 
|  | 372 | #endif | 
|  | 373 | (msg.success.result & SELECT_ALL) == 0) | 
|  | 374 | { | 
|  | 375 | /* Error or bogus reply.  Simulate readiness.  */ | 
|  | 376 | __mach_msg_destroy (&msg.head); | 
|  | 377 | msg.success.result = SELECT_ALL; | 
|  | 378 | } | 
|  | 379 |  | 
|  | 380 | /* Look up the respondent's reply port and record its | 
|  | 381 | readiness.  */ | 
|  | 382 | { | 
|  | 383 | int had = got; | 
|  | 384 | if (firstfd != -1) | 
|  | 385 | for (i = firstfd; i <= lastfd; ++i) | 
|  | 386 | if (d[i].type | 
|  | 387 | && d[i].reply_port == msg.head.msgh_local_port) | 
|  | 388 | { | 
|  | 389 | d[i].type &= msg.success.result; | 
|  | 390 | d[i].type |= SELECT_RETURNED; | 
|  | 391 | ++got; | 
|  | 392 | } | 
|  | 393 | assert (got > had); | 
|  | 394 | } | 
|  | 395 | } | 
|  | 396 |  | 
|  | 397 | if (msg.head.msgh_remote_port != MACH_PORT_NULL) | 
|  | 398 | __mach_port_deallocate (__mach_task_self (), | 
|  | 399 | msg.head.msgh_remote_port); | 
|  | 400 |  | 
|  | 401 | if (got) | 
|  | 402 | poll: | 
|  | 403 | { | 
|  | 404 | /* Poll for another message.  */ | 
|  | 405 | to = 0; | 
|  | 406 | options |= MACH_RCV_TIMEOUT; | 
|  | 407 | } | 
|  | 408 | } | 
|  | 409 |  | 
|  | 410 | if (msgerr == MACH_RCV_INTERRUPTED) | 
|  | 411 | /* Interruption on our side (e.g. signal reception).  */ | 
|  | 412 | err = EINTR; | 
|  | 413 |  | 
|  | 414 | if (got) | 
|  | 415 | /* At least one descriptor is known to be ready now, so we will | 
|  | 416 | return success.  */ | 
|  | 417 | err = 0; | 
|  | 418 | } | 
|  | 419 |  | 
|  | 420 | if (firstfd != -1) | 
|  | 421 | for (i = firstfd; i <= lastfd; ++i) | 
|  | 422 | if (d[i].type) | 
|  | 423 | __mach_port_destroy (__mach_task_self (), d[i].reply_port); | 
|  | 424 | if (firstfd == -1 || (firstfd != lastfd && portset != MACH_PORT_NULL)) | 
|  | 425 | /* Destroy PORTSET, but only if it's not actually the reply port for a | 
|  | 426 | single descriptor (in which case it's destroyed in the previous loop; | 
|  | 427 | not doing it here is just a bit more efficient).  */ | 
|  | 428 | __mach_port_destroy (__mach_task_self (), portset); | 
|  | 429 |  | 
|  | 430 | if (err) | 
|  | 431 | { | 
|  | 432 | if (sigmask) | 
|  | 433 | __sigprocmask (SIG_SETMASK, &oset, NULL); | 
|  | 434 | return __hurd_fail (err); | 
|  | 435 | } | 
|  | 436 |  | 
|  | 437 | if (pollfds) | 
|  | 438 | /* Fill in the `revents' members of the user's array.  */ | 
|  | 439 | for (i = 0; i < nfds; ++i) | 
|  | 440 | { | 
|  | 441 | int type = d[i].type; | 
|  | 442 | int_fast16_t revents = 0; | 
|  | 443 |  | 
|  | 444 | if (type & SELECT_RETURNED) | 
|  | 445 | { | 
|  | 446 | if (type & SELECT_READ) | 
|  | 447 | revents |= POLLIN; | 
|  | 448 | if (type & SELECT_WRITE) | 
|  | 449 | revents |= POLLOUT; | 
|  | 450 | if (type & SELECT_URG) | 
|  | 451 | revents |= POLLPRI; | 
|  | 452 | } | 
|  | 453 |  | 
|  | 454 | pollfds[i].revents = revents; | 
|  | 455 | } | 
|  | 456 | else | 
|  | 457 | { | 
|  | 458 | /* Below we recalculate GOT to include an increment for each operation | 
|  | 459 | allowed on each fd.  */ | 
|  | 460 | got = 0; | 
|  | 461 |  | 
|  | 462 | /* Set the user bitarrays.  We only ever have to clear bits, as all | 
|  | 463 | desired ones are initially set.  */ | 
|  | 464 | if (firstfd != -1) | 
|  | 465 | for (i = firstfd; i <= lastfd; ++i) | 
|  | 466 | { | 
|  | 467 | int type = d[i].type; | 
|  | 468 |  | 
|  | 469 | if ((type & SELECT_RETURNED) == 0) | 
|  | 470 | type = 0; | 
|  | 471 |  | 
|  | 472 | if (type & SELECT_READ) | 
|  | 473 | got++; | 
|  | 474 | else if (readfds) | 
|  | 475 | FD_CLR (i, readfds); | 
|  | 476 | if (type & SELECT_WRITE) | 
|  | 477 | got++; | 
|  | 478 | else if (writefds) | 
|  | 479 | FD_CLR (i, writefds); | 
|  | 480 | if (type & SELECT_URG) | 
|  | 481 | got++; | 
|  | 482 | else if (exceptfds) | 
|  | 483 | FD_CLR (i, exceptfds); | 
|  | 484 | } | 
|  | 485 | } | 
|  | 486 |  | 
|  | 487 | if (sigmask && __sigprocmask (SIG_SETMASK, &oset, NULL)) | 
|  | 488 | return -1; | 
|  | 489 |  | 
|  | 490 | return got; | 
|  | 491 | } |