diff -ur linux-2.2.9-np/Makefile linux/Makefile --- linux-2.2.9-np/Makefile Wed May 26 16:06:16 1999 +++ linux/Makefile Fri May 28 12:58:31 1999 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 2 SUBLEVEL = 9 -EXTRAVERSION = -np +EXTRAVERSION = -np-hint ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/) diff -ur linux-2.2.9-np/arch/i386/kernel/apm.c linux/arch/i386/kernel/apm.c --- linux-2.2.9-np/arch/i386/kernel/apm.c Fri Jan 15 01:57:25 1999 +++ linux/arch/i386/kernel/apm.c Fri May 28 12:58:31 1999 @@ -278,7 +278,7 @@ static int do_open(struct inode *, struct file *); static int do_release(struct inode *, struct file *); static ssize_t do_read(struct file *, char *, size_t , loff_t *); -static unsigned int do_poll(struct file *, poll_table *); +static unsigned int do_poll(struct file *, struct poll_table_entry *); static int do_ioctl(struct inode *, struct file *, u_int, u_long); static int apm_get_info(char *, char **, off_t, int, int); @@ -1062,7 +1062,7 @@ return 0; } -static unsigned int do_poll(struct file *fp, poll_table * wait) +static unsigned int do_poll(struct file *fp, struct poll_table_entry * wait) { struct apm_bios_struct * as; diff -ur linux-2.2.9-np/drivers/char/n_tty.c linux/drivers/char/n_tty.c --- linux-2.2.9-np/drivers/char/n_tty.c Sat May 22 13:43:03 1999 +++ linux/drivers/char/n_tty.c Fri May 28 12:58:31 1999 @@ -1123,7 +1123,7 @@ return (b - buf) ? 
b - buf : retval; } -static unsigned int normal_poll(struct tty_struct * tty, struct file * file, poll_table *wait) +static unsigned int normal_poll(struct tty_struct * tty, struct file * file, struct poll_table_entry *wait) { unsigned int mask = 0; diff -ur linux-2.2.9-np/drivers/char/pc_keyb.c linux/drivers/char/pc_keyb.c --- linux-2.2.9-np/drivers/char/pc_keyb.c Sat May 22 13:43:03 1999 +++ linux/drivers/char/pc_keyb.c Fri May 28 12:58:31 1999 @@ -925,7 +925,7 @@ return retval; } -static unsigned int aux_poll(struct file *file, poll_table * wait) +static unsigned int aux_poll(struct file *file, struct poll_table_entry * wait) { poll_wait(file, &queue->proc_list, wait); if (!queue_empty()) diff -ur linux-2.2.9-np/drivers/char/random.c linux/drivers/char/random.c --- linux-2.2.9-np/drivers/char/random.c Thu Dec 31 15:03:49 1998 +++ linux/drivers/char/random.c Fri May 28 12:58:31 1999 @@ -431,7 +431,8 @@ size_t nbytes, loff_t *ppos); static ssize_t random_read_unlimited(struct file * file, char * buf, size_t nbytes, loff_t *ppos); -static unsigned int random_poll(struct file *file, poll_table * wait); +static unsigned int random_poll(struct file *file, + struct poll_table_entry * wait); static ssize_t random_write(struct file * file, const char * buffer, size_t count, loff_t *ppos); static int random_ioctl(struct inode * inode, struct file * file, @@ -1360,7 +1361,7 @@ } static unsigned int -random_poll(struct file *file, poll_table * wait) +random_poll(struct file *file, struct poll_table_entry * wait) { unsigned int mask; diff -ur linux-2.2.9-np/drivers/char/rtc.c linux/drivers/char/rtc.c --- linux-2.2.9-np/drivers/char/rtc.c Fri Jan 15 01:58:47 1999 +++ linux/drivers/char/rtc.c Fri May 28 12:58:31 1999 @@ -81,7 +81,7 @@ static int rtc_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); -static unsigned int rtc_poll(struct file *file, poll_table *wait); +static unsigned int rtc_poll(struct file *file, struct poll_table_entry 
*wait); void get_rtc_time (struct rtc_time *rtc_tm); void get_rtc_alm_time (struct rtc_time *alm_tm); @@ -485,7 +485,7 @@ return 0; } -static unsigned int rtc_poll(struct file *file, poll_table *wait) +static unsigned int rtc_poll(struct file *file, struct poll_table_entry *wait) { poll_wait(file, &rtc_wait, wait); if (rtc_irq_data != 0) diff -ur linux-2.2.9-np/drivers/char/tty_io.c linux/drivers/char/tty_io.c --- linux-2.2.9-np/drivers/char/tty_io.c Sat May 22 13:42:58 1999 +++ linux/drivers/char/tty_io.c Fri May 28 12:58:31 1999 @@ -119,7 +119,7 @@ static ssize_t tty_read(struct file *, char *, size_t, loff_t *); static ssize_t tty_write(struct file *, const char *, size_t, loff_t *); -static unsigned int tty_poll(struct file *, poll_table *); +static unsigned int tty_poll(struct file *, struct poll_table_entry *); static int tty_open(struct inode *, struct file *); static int tty_release(struct inode *, struct file *); int tty_ioctl(struct inode * inode, struct file * file, @@ -327,7 +327,7 @@ return -EIO; } -static unsigned int hung_up_tty_poll(struct file * filp, poll_table * wait) +static unsigned int hung_up_tty_poll(struct file * filp, struct poll_table_entry * wait) { return POLLIN | POLLOUT | POLLERR | POLLHUP | POLLRDNORM | POLLWRNORM; } @@ -1371,7 +1371,7 @@ return 0; } -static unsigned int tty_poll(struct file * filp, poll_table * wait) +static unsigned int tty_poll(struct file * filp, struct poll_table_entry * wait) { struct tty_struct * tty; diff -ur linux-2.2.9-np/fs/open.c linux/fs/open.c --- linux-2.2.9-np/fs/open.c Wed May 26 16:06:16 1999 +++ linux/fs/open.c Fri May 28 13:00:21 1999 @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -845,6 +846,11 @@ filp = fcheck(fd); if (filp) { struct files_struct * files = current->files; + if (fd < files->max_poll && POLLINUSE(files->poll_fds, fd)) { + FD_CLR(fd, files->poll_fds); + poll_free_wait(&files->poll_wait, fd); + poll_remove_backmap(&filp->f_backmap, fd, files); + } files->fd[fd] = 
NULL; put_unused_fd(fd); FD_CLR(fd, files->close_on_exec); diff -ur linux-2.2.9-np/fs/pipe.c linux/fs/pipe.c --- linux-2.2.9-np/fs/pipe.c Fri Nov 13 13:07:26 1998 +++ linux/fs/pipe.c Fri May 28 12:58:31 1999 @@ -177,7 +177,7 @@ } } -static unsigned int pipe_poll(struct file * filp, poll_table * wait) +static unsigned int pipe_poll(struct file * filp, struct poll_table_entry * wait) { unsigned int mask; struct inode * inode = filp->f_dentry->d_inode; @@ -198,7 +198,7 @@ * Argh! Why does SunOS have to have different select() behaviour * for pipes and FIFOs? Hate, hate, hate! SunOS lacks POLLHUP. */ -static unsigned int fifo_poll(struct file * filp, poll_table * wait) +static unsigned int fifo_poll(struct file * filp, struct poll_table_entry * wait) { unsigned int mask; struct inode * inode = filp->f_dentry->d_inode; @@ -232,7 +232,7 @@ return pipe_read(filp,buf,count,ppos); } -static unsigned int connect_poll(struct file * filp, poll_table * wait) +static unsigned int connect_poll(struct file * filp, struct poll_table_entry * wait) { struct inode * inode = filp->f_dentry->d_inode; diff -ur linux-2.2.9-np/fs/proc/kmsg.c linux/fs/proc/kmsg.c --- linux-2.2.9-np/fs/proc/kmsg.c Tue Nov 17 13:09:00 1998 +++ linux/fs/proc/kmsg.c Fri May 28 12:58:31 1999 @@ -36,7 +36,7 @@ return do_syslog(2,buf,count); } -static unsigned int kmsg_poll(struct file *file, poll_table * wait) +static unsigned int kmsg_poll(struct file *file, struct poll_table_entry * wait) { poll_wait(file, &log_wait, wait); if (log_size) diff -ur linux-2.2.9-np/fs/select.c linux/fs/select.c --- linux-2.2.9-np/fs/select.c Wed May 26 16:06:16 1999 +++ linux/fs/select.c Fri May 28 13:17:41 1999 @@ -27,90 +27,142 @@ * understand what I'm doing here, then you understand how the linux * sleep/wakeup mechanism works. * - * Two very simple procedures, poll_wait() and free_wait() make all the - * work. 
poll_wait() is an inline-function defined in <linux/poll.h>, - * as all select/poll functions have to call it to add an entry to the - * poll table. + * Two very simple procedures, poll_wait() and poll_free_wait() make + * all the work. poll_wait() is an inline-function defined in + * <linux/poll.h>, as all select/poll functions have to call it to add + * an entry to the poll table. */ /* - * I rewrote this again to make the poll_table size variable, take some - * more shortcuts, improve responsiveness, and remove another race that - * Linus noticed. -- jrs + * Return an area poll_wait() can use to store the wait_queue entry + * in. This information is kept for the whole lifetime of the fd. + * next will be updated to optimize for ordered fd sets. */ -static void free_wait(poll_table * p) +struct poll_table_entry *get_wait(poll_table **master, + poll_table **next, int fd) { - struct poll_table_entry * entry; - poll_table *old; + poll_table *oldtable = NULL, *table; + struct poll_table_entry *entry; - while (p) { - entry = p->entry + p->nr; - while (p->nr > 0) { - p->nr--; - entry--; - remove_wait_queue(entry->wait_address,&entry->wait); - fput(entry->filp); + int off = fd % __MAX_POLL_TABLE_ENTRIES; + + table = (next && *next) ? 
*next : *master; + while (table != NULL && table->basefd < fd - off) { + oldtable = table; + table = table->next; + } + + if (table == NULL || table->basefd > fd) { + poll_table *tmp; + tmp = (poll_table *) __get_free_page(GFP_KERNEL); + if (!tmp) { + printk("get_wait: no memory\n"); + return NULL; } - old = p; - p = p->next; - free_page((unsigned long) old); + tmp->entry = (struct poll_table_entry *)(tmp + 1); + tmp->ref = 0; + tmp->basefd = fd - off; + tmp->next = table; + if (oldtable) + oldtable->next = tmp; + else + *master = tmp; + + table = tmp; + } + + table->ref++; + if (next) + *next = table; + + entry = table->entry + off; + entry->wait_address = NULL; + entry->next = NULL; + + return entry; +} + +/* + * Remove the poll_table_entry for fd: the head lives in the table page, chained entries are kmalloc'd + */ + +void poll_free_wait(poll_table ** p, int fd) +{ + poll_table *oldtable = NULL, *table = *p; + struct poll_table_entry *entry, *old = NULL; + + int off = fd % __MAX_POLL_TABLE_ENTRIES; + + while (table != NULL && table->basefd < fd - off) { + oldtable = table; + table = table->next; + } + + if (!table || table->basefd > fd) { + printk("poll_free_wait: no entry for %d,%p\n", fd, current); + return; + } + + entry = table->entry + off; + while (entry && entry->wait_address) { + remove_wait_queue(entry->wait_address, &entry->wait); + fput(entry->filp); + old = entry; + entry = entry->next; + if (old != table->entry + off) + kfree(old); + } + table->ref--; + + if (table->ref == 0) { + if (oldtable) + oldtable->next = table->next; + else + *p = table->next; + + free_page((unsigned long) table); } } -void __pollwait(struct file * filp, struct wait_queue ** wait_address, poll_table *p) +void __pollwait(struct file * filp, struct wait_queue ** wait_address, struct poll_table_entry *p) { - for (;;) { - if (p->nr < __MAX_POLL_TABLE_ENTRIES) { - struct poll_table_entry * entry; -ok_table: - entry = p->entry + p->nr; - entry->filp = filp; - filp->f_count++; - entry->wait_address = wait_address; - 
entry->wait.task = current; - entry->wait.next = NULL; - add_wait_queue(wait_address,&entry->wait); - p->nr++; + if (p->wait_address) { + struct poll_table_entry *tmp; + if (!(tmp = kmalloc(sizeof(*p), GFP_KERNEL))) { + printk("pollwait: out of memory\n"); return; } - if (p->next == NULL) { - poll_table *tmp = (poll_table *) __get_free_page(GFP_KERNEL); - if (!tmp) - return; - tmp->nr = 0; - tmp->entry = (struct poll_table_entry *)(tmp + 1); - tmp->next = NULL; - p->next = tmp; - p = tmp; - goto ok_table; - } - p = p->next; - } + tmp->next = p->next; + p->next = tmp; + p = tmp; + } + p->filp = filp; + filp->f_count++; + p->wait_address = wait_address; + p->wait.task = current; + p->wait.next = NULL; + add_wait_queue(wait_address, &p->wait); } -#define __IN(fds, n) (fds->in + n) -#define __OUT(fds, n) (fds->out + n) -#define __EX(fds, n) (fds->ex + n) -#define __RES_IN(fds, n) (fds->res_in + n) -#define __RES_OUT(fds, n) (fds->res_out + n) -#define __RES_EX(fds, n) (fds->res_ex + n) - -#define BITS(fds, n) (*__IN(fds, n)|*__OUT(fds, n)|*__EX(fds, n)) - static int max_select_fd(unsigned long n, fd_set_bits *fds) { unsigned long *open_fds; - unsigned long set; + unsigned long set, *in, *out, *ex; int max; /* handle last in-complete long-word first */ set = ~(~0UL << (n & (__NFDBITS-1))); n /= __NFDBITS; + + in = fds->in + n; + out = fds->out + n; + ex = fds->ex + n; + open_fds = current->files->open_fds->fds_bits+n; max = 0; if (set) { - set &= BITS(fds, n); + set &= (*in | *out | *ex); if (set) { if (!(set & ~*open_fds)) goto get_max; @@ -120,7 +172,12 @@ while (n) { open_fds--; n--; - set = BITS(fds, n); + + in--; + out--; + ex--; + + set = (*in | *out | *ex); if (!set) continue; if (set & ~*open_fds) @@ -147,23 +204,110 @@ #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) #define POLLEX_SET (POLLPRI) -int do_select(int n, fd_set_bits *fds, long *timeout) +/* + * Simply expand a fd_set * structure and copy the content. 
+ */ + +#define COPYSET(np, op, nold, nnew) do { \ + memcpy(np, op, (nold) / 8); \ + memset(&(np)->fds_bits[(nold) / __NFDBITS], 0, \ + ((nnew) - (nold)) / 8); \ + } while (0) + +#define POLL_HINTS_IN(x) ((fd_set *)(x)->poll_hints) +#define POLL_HINTS_OUT(x) ((fd_set *)&(x)->poll_hints->fds_bits[(x)->max_poll / __NFDBITS]) +#define POLL_HINTS_EX(x) ((fd_set *)&(x)->poll_hints->fds_bits[2 * (x)->max_poll / __NFDBITS]) + +#define INH(x) (in & (inh | poll_in->fds_bits[x])) +#define OUTH(x) (out & (outh | poll_out->fds_bits[x])) +#define EXH(x) (ex & (exh | poll_ex->fds_bits[x])) + +int expand_poll_hints(struct files_struct *files, int newnr) { - poll_table *wait_table, *wait; - int retval, i, off; - long __timeout = *timeout; + int npoll, i, retval = 0; + fd_set *new_pollset = 0, *new_hintset = 0; + unsigned long flags; + + npoll = files->max_poll; + do { + if (npoll < (PAGE_SIZE * 8)) + npoll = PAGE_SIZE * 8; + else { + npoll = npoll * 2; + if (npoll > NR_OPEN) + npoll = NR_OPEN; + } + } while (npoll < newnr); + + npoll = ((npoll + __NFDBITS - 1) / __NFDBITS) * __NFDBITS; + + retval = -ENOMEM; + if (!(new_pollset = alloc_fdset(npoll))) + goto out; + if (!(new_hintset = alloc_fdset(3 * npoll))) + goto out; + retval = 0; + + if (files->max_poll >= npoll) { + /* XXX - this has been in other code also */ + printk("expand_poll_hints: called concurrently\n"); + goto out; + } - wait = wait_table = NULL; - if (__timeout) { - wait_table = (poll_table *) __get_free_page(GFP_KERNEL); - if (!wait_table) - return -ENOMEM; - - wait_table->nr = 0; - wait_table->entry = (struct poll_table_entry *)(wait_table + 1); - wait_table->next = NULL; - wait = wait_table; + spin_lock_irqsave(&files->poll_hint_lock, flags); + i = npoll / __NFDBITS; + if (i) { + int nold = files->max_poll; + fd_set *oldset, *newset; + + /* Copy the fds we are interested in */ + COPYSET(new_pollset, files->poll_fds, nold, npoll); + + /* Copy the fds we have events for in, out and ex */ + COPYSET(new_hintset, 
files->poll_hints, nold, npoll); + oldset = (fd_set *)&files->poll_hints->fds_bits[nold / __NFDBITS]; + newset = (fd_set *)&new_hintset->fds_bits[npoll / __NFDBITS]; + COPYSET(newset, oldset, nold, npoll); + oldset = (fd_set *)&files->poll_hints->fds_bits[2 * nold / __NFDBITS]; + newset = (fd_set *)&new_hintset->fds_bits[2 * npoll / __NFDBITS]; + COPYSET(newset, oldset, nold, npoll); } + + /* XXX - free_fdset has built in knowledge. Bah. */ + if (files->max_poll / 8 > __FD_SETSIZE) { + free_fdset(files->poll_fds, files->max_poll); + free_fdset(files->poll_hints, 3 * files->max_poll); + } + files->max_poll = npoll; + files->poll_fds = new_pollset; + files->poll_hints = new_hintset; + spin_unlock_irqrestore(&files->poll_hint_lock, flags); + + return 0; + + out: + if (new_pollset) + free_fdset(new_pollset, npoll); + if (new_hintset) + free_fdset(new_hintset, 3 * npoll); + return retval; +} + +int do_select(int n, fd_set_bits *fds, long *timeout) +{ + struct files_struct *files = current->files; + fd_set *poll_in, *poll_out, *poll_ex, *poll_fds; + poll_table *wait_table; + struct poll_table_entry *wait; + int retval, i, j, off, fd; + unsigned long newfds; + long __timeout = *timeout; + unsigned long *inp, *outp, *exp, *rinp, *routp, *rexp; + unsigned long in, out, ex, bits, bit; + unsigned long inh, outh, exh; + unsigned long mask; + struct file *file; + unsigned long flags; lock_kernel(); @@ -172,45 +316,138 @@ goto out; n = retval; retval = 0; + + if (n >= files->max_poll) { + /* + * We are selecting more fds than before, we need to + * increase our hints. + */ + retval = expand_poll_hints(files, n); + if (retval < 0) + goto out; + retval = 0; + } + + /* + * Now we need to enter the new fds into the hints fdset, so + * that we test them also. 
+ */ + + poll_in = POLL_HINTS_IN(files); + poll_out = POLL_HINTS_OUT(files); + poll_ex = POLL_HINTS_EX(files); + poll_fds = files->poll_fds; + for (;;) { + inp = fds->in; + outp = fds->out; + exp = fds->ex; + rinp = fds->res_in; + routp = fds->res_out; + rexp = fds->res_ex; + + wait_table = NULL; current->state = TASK_INTERRUPTIBLE; - for (i = 0 ; i < n; i++) { - unsigned long bit = BIT(i); - unsigned long mask; - struct file *file; + for (i = 0, off = 0; i < n; i += __NFDBITS, off++, + inp++, outp++, exp++, rinp++, routp++, rexp++) { + /* XXX the addition is probably slow */ + in = *inp; + out = *outp; + ex = *exp; + bits = (in|ex|out); + + newfds = bits & ~poll_fds->fds_bits[off]; + + spin_lock_irqsave(&files->poll_hint_lock, flags); + /* Reduce calls to spin_lock, update from last loop */ + if (off > 0) { + poll_in->fds_bits[off-1] |= inh; + poll_out->fds_bits[off-1] |= outh; + poll_ex->fds_bits[off-1] |= exh; + } + inh = poll_in->fds_bits[off] | newfds; + outh = poll_out->fds_bits[off] | newfds; + exh = poll_ex->fds_bits[off] | newfds; + poll_in->fds_bits[off] = 0; + poll_out->fds_bits[off] = 0; + poll_ex->fds_bits[off] = 0; + spin_unlock_irqrestore(&files->poll_hint_lock, flags); + + while ((j = ffs(bits)) && (fd = i + --j) < n) { + bit = (1 << j); + bits &= ~bit; + + /* + * The hints can be updated while we are in + * this loop. + */ + if (!((bit & INH(off)) || (bit & OUTH(off)) || + (bit & EXH(off)))) + continue; + + /* + * The poll_wait routine will increment + * f_count if the file is added to the wait + * table, so we don't need to increment it now. + */ + file = fcheck(fd); + if (!file) /* POLLNVAL, but not used */ + continue; - off = i / __NFDBITS; - if (!(bit & BITS(fds, off))) - continue; - /* - * The poll_wait routine will increment f_count if - * the file is added to the wait table, so we don't - * need to increment it now. 
- */ - file = fcheck(i); - mask = POLLNVAL; - if (file) { mask = DEFAULT_POLLMASK; - if (file->f_op && file->f_op->poll) + if (file->f_op && file->f_op->poll) { + poll_fds->fds_bits[off] |= bit; + if (newfds & bit) { + wait = get_wait(&files->poll_wait, + &wait_table, fd); + poll_backmap(fd, &file->f_backmap); + } else + wait = NULL; + mask = file->f_op->poll(file, wait); + } + + /* + * Check if the returned poll supports hinting, + * and set the hints accordingly. + */ + if (mask & POLLHINT) { + if (mask & POLLIN_SET) + inh |= bit; + else + inh &= ~bit; + if (mask & POLLOUT_SET) + outh |= bit; + else + outh &= ~bit; + if (mask & POLLEX_SET) + exh |= bit; + else + exh &= ~bit; + } + + if ((mask & POLLIN_SET) && (bit & in)) { + *rinp |= bit; + retval++; + } + if ((mask & POLLOUT_SET) && (bit & out)) { + *routp |= bit; + retval++; + } + if ((mask & POLLEX_SET) && (bit & ex)) { + *rexp |= bit; + retval++; + } } - if ((mask & POLLIN_SET) && ISSET(bit, __IN(fds,off))) { - SET(bit, __RES_IN(fds,off)); - retval++; - wait = NULL; - } - if ((mask & POLLOUT_SET) && ISSET(bit, __OUT(fds,off))) { - SET(bit, __RES_OUT(fds,off)); - retval++; - wait = NULL; - } - if ((mask & POLLEX_SET) && ISSET(bit, __EX(fds,off))) { - SET(bit, __RES_EX(fds,off)); - retval++; - wait = NULL; - } } - wait = NULL; + /* Last hintset needs to be updated */ + if (off > 0) { + spin_lock_irqsave(&files->poll_hint_lock, flags); + poll_in->fds_bits[off-1] |= inh; + poll_out->fds_bits[off-1] |= outh; + poll_ex->fds_bits[off-1] |= exh; + spin_unlock_irqrestore(&files->poll_hint_lock, flags); + } if (retval || !__timeout || signal_pending(current)) break; __timeout = schedule_timeout(__timeout); @@ -218,9 +455,6 @@ current->state = TASK_RUNNING; out: - if (*timeout) - free_wait(wait_table); - /* * Up-to-date the caller timeout. 
*/ @@ -339,54 +573,145 @@ return ret; } -static int do_poll(unsigned int nfds, struct pollfd *fds, poll_table *wait, - long timeout) +static int do_poll(unsigned int nfds, struct pollfd *fds, long timeout) { - int count = 0; + int count = 0, retval = 0, off, bit, inuse; + unsigned long inh, outh, exh, flags; + int hoff, oldfd, fd; + struct files_struct *files = current->files; + fd_set *poll_in, *poll_out, *poll_ex; + struct file *file; + struct poll_table_entry *wait; for (;;) { unsigned int j; struct pollfd * fdpnt; + hoff = -1; + fd = -1; + poll_in = POLL_HINTS_IN(files); + poll_out = POLL_HINTS_OUT(files); + poll_ex = POLL_HINTS_EX(files); current->state = TASK_INTERRUPTIBLE; for (fdpnt = fds, j = 0; j < nfds; j++, fdpnt++) { - int fd; unsigned int mask; + oldfd = fd; mask = 0; + fd = fdpnt->fd; - if (fd >= 0) { - /* poll_wait increments f_count if needed */ - struct file * file = fcheck(fd); - mask = POLLNVAL; - if (file != NULL) { - mask = DEFAULT_POLLMASK; - if (file->f_op && file->f_op->poll) - mask = file->f_op->poll(file, wait); - mask &= fdpnt->events | POLLERR | POLLHUP; + if (fd < 0) { + fdpnt->revents = mask; + continue; + } + + /* poll_wait increments f_count if needed */ + file = fcheck(fd); + + off = fd / __NFDBITS; + bit = BIT(fd); + + if (fd >= files->max_poll) { + retval = expand_poll_hints(files, fd + 1); + if (retval < 0) + goto out; + poll_in = POLL_HINTS_IN(files); + poll_out = POLL_HINTS_OUT(files); + poll_ex = POLL_HINTS_EX(files); + } + + if (hoff == -1 || hoff != (fd & (~(__NFDBITS-1)))) { + hoff = fd & (~(__NFDBITS-1)); + spin_lock_irqsave(&files->poll_hint_lock, flags); + if (oldfd != -1) { + int off = oldfd / __NFDBITS; + poll_in->fds_bits[off] |= inh; + poll_out->fds_bits[off] |= outh; + poll_ex->fds_bits[off] |= exh; } - if (mask) { - wait = NULL; - count++; + inh = poll_in->fds_bits[off]; + outh = poll_out->fds_bits[off]; + exh = poll_ex->fds_bits[off]; + poll_in->fds_bits[off] = 0; + poll_out->fds_bits[off] = 0; + 
poll_ex->fds_bits[off] = 0; + spin_unlock_irqrestore(&files->poll_hint_lock, flags); + } + + if ((bit & files->poll_fds->fds_bits[off]) && + !(((fdpnt->events & POLLIN_SET) && + (bit & (inh | poll_in->fds_bits[off]))) || + ((fdpnt->events & POLLOUT_SET) && + (bit & (outh | poll_out->fds_bits[off]))) || + ((fdpnt->events & POLLEX_SET) && + (bit & (exh | poll_ex->fds_bits[off]))))) { + fdpnt->revents = 0; /* not polled this pass: do not echo the user's stale value */ + continue; + } + + inuse = bit & files->poll_fds->fds_bits[off]; + + mask = POLLNVAL; + if (file != NULL) { + mask = DEFAULT_POLLMASK; + if (file->f_op && file->f_op->poll) { + /* Mark fd as being in use */ + files->poll_fds->fds_bits[off] |= bit; + if (!inuse) { + wait = get_wait(&files->poll_wait, NULL, fd); + poll_backmap(fd, &file->f_backmap); + } else + wait = NULL; + mask = file->f_op->poll(file, wait); } + mask &= fdpnt->events | POLLERR | POLLHUP | POLLHINT; } + /* See if device supports hinting */ + if (mask & POLLHINT) { + mask &= ~POLLHINT; + if (mask & POLLIN_SET) + inh |= bit; + else + inh &= ~bit; + if (mask & POLLOUT_SET) + outh |= bit; + else + outh &= ~bit; + if (mask & POLLEX_SET) + exh |= bit; + else + exh &= ~bit; + } else if (!inuse) { + inh |= bit; + outh |= bit; + exh |= bit; + } + if (mask) + count++; fdpnt->revents = mask; } + if (fd != -1) { + spin_lock_irqsave(&files->poll_hint_lock, flags); + poll_in->fds_bits[off] |= inh; + poll_out->fds_bits[off] |= outh; + poll_ex->fds_bits[off] |= exh; + spin_unlock_irqrestore(&files->poll_hint_lock, flags); + } - wait = NULL; if (count || !timeout || signal_pending(current)) break; timeout = schedule_timeout(timeout); } + retval = count; + out: current->state = TASK_RUNNING; - return count; + return retval; } asmlinkage int sys_poll(struct pollfd * ufds, unsigned int nfds, long timeout) { int i, fdcount, err, size; struct pollfd * fds, *fds1; - poll_table *wait_table = NULL, *wait = NULL; lock_kernel(); /* Do a sanity check on nfds ... 
*/ @@ -403,16 +728,6 @@ } err = -ENOMEM; - if (timeout) { - wait_table = (poll_table *) __get_free_page(GFP_KERNEL); - if (!wait_table) - goto out; - wait_table->nr = 0; - wait_table->entry = (struct poll_table_entry *)(wait_table + 1); - wait_table->next = NULL; - wait = wait_table; - } - size = nfds * sizeof(struct pollfd); fds = (struct pollfd *) kmalloc(size, GFP_KERNEL); if (!fds) @@ -422,7 +737,7 @@ if (copy_from_user(fds, ufds, size)) goto out_fds; - fdcount = do_poll(nfds, fds, wait, timeout); + fdcount = do_poll(nfds, fds, timeout); /* OK, now copy the revents fields back to user space. */ fds1 = fds; @@ -437,8 +752,6 @@ out_fds: kfree(fds); out: - if (wait) - free_wait(wait_table); unlock_kernel(); return err; } diff -ur linux-2.2.9-np/include/linux/fs.h linux/include/linux/fs.h --- linux-2.2.9-np/include/linux/fs.h Wed May 26 16:06:16 1999 +++ linux/include/linux/fs.h Fri May 28 13:04:14 1999 @@ -23,7 +23,7 @@ #include #include -struct poll_table_struct; +struct poll_table_entry; /* @@ -426,6 +426,7 @@ unsigned long f_version; + struct poll_backmap *f_backmap; /* needed for tty driver, and maybe others */ void *private_data; }; @@ -583,7 +584,7 @@ ssize_t (*read) (struct file *, char *, size_t, loff_t *); ssize_t (*write) (struct file *, const char *, size_t, loff_t *); int (*readdir) (struct file *, void *, filldir_t); - unsigned int (*poll) (struct file *, struct poll_table_struct *); + unsigned int (*poll) (struct file *, struct poll_table_entry *); int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); int (*open) (struct inode *, struct file *); diff -ur linux-2.2.9-np/include/linux/net.h linux/include/linux/net.h --- linux-2.2.9-np/include/linux/net.h Sat May 22 14:15:21 1999 +++ linux/include/linux/net.h Fri May 28 12:58:31 1999 @@ -20,7 +20,7 @@ #include -struct poll_table_struct; +struct poll_table_entry; #define NPROTO 32 /* should be enough for now.. 
*/ @@ -93,7 +93,7 @@ int flags); int (*getname) (struct socket *sock, struct sockaddr *uaddr, int *usockaddr_len, int peer); - unsigned int (*poll) (struct file *file, struct socket *sock, struct poll_table_struct *wait); + unsigned int (*poll) (struct file *file, struct socket *sock, struct poll_table_entry *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, unsigned long arg); int (*listen) (struct socket *sock, int len); diff -ur linux-2.2.9-np/include/linux/poll.h linux/include/linux/poll.h --- linux-2.2.9-np/include/linux/poll.h Wed May 26 16:06:16 1999 +++ linux/include/linux/poll.h Fri May 28 13:04:16 1999 @@ -8,26 +8,132 @@ #include #include #include +#include #include +/* Look at - this has not been assigned elsewhere */ +#define POLLHINT 0x8000 +#define POLLHINTEX 1 +#define POLLHINTIN 2 +#define POLLHINTOUT 4 struct poll_table_entry { + struct poll_table_entry *next; struct file * filp; struct wait_queue wait; struct wait_queue ** wait_address; }; +/* + * The poll_table is utilized in blocks of __MAX_POLL_TABLE_ENTRIES and + * remains in the state of the process over invocations of select() or + * poll(). 
+ */ + typedef struct poll_table_struct { struct poll_table_struct * next; - unsigned int nr; + unsigned int ref; + unsigned int basefd; struct poll_table_entry * entry; } poll_table; +struct poll_backmap { + struct poll_backmap *next; + struct files_struct *files; /* files which has this file as */ + int fd; /* file descriptor number fd */ +}; + #define __MAX_POLL_TABLE_ENTRIES ((PAGE_SIZE - sizeof (poll_table)) / sizeof (struct poll_table_entry)) -extern void __pollwait(struct file * filp, struct wait_queue ** wait_address, poll_table *p); +#define POLLINUSE(x,y) ((x)->fds_bits[(y)/__NFDBITS] & (1UL << ((y)&(__NFDBITS-1)))) + +extern inline void poll_add_hint(struct poll_backmap ** map, int type) +{ + fd_set *hint; + struct poll_backmap *entry; + struct files_struct *files; + unsigned long flags; + + if (!map) + return; + entry = *map; + while (entry) { + files = entry->files; + + spin_lock_irqsave(&files->poll_hint_lock, flags); + if (type & POLLHINTEX) { + hint = (fd_set *)&files->poll_hints->fds_bits[2 * files->max_poll / __NFDBITS]; + hint->fds_bits[entry->fd / __NFDBITS] |= 1 << (entry->fd & (__NFDBITS - 1)); + } + if (type & POLLHINTOUT) { + hint = (fd_set *)&files->poll_hints->fds_bits[files->max_poll / __NFDBITS]; + hint->fds_bits[entry->fd / __NFDBITS] |= 1 << (entry->fd & (__NFDBITS - 1)); + } + if (type & POLLHINTIN) { + hint = files->poll_hints; + hint->fds_bits[entry->fd / __NFDBITS] |= 1 << (entry->fd & (__NFDBITS - 1)); + } + spin_unlock_irqrestore(&files->poll_hint_lock, flags); + + entry = entry->next; + } +} + +extern inline void poll_backmap(int fd, struct poll_backmap ** entry) +{ + struct poll_backmap *tmp; + + if (!entry) + return; + + /* + * See if we have an entry in the backmap already, in general + * we expect this linked list to be very short. 
+ */ + tmp = *entry; + while (tmp != NULL) { + if (tmp->files == current->files && tmp->fd == fd) + return; + tmp = tmp->next; + } + + tmp = (struct poll_backmap *) kmalloc(sizeof(*tmp), GFP_KERNEL); + if (tmp == NULL) + return; + + tmp->files = current->files; + tmp->fd = fd; + tmp->next = *entry; + + *entry = tmp; +} + +extern inline void poll_remove_backmap(struct poll_backmap **map, int fd, + struct files_struct *files) +{ + struct poll_backmap *tmp = *map, *old = NULL; + + while (tmp != NULL) { + if (tmp->files == files && tmp->fd == fd) + break; + old = tmp; + tmp = tmp->next; + } + + if (!tmp) + return; + + if (old == NULL) + *map = tmp->next; + else + old->next = tmp->next; + + kfree (tmp); +} + +extern void __pollwait(struct file * filp, struct wait_queue ** wait_address, struct poll_table_entry *p); -extern inline void poll_wait(struct file * filp, struct wait_queue ** wait_address, poll_table *p) +extern inline void poll_wait(struct file * filp, struct wait_queue ** wait_address, struct poll_table_entry *p) { if (p && wait_address) __pollwait(filp, wait_address, p); @@ -101,6 +207,7 @@ } extern int do_select(int n, fd_set_bits *fds, long *timeout); +extern void poll_free_wait(poll_table **p, int fd); #endif /* KERNEL */ diff -ur linux-2.2.9-np/include/linux/sched.h linux/include/linux/sched.h --- linux-2.2.9-np/include/linux/sched.h Wed May 26 16:06:16 1999 +++ linux/include/linux/sched.h Fri May 28 13:04:14 1999 @@ -134,27 +134,41 @@ */ struct files_struct { atomic_t count; + spinlock_t poll_hint_lock; int max_fds; int max_fdset; int next_fd; + int max_poll; struct file ** fd; /* current fd array */ fd_set *close_on_exec; fd_set *open_fds; + fd_set *poll_fds; + fd_set *poll_hints; + struct poll_table_struct *poll_wait; fd_set close_on_exec_init; fd_set open_fds_init; + fd_set poll_fds_init; + fd_set poll_hints_init[3]; struct file * fd_array[NR_OPEN_DEFAULT]; }; #define INIT_FILES { \ ATOMIC_INIT(1), \ + SPIN_LOCK_UNLOCKED, \ NR_OPEN_DEFAULT, \ 
__FD_SETSIZE, \ 0, \ + __FD_SETSIZE, \ &init_files.fd_array[0], \ &init_files.close_on_exec_init, \ &init_files.open_fds_init, \ + &init_files.poll_fds_init, \ + &init_files.poll_hints_init[0], \ + NULL, \ + { { 0, } }, \ { { 0, } }, \ { { 0, } }, \ + { { { 0, } }, }, \ { NULL, } \ } diff -ur linux-2.2.9-np/include/linux/skbuff.h linux/include/linux/skbuff.h --- linux-2.2.9-np/include/linux/skbuff.h Sat May 22 14:15:23 1999 +++ linux/include/linux/skbuff.h Fri May 28 13:04:16 1999 @@ -572,7 +572,7 @@ } extern struct sk_buff * skb_recv_datagram(struct sock *sk,unsigned flags,int noblock, int *err); -extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); +extern unsigned int datagram_poll(struct file *file, struct socket *sock, struct poll_table_entry *wait); extern int skb_copy_datagram(struct sk_buff *from, int offset, char *to,int size); extern int skb_copy_datagram_iovec(struct sk_buff *from, int offset, struct iovec *to,int size); extern void skb_free_datagram(struct sock * sk, struct sk_buff *skb); diff -ur linux-2.2.9-np/include/linux/tty_ldisc.h linux/include/linux/tty_ldisc.h --- linux-2.2.9-np/include/linux/tty_ldisc.h Sat May 22 14:15:22 1999 +++ linux/include/linux/tty_ldisc.h Fri May 28 13:04:14 1999 @@ -65,7 +65,7 @@ * been made to the termios stucture. * * int (*poll)(struct tty_struct * tty, struct file * file, - * poll_table *wait); + * struct poll_table_entry *wait); * * This function is called when a user attempts to select/poll on a * tty device. It is solely the responsibility of the line @@ -120,7 +120,7 @@ unsigned int cmd, unsigned long arg); void (*set_termios)(struct tty_struct *tty, struct termios * old); unsigned int (*poll)(struct tty_struct *, struct file *, - struct poll_table_struct *); + struct poll_table_entry *); /* * The following routines are called from below. 
diff -ur linux-2.2.9-np/include/net/inet_common.h linux/include/net/inet_common.h --- linux-2.2.9-np/include/net/inet_common.h Thu Feb 26 23:17:58 1998 +++ linux/include/net/inet_common.h Fri May 28 12:58:31 1999 @@ -28,7 +28,7 @@ struct msghdr *msg, int size, struct scm_cookie *scm); extern int inet_shutdown(struct socket *sock, int how); -extern unsigned int inet_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); +extern unsigned int inet_poll(struct file * file, struct socket *sock, struct poll_table_entry *wait); extern int inet_setsockopt(struct socket *sock, int level, int optname, char *optval, int optlen); diff -ur linux-2.2.9-np/include/net/sock.h linux/include/net/sock.h --- linux-2.2.9-np/include/net/sock.h Sat May 22 14:16:04 1999 +++ linux/include/net/sock.h Fri May 28 13:05:05 1999 @@ -523,6 +523,8 @@ /* Identd */ struct socket *socket; + void *backmap; + /* RPC layer private data */ void *user_data; @@ -557,8 +559,9 @@ void (*write_wakeup)(struct sock *sk); void (*read_wakeup)(struct sock *sk); - unsigned int (*poll)(struct file * file, struct socket *sock, - struct poll_table_struct *wait); + unsigned int (*poll)(struct file * file, + struct socket *sock, + struct poll_table_entry *wait); int (*ioctl)(struct sock *sk, int cmd, unsigned long arg); @@ -755,8 +758,9 @@ struct socket *, int); extern int sock_no_getname(struct socket *, struct sockaddr *, int *, int); -extern unsigned int sock_no_poll(struct file *, struct socket *, - struct poll_table_struct *); +extern unsigned int sock_no_poll(struct file *, + struct socket *, + struct poll_table_entry *); extern int sock_no_ioctl(struct socket *, unsigned int, unsigned long); extern int sock_no_listen(struct socket *, int); diff -ur linux-2.2.9-np/include/net/tcp.h linux/include/net/tcp.h --- linux-2.2.9-np/include/net/tcp.h Sat May 22 14:17:59 1999 +++ linux/include/net/tcp.h Fri May 28 13:07:22 1999 @@ -512,7 +512,7 @@ extern void tcp_close(struct sock *sk, long 
timeout); extern struct sock * tcp_accept(struct sock *sk, int flags); -extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); +extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_entry *wait); extern void tcp_write_space(struct sock *sk); extern int tcp_getsockopt(struct sock *sk, int level, diff -ur linux-2.2.9-np/kernel/exit.c linux/kernel/exit.c --- linux-2.2.9-np/kernel/exit.c Wed May 26 16:06:16 1999 +++ linux/kernel/exit.c Fri May 28 13:00:42 1999 @@ -13,6 +13,7 @@ #ifdef CONFIG_BSD_PROCESS_ACCT #include #endif +#include #include #include @@ -169,6 +170,11 @@ if (set & 1) { struct file * file = files->fd[i]; if (file) { + if (i < files->max_poll && + POLLINUSE(files->poll_fds, i)) { + poll_free_wait(&files->poll_wait, i); + poll_remove_backmap(&file->f_backmap, i, files); + } files->fd[i] = NULL; filp_close(file, files); } @@ -197,6 +203,10 @@ if (files->max_fdset > __FD_SETSIZE) { free_fdset(files->open_fds, files->max_fdset); free_fdset(files->close_on_exec, files->max_fdset); + } + if (files->max_poll > __FD_SETSIZE) { + free_fdset(files->poll_fds, files->max_poll); + free_fdset(files->poll_hints, 3 * files->max_poll); } kmem_cache_free(files_cachep, files); } diff -ur linux-2.2.9-np/kernel/fork.c linux/kernel/fork.c --- linux-2.2.9-np/kernel/fork.c Wed May 26 16:06:16 1999 +++ linux/kernel/fork.c Fri May 28 13:01:58 1999 @@ -450,11 +450,16 @@ #endif atomic_set(&newf->count, 1); + newf->poll_hint_lock= SPIN_LOCK_UNLOCKED; newf->next_fd = 0; newf->max_fds = NR_OPEN_DEFAULT; newf->max_fdset = __FD_SETSIZE; + newf->max_poll = __FD_SETSIZE; newf->close_on_exec = &newf->close_on_exec_init; newf->open_fds = &newf->open_fds_init; + newf->poll_fds = &newf->poll_fds_init; + newf->poll_hints = &newf->poll_hints_init[0]; + newf->poll_wait = NULL; newf->fd = &newf->fd_array[0]; /* Even if the old fdset gets grown here, we'll only copy "size" fds */ @@ -473,6 +478,10 @@ 
memset(&newf->open_fds->fds_bits[start], 0, left); memset(&newf->close_on_exec->fds_bits[start], 0, left); } + + /* Do not copy hints, they will be recreated on the next poll call */ + memset(newf->poll_fds, 0, newf->max_poll / 8); + memset(newf->poll_hints, 0, 3 * newf->max_poll / 8); /* Find the last open fd */ for (i = size/(8*sizeof(long)); i > 0; ) { diff -ur linux-2.2.9-np/net/core/datagram.c linux/net/core/datagram.c --- linux-2.2.9-np/net/core/datagram.c Sun Oct 4 13:19:39 1998 +++ linux/net/core/datagram.c Fri May 28 12:58:31 1999 @@ -212,12 +212,13 @@ * then please supply your own write_space callback. */ -unsigned int datagram_poll(struct file * file, struct socket *sock, poll_table *wait) +unsigned int datagram_poll(struct file * file, struct socket *sock, struct poll_table_entry *wait) { struct sock *sk = sock->sk; unsigned int mask; poll_wait(file, sk->sleep, wait); + sk->backmap = &file->f_backmap; mask = 0; /* exceptional events? */ @@ -236,7 +237,7 @@ mask |= POLLHUP; /* connection hasn't started yet? */ if (sk->state == TCP_SYN_SENT) - return mask; + return mask | POLLHINT; } /* writable? 
*/ @@ -245,5 +246,5 @@ else sk->socket->flags |= SO_NOSPACE; - return mask; + return mask | POLLHINT; } diff -ur linux-2.2.9-np/net/core/sock.c linux/net/core/sock.c --- linux-2.2.9-np/net/core/sock.c Sat May 22 13:43:06 1999 +++ linux/net/core/sock.c Fri May 28 12:58:31 1999 @@ -894,7 +894,7 @@ return -EOPNOTSUPP; } -unsigned int sock_no_poll(struct file * file, struct socket *sock, poll_table *pt) +unsigned int sock_no_poll(struct file * file, struct socket *sock, struct poll_table_entry *pt) { return 0; } @@ -981,6 +981,7 @@ void sock_def_error_report(struct sock *sk) { if (!sk->dead) { + poll_add_hint(sk->backmap, POLLHINTEX); wake_up_interruptible(sk->sleep); sock_wake_async(sk->socket,0); } @@ -989,6 +990,7 @@ void sock_def_readable(struct sock *sk, int len) { if(!sk->dead) { + poll_add_hint(sk->backmap, POLLHINTIN); wake_up_interruptible(sk->sleep); sock_wake_async(sk->socket,1); } @@ -1001,6 +1003,7 @@ */ if(!sk->dead && ((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf)) { + poll_add_hint(sk->backmap, POLLHINTOUT); wake_up_interruptible(sk->sleep); /* Should agree with poll, otherwise some programs break */ @@ -1030,6 +1033,8 @@ sk->state = TCP_CLOSE; sk->zapped = 1; sk->socket = sock; + + sk->backmap = NULL; if(sock) { diff -ur linux-2.2.9-np/net/ipv4/af_inet.c linux/net/ipv4/af_inet.c --- linux-2.2.9-np/net/ipv4/af_inet.c Sat May 22 13:43:04 1999 +++ linux/net/ipv4/af_inet.c Fri May 28 12:58:31 1999 @@ -469,6 +469,7 @@ /* Begin closedown and wake up sleepers. */ if (sock->state != SS_UNCONNECTED) sock->state = SS_DISCONNECTING; + poll_add_hint(sk->backmap, POLLHINTIN|POLLHINTOUT); sk->state_change(sk); /* Applications forget to leave groups before exiting */ @@ -491,6 +492,7 @@ } sock->sk = NULL; sk->socket = NULL; + sk->backmap = NULL; sk->prot->close(sk, timeout); } return(0); @@ -834,12 +836,14 @@ if (sk->prot->shutdown) sk->prot->shutdown(sk, how); /* Wake up anyone sleeping in poll. */ + poll_add_hint(sk->backmap, (how & RCV_SHUTDOWN ? 
POLLHINTIN : 0) | + (how & SEND_SHUTDOWN ? POLLHINTOUT : 0)); sk->state_change(sk); return(0); } -unsigned int inet_poll(struct file * file, struct socket *sock, poll_table *wait) +unsigned int inet_poll(struct file * file, struct socket *sock, struct poll_table_entry *wait) { struct sock *sk = sock->sk; diff -ur linux-2.2.9-np/net/ipv4/tcp.c linux/net/ipv4/tcp.c --- linux-2.2.9-np/net/ipv4/tcp.c Sat May 22 13:43:04 1999 +++ linux/net/ipv4/tcp.c Fri May 28 12:58:31 1999 @@ -526,7 +526,7 @@ /* * LISTEN is a special case for poll.. */ -static unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait) +static unsigned int tcp_listen_poll(struct sock *sk, struct poll_table_entry *wait) { struct open_request *req, *dummy; @@ -551,15 +551,16 @@ * take care of normal races (between the test and the event) and we don't * go look at any of the socket buffers directly. */ -unsigned int tcp_poll(struct file * file, struct socket *sock, poll_table *wait) +unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_entry *wait) { unsigned int mask; struct sock *sk = sock->sk; struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); poll_wait(file, sk->sleep, wait); + sk->backmap = &file->f_backmap; if (sk->state == TCP_LISTEN) - return tcp_listen_poll(sk, wait); + return tcp_listen_poll(sk, wait) | POLLHINT ; mask = 0; if (sk->err) @@ -600,7 +601,8 @@ if (tp->urg_data & URG_VALID) mask |= POLLPRI; } - return mask; + + return mask | POLLHINT; } /* @@ -612,6 +614,7 @@ if (sk->dead) return; + poll_add_hint(sk->backmap, POLLHINTOUT); wake_up_interruptible(sk->sleep); if (sock_wspace(sk) >= tcp_min_write_space(sk)) @@ -1499,8 +1502,10 @@ /* sk->keepopen = 1; */ sk->shutdown = SHUTDOWN_MASK; - if (!sk->dead) + if (!sk->dead) { + poll_add_hint(sk->backmap, POLLHINTIN|POLLHINTOUT); sk->state_change(sk); + } /* We need to flush the recv. buffs. 
We do this only on the * descriptor close, not protocol-sourced closes, because the diff -ur linux-2.2.9-np/net/ipv4/tcp_input.c linux/net/ipv4/tcp_input.c --- linux-2.2.9-np/net/ipv4/tcp_input.c Sat May 22 13:43:06 1999 +++ linux/net/ipv4/tcp_input.c Fri May 28 12:58:31 1999 @@ -62,6 +62,7 @@ #include #include #include +#include #ifdef CONFIG_SYSCTL #define SYNC_INIT 0 /* let the user enable it */ @@ -288,8 +289,10 @@ }; tcp_set_state(sk, TCP_CLOSE); sk->shutdown = SHUTDOWN_MASK; - if (!sk->dead) + if (!sk->dead) { + poll_add_hint(sk->backmap, POLLHINTIN|POLLHINTOUT); sk->state_change(sk); + } } /* This tags the retransmission queue when SACKs arrive. */ @@ -1072,8 +1075,10 @@ /* Prevent rcvmsg/sndmsg calls, and wake people up. */ sk->shutdown = SHUTDOWN_MASK; - if(!sk->dead) + if(!sk->dead) { + poll_add_hint(sk->backmap, POLLHINTIN|POLLHINTOUT); sk->state_change(sk); + } } /* @@ -1098,6 +1103,7 @@ tcp_send_ack(sk); if (!sk->dead) { + poll_add_hint(sk->backmap, POLLHINTIN); sk->state_change(sk); sock_wake_async(sk->socket, 1); } @@ -2121,6 +2127,8 @@ tp->copied_seq = tp->rcv_nxt; if(!sk->dead) { + poll_add_hint(sk->backmap, POLLHINTOUT|POLLHINTEX); /* IN and OUT needed ?? 
*/ + sk->state_change(sk); sock_wake_async(sk->socket, 0); } @@ -2251,8 +2259,10 @@ sk->dport = th->source; tp->copied_seq = tp->rcv_nxt; - if(!sk->dead) + if(!sk->dead) { + poll_add_hint(sk->backmap, POLLHINTOUT); sk->state_change(sk); + } tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = htons(th->window) << tp->snd_wscale; @@ -2269,9 +2279,10 @@ if (tp->snd_una == tp->write_seq) { sk->shutdown |= SEND_SHUTDOWN; tcp_set_state(sk, TCP_FIN_WAIT2); - if (!sk->dead) + if (!sk->dead) { + poll_add_hint(sk->backmap, POLLHINTOUT); sk->state_change(sk); - else + } else tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout); } break; @@ -2287,8 +2298,10 @@ if (tp->snd_una == tp->write_seq) { sk->shutdown = SHUTDOWN_MASK; tcp_set_state(sk,TCP_CLOSE); - if (!sk->dead) + if (!sk->dead) { + poll_add_hint(sk->backmap, POLLHINTIN|POLLHINTOUT); sk->state_change(sk); + } goto discard; } break; diff -ur linux-2.2.9-np/net/ipv4/tcp_timer.c linux/net/ipv4/tcp_timer.c --- linux-2.2.9-np/net/ipv4/tcp_timer.c Sat May 22 13:43:06 1999 +++ linux/net/ipv4/tcp_timer.c Fri May 28 12:58:31 1999 @@ -21,6 +21,7 @@ */ #include +#include int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; @@ -235,8 +236,10 @@ tcp_set_state(sk, TCP_CLOSE); sk->shutdown = SHUTDOWN_MASK; - if (!sk->dead) + if (!sk->dead) { + poll_add_hint(sk->backmap, POLLHINTIN|POLLHINTOUT); sk->state_change(sk); + } } else { tp->probes_out++; tp->pending = TIME_KEEPOPEN; diff -ur linux-2.2.9-np/net/ipv4/timer.c linux/net/ipv4/timer.c --- linux-2.2.9-np/net/ipv4/timer.c Sat May 22 13:42:55 1999 +++ linux/net/ipv4/timer.c Fri May 28 12:58:31 1999 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -113,8 +114,10 @@ /* We've waited long enough, close the socket. 
*/ tcp_set_state(sk, TCP_CLOSE); sk->shutdown = SHUTDOWN_MASK; - if (!sk->dead) + if (!sk->dead) { + poll_add_hint(sk->backmap, POLLHINTIN|POLLHINTOUT); sk->state_change(sk); + } net_reset_timer (sk, TIME_DONE, TCP_DONE_TIME); break; diff -ur linux-2.2.9-np/net/socket.c linux/net/socket.c --- linux-2.2.9-np/net/socket.c Sat May 22 13:43:04 1999 +++ linux/net/socket.c Fri May 28 12:58:31 1999 @@ -91,8 +91,7 @@ size_t size, loff_t *ppos); static int sock_close(struct inode *inode, struct file *file); -static unsigned int sock_poll(struct file *file, - struct poll_table_struct *wait); +static unsigned int sock_poll(struct file *file, struct poll_table_entry *wait); static int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); static int sock_fasync(int fd, struct file *filp, int on); @@ -459,7 +458,7 @@ } -static unsigned int sock_poll(struct file *file, poll_table * wait) +static unsigned int sock_poll(struct file *file, struct poll_table_entry * wait) { struct socket *sock; diff -ur linux-2.2.9-np/net/unix/af_unix.c linux/net/unix/af_unix.c --- linux-2.2.9-np/net/unix/af_unix.c Sat May 22 13:43:06 1999 +++ linux/net/unix/af_unix.c Fri May 28 12:58:31 1999 @@ -1453,12 +1453,13 @@ return(0); } -static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait) +static unsigned int unix_poll(struct file * file, struct socket *sock, struct poll_table_entry *wait) { struct sock *sk = sock->sk; unsigned int mask; poll_wait(file, sk->sleep, wait); + sk->backmap = &file->f_backmap; mask = 0; /* exceptional events? */ @@ -1489,9 +1490,11 @@ { if (sk->dead) return; + poll_add_hint(sk->backmap, POLLHINTOUT); wake_up_interruptible(sk->sleep); - if (sk->sndbuf - (int)atomic_read(&sk->wmem_alloc) >= MIN_WRITE_SPACE) + if (sk->sndbuf - (int)atomic_read(&sk->wmem_alloc) >= MIN_WRITE_SPACE){ sock_wake_async(sk->socket, 2); + } } #ifdef CONFIG_PROC_FS