--- /dev/null	Tue May  5 16:32:27 1998
+++ drivers/char/devpoll.c	Wed Jul 26 15:03:32 2000
@@ -0,0 +1,727 @@
+/* $Id: devpoll.c,v 1.18 2000/07/26 19:03:32 provos Exp $ */
+/*
+ * /dev/poll
+ * by Niels Provos
+ *
+ * provides poll() support via /dev/poll as in Solaris.
+ */
+
+/*
+ * NOTE(review): the header names of the #include lines below are missing
+ * in this copy of the patch and must be restored before it can compile.
+ */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+
+/*
+ * Debug output.  Both macros take a fully parenthesised printk argument
+ * list; DNPRINTK only prints when its level n is <= DEBUG.  They are
+ * wrapped in do { } while (0) so that they behave as a single statement
+ * inside an unbraced if/else.
+ */
+#define DEBUG 1
+#ifdef DEBUG
+#define DPRINTK(x)	do { printk x; } while (0)
+#define DNPRINTK(n,x)	do { if ((n) <= DEBUG) printk x; } while (0)
+#else
+#define DPRINTK(x)	do { } while (0)
+#define DNPRINTK(n,x)	do { } while (0)
+#endif
+
+/* Various utility functions */
+
+/* Result reported for files that have no poll method. */
+#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)
+
+/* Do dynamic hashing */
+
+#define INITIAL_BUCKET_BITS 6	/* log2 of the initial number of buckets */
+#define MAX_BUCKET_BITS 16	/* the table is never grown beyond this */
+#define RESIZE_LENGTH 2		/* grow when entries/buckets exceeds this */
+
+/*
+ * Initialize the hash table of a devpoll: reset lock, counters and
+ * statistics and allocate 1 << INITIAL_BUCKET_BITS empty buckets.
+ * Returns 0 on success or -ENOMEM if the table cannot be allocated.
+ */
+int
+dp_init(struct devpoll *dp)
+{
+	int i;
+
+	DNPRINTK(3,(KERN_INFO "/dev/poll: dp_init\n"));
+
+	dp->dp_lock = RW_LOCK_UNLOCKED;
+	dp->dp_entries = 0;
+	dp->dp_max = 0;
+	dp->dp_avg = dp->dp_count = 0;
+	dp->dp_cached = dp->dp_calls = 0;
+	dp->dp_bucket_bits = INITIAL_BUCKET_BITS;
+	dp->dp_bucket_mask = (1 << INITIAL_BUCKET_BITS) - 1;
+	dp->dp_tab = kmalloc((dp->dp_bucket_mask + 1) * sizeof(struct fd_list),
+			     GFP_KERNEL);
+	if (!dp->dp_tab)
+		return -ENOMEM;
+	for (i = 0; i <= dp->dp_bucket_mask; i++) {
+		DLIST_INIT (&dp->dp_tab[i]);
+	}
+
+	return (0);
+}
+
+/*
+ * Double the number of hash buckets.  The new table is allocated before
+ * the lock is taken; all entries are then moved under the write lock and
+ * the old table is freed afterwards.
+ * Returns 0 on success or -ENOMEM, in which case the old table is left
+ * untouched.
+ */
+int
+dp_resize(struct devpoll *dp)
+{
+	u_int16_t new_mask, old_mask;
+	int i;
+	struct fd_list *new_tab, *old_tab;
+	struct dp_fd *dpfd;
+	unsigned long flags;
+
+	old_mask = dp->dp_bucket_mask;
+	new_mask = (old_mask + 1) * 2 - 1;
+
+	DPRINTK((KERN_INFO "/dev/poll: resize %d -> %d\n",
+		 old_mask, new_mask));
+
+	new_tab = kmalloc((new_mask + 1) * sizeof(struct fd_list), GFP_KERNEL);
+	if (!new_tab)
+		return -ENOMEM;
+
+	for (i = 0; i <= new_mask; i++) {
+		DLIST_INIT(&new_tab[i]);
+	}
+
+	old_tab = dp->dp_tab;
+
+	/* Rehash all entries */
+	write_lock_irqsave(&dp->dp_lock, flags);
+	for (i = 0; i <= old_mask; i++) {
+		/* Always take the head: each pass unlinks it from old_tab */
+		for (dpfd = DLIST_FIRST(&old_tab[i]); dpfd;
+		     dpfd = DLIST_FIRST(&old_tab[i])) {
+			DLIST_REMOVE(dpfd, next);
+			DLIST_INSERT_HEAD(&new_tab[dpfd->pfd.fd & new_mask],
+					  dpfd, next);
+		}
+	}
+
+	dp->dp_tab = new_tab;
+	dp->dp_bucket_bits++;
+	dp->dp_bucket_mask = new_mask;
+	write_unlock_irqrestore(&dp->dp_lock, flags);
+
+	kfree (old_tab);
+
+	return (0);
+}
+
+/*
+ * Add a new entry for *pfd to the hash table.  The entry starts out
+ * dirty with revents cleared.  If the descriptor is currently open, a
+ * backmap record pointing at the new entry is attached to its file.
+ * The table is grown when the average chain length exceeds
+ * RESIZE_LENGTH; a failed resize is not reported.
+ * Returns 0 on success or -ENOMEM if the entry cannot be allocated.
+ */
+int
+dp_insert(struct devpoll *dp, struct pollfd *pfd)
+{
+	struct dp_fd *dpfd;
+	u_int16_t bucket = pfd->fd & dp->dp_bucket_mask;
+	unsigned long flags;
+	struct file *file;
+
+	dpfd = kmalloc(sizeof(struct dp_fd), GFP_KERNEL);
+	if (!dpfd)
+		return -ENOMEM;
+
+	dpfd->flags = 0;
+	set_bit(DPH_DIRTY, &dpfd->flags);
+	dpfd->pfd = *pfd;
+	dpfd->pfd.revents = 0;
+
+	write_lock_irqsave(&dp->dp_lock, flags);
+	DLIST_INSERT_HEAD(&dp->dp_tab[bucket], dpfd, next);
+	file = fcheck(pfd->fd);
+	if (file != NULL) {
+		write_lock(&(file)->f_dplock);
+		poll_backmap(pfd->fd, dpfd, &(file)->f_backmap);
+		write_unlock(&(file)->f_dplock);
+		set_bit(DPH_BACKMAP, &(dpfd)->flags);
+	}
+	write_unlock_irqrestore(&dp->dp_lock, flags);
+
+	dp->dp_entries++;
+
+	/* Check if we need to resize the hash table */
+	if ((dp->dp_entries >> dp->dp_bucket_bits) > RESIZE_LENGTH &&
+	    dp->dp_bucket_bits < MAX_BUCKET_BITS)
+		dp_resize(dp);
+
+	return (0);
+}
+
+/*
+ * Look up the entry for file descriptor fd.
+ * Returns the entry, or NULL if fd is not in the table.
+ */
+struct dp_fd *
+dp_find(struct devpoll *dp, int fd)
+{
+	struct dp_fd *dpfd;
+	u_int16_t bucket = fd & dp->dp_bucket_mask;
+
+	for (dpfd = DLIST_FIRST(&dp->dp_tab[bucket]);
+	     dpfd && dpfd->pfd.fd != fd;
+	     dpfd = DLIST_NEXT(dpfd, next))
+		;
+
+	DNPRINTK(2, (KERN_INFO "dp_find: %d -> %p\n", fd, dpfd));
+
+	return dpfd;
+}
+
+/*
+ * Unlink dpfd from the hash table, drop the backmap records of its
+ * descriptor from the file it currently refers to (if any) and free
+ * the entry.  dpfd must not be used after this call.
+ */
+void
+dp_delete(struct devpoll *dp, struct dp_fd *dpfd)
+{
+	unsigned long flags;
+	int fd;
+	struct file *filp;
+
+	write_lock_irqsave(&dp->dp_lock, flags);
+	DLIST_REMOVE(dpfd, next);
+	/* Remove backmaps if necessary */
+	if (current->files) {
+		fd = dpfd->pfd.fd;
+		filp = fcheck(fd);
+
+		if (test_bit(DPH_BACKMAP, &dpfd->flags) &&
+		    filp && filp->f_backmap) {
+			write_lock(&filp->f_dplock);
+			poll_remove_backmap(&filp->f_backmap, fd,
+					    current->files);
+			write_unlock(&filp->f_dplock);
+		}
+	}
+	write_unlock_irqrestore(&dp->dp_lock, flags);
+
+	kfree (dpfd);
+
+	dp->dp_entries--;
+}
+
+/*
+ * Delete every entry and free the bucket array.  The devpoll structure
+ * itself is left for the caller to free.
+ */
+void
+dp_free(struct devpoll *dp)
+{
+	int i;
+	struct dp_fd *dpfd;
+
+	lock_kernel();
+	for (i = 0; i <= dp->dp_bucket_mask; i++) {
+		/* dp_delete() unlinks the head, so re-read it every pass */
+		for (dpfd = DLIST_FIRST(&dp->dp_tab[i]); dpfd;
+		     dpfd = DLIST_FIRST(&dp->dp_tab[i])) {
+			dp_delete(dp, dpfd);
+		}
+	}
+	unlock_kernel();
+
+	kfree (dp->dp_tab);
+}
+
+/* Some of the functions below are from the bttv.c driver by
+ * Ralph Metzler and Marcus Metzler with slight variations by me
+ */
+
+/* Given PGD from the address space's page table, return the kernel
+ * virtual mapping of the physical memory mapped at ADR.
+ * Returns 0 if no page is present at ADR.
+ */
+static inline unsigned long
+uvirt_to_kva(pgd_t *pgd, unsigned long adr)
+{
+	unsigned long ret = 0UL;
+	pmd_t *pmd;
+	pte_t *ptep, pte;
+
+	if (!pgd_none(*pgd)) {
+		pmd = pmd_offset(pgd, adr);
+		if (!pmd_none(*pmd)) {
+			ptep = pte_offset(pmd, adr);
+			pte = *ptep;
+			if(pte_present(pte))
+				ret = (pte_page(pte)|(adr&(PAGE_SIZE-1)));
+		}
+	}
+	return ret;
+}
+
+/* Here we want the physical address of the memory.
+ * This is used when initializing the contents of the
+ * area and marking the pages as reserved.
+ */
+static inline unsigned long
+kvirt_to_pa(unsigned long adr)
+{
+	unsigned long vaddr = VMALLOC_VMADDR(adr);
+	unsigned long kaddr;
+
+	/* Resolve through the kernel page tables, then convert to physical */
+	kaddr = uvirt_to_kva(pgd_offset_k(vaddr), vaddr);
+
+	return __pa(kaddr);
+}
+
+
+/*
+ * vmalloc() size bytes, rounded up to whole pages, and mark every page
+ * of the area as reserved.  Returns the area or NULL if vmalloc fails.
+ */
+static void *
+rvmalloc(unsigned long size)
+{
+	unsigned long left = PAGE_ALIGN(size);
+	unsigned long cur, phys;
+	void *area;
+
+	area = vmalloc(left);
+	if (!area)
+		return area;
+
+	for (cur = (unsigned long) area; left > 0;
+	     cur += PAGE_SIZE, left -= PAGE_SIZE) {
+		phys = kvirt_to_pa(cur);
+		mem_map_reserve(MAP_NR(__va(phys)));
+	}
+
+	return area;
+}
+
+/*
+ * Counterpart of rvmalloc(): clear the reserved mark on every page of
+ * the area and vfree() it.  size must be the size given to rvmalloc().
+ * A NULL mem is ignored.
+ */
+static void
+rvfree(void * mem, unsigned long size)
+{
+	unsigned long left = PAGE_ALIGN(size);
+	unsigned long cur, phys;
+
+	if (!mem)
+		return;
+
+	for (cur = (unsigned long) mem; left > 0;
+	     cur += PAGE_SIZE, left -= PAGE_SIZE) {
+		phys = kvirt_to_pa(cur);
+		mem_map_unreserve(MAP_NR(__va(phys)));
+	}
+
+	vfree(mem);
+}
+
+/*
+ * poll the fds that we keep in our state, return after we reached
+ * max changed fds or are done.
+ * XXX - I do not like how the wait table stuff is done.
+ */
+
+/*
+ * dp:      the /dev/poll state to scan
+ * max:     maximum number of results to store in rfds
+ * wait:    wait table for the first pass, or NULL for no waiting
+ * timeout: timeout in jiffies (0 = return immediately)
+ * rfds:    result array; kernel memory if usemmap is set, otherwise a
+ *          user pointer already checked with verify_area() by the caller
+ *
+ * Returns the number of results stored, 0 on timeout, or -EINTR if a
+ * signal arrived before any result was found.
+ */
+int
+dp_poll(struct devpoll *dp, int max, poll_table *wait,
+	long timeout, struct pollfd *rfds, int usemmap)
+{
+	int count = 0;
+
+	lock_kernel();
+
+	for (;;) {
+		unsigned int j;
+		struct dp_fd *dpfd, *dpnext;
+		struct pollfd *fdpnt, pfd;
+		struct file *file;
+
+		current->state = TASK_INTERRUPTIBLE;
+		for (j = 0; j <= dp->dp_bucket_mask && count < max; j++) {
+			for (dpfd = DLIST_FIRST(&dp->dp_tab[j]);
+			     dpfd && count < max;
+			     dpfd = dpnext) {
+				int fd;
+				unsigned int mask;
+
+				/*
+				 * Fetch the successor before anything else:
+				 * dp_delete() below frees dpfd, so it must
+				 * not be dereferenced to advance the loop.
+				 */
+				dpnext = DLIST_NEXT(dpfd, next);
+
+				fdpnt = &dpfd->pfd;
+				fd = fdpnt->fd;
+
+				/* poll_wait increments f_count if needed */
+				file = fcheck(fd);
+				if (file == NULL) {
+					/* fd was closed: forget about it */
+					dp_delete(dp, dpfd);
+					continue;
+				}
+
+				/*
+				 * Only call the poll method if the entry is
+				 * dirty, if we still have to register on the
+				 * wait queues, or if the cached result would
+				 * be reported; otherwise use the cached
+				 * revents.
+				 */
+				mask = fdpnt->revents;
+				if (test_and_clear_bit(DPH_DIRTY,
+						       &dpfd->flags) ||
+				    wait != NULL ||
+				    (mask & fdpnt->events)) {
+					mask = DEFAULT_POLLMASK;
+					if (file->f_op && file->f_op->poll)
+						mask = file->f_op->poll(file, wait);
+					/* No hinting support: always re-poll */
+					if (!(mask & POLLHINT))
+						set_bit(DPH_DIRTY, &dpfd->flags);
+					fdpnt->revents = mask;
+				} else
+					dp->dp_cached++;
+
+				dp->dp_calls++;
+
+				mask &= fdpnt->events | POLLERR | POLLHUP;
+				if (mask) {
+					/* We will not sleep: stop queueing */
+					wait = NULL;
+					count++;
+
+					if (usemmap) {
+						*rfds = *fdpnt;
+						rfds->revents = mask;
+					} else {
+						pfd = *fdpnt;
+						pfd.revents = mask;
+						__copy_to_user(rfds, &pfd,
+							       sizeof(struct pollfd));
+					}
+
+					rfds++;
+				}
+			}
+		}
+
+		wait = NULL;
+		if (count || !timeout || signal_pending(current))
+			break;
+		timeout = schedule_timeout(timeout);
+	}
+	current->state = TASK_RUNNING;
+
+	unlock_kernel();
+
+	if (!count && signal_pending(current))
+		return -EINTR;
+
+	return count;
+}
+
+/*
+ * close a /dev/poll
+ *
+ * Releases the mmap area (if allocated), the hash table and the
+ * devpoll state itself.
+ */
+
+static int
+close_devpoll(struct inode * inode, struct file * file)
+{
+	struct devpoll *dp = file->private_data;
+
+	DNPRINTK(1, (KERN_INFO "close /dev/poll, max: %d, avg: %d(%d/%d) %d/%d\n",
+		     dp->dp_max,
+		     dp->dp_count ? dp->dp_avg/dp->dp_count : 0,
+		     dp->dp_avg, dp->dp_count,
+		     dp->dp_cached, dp->dp_calls));
+
+	/* free allocated memory */
+	if (dp->dp_mmap)
+		rvfree(dp->dp_mmap, DP_MMAP_SIZE(dp->dp_nfds));
+
+	/* Free the hash table */
+	dp_free(dp);
+
+	kfree(dp);
+
+	MOD_DEC_USE_COUNT;
+	return 0;
+}
+
+/*
+ * open a /dev/poll
+ *
+ * Allocates a devpoll state with an empty hash table and stores it in
+ * file->private_data.  Returns 0 or -ENOMEM.
+ */
+
+static int
+open_devpoll(struct inode * inode, struct file * file)
+{
+	struct devpoll *dp;
+	int r;
+
+	/* allocated state */
+	dp = kmalloc(sizeof(struct devpoll), GFP_KERNEL);
+	if (dp == NULL)
+		return -ENOMEM;
+	dp->dp_nfds = 0;
+	dp->dp_mmaped = 0;
+	dp->dp_mmap = NULL;
+
+	if ((r = dp_init(dp))) {
+		kfree (dp);
+		return r;
+	}
+
+	file->private_data = dp;
+
+	MOD_INC_USE_COUNT;
+
+	DNPRINTK(3, (KERN_INFO "open /dev/poll\n"));
+
+	return 0;
+}
+
+/*
+ * write to /dev/poll:
+ * a user writes struct pollfds and we add them to our list, or remove
+ * them if (events & POLLREMOVE) is true
+ *
+ * count must be a multiple of sizeof(struct pollfd).  Returns count on
+ * success, -EINVAL for a bad count, -EFAULT if the buffer cannot be
+ * read, or -ENOMEM if a new entry cannot be allocated.  Entries that
+ * were processed before an error stay in effect.
+ */
+
+static ssize_t
+write_devpoll(struct file *file, const char *buffer, size_t count,
+	      loff_t *ppos)
+{
+	int r, rcount;
+	struct devpoll *dp = file->private_data;
+	struct pollfd pfd;
+	struct dp_fd *dpfd;
+#ifdef DEBUG
+	int add = 0, delete = 0, change = 0;
+#endif
+
+	DNPRINTK(3, (KERN_INFO "write /dev/poll\n"));
+
+	if (count % sizeof(struct pollfd))
+		return -EINVAL;
+
+	if ((r = verify_area(VERIFY_READ, buffer, count)))
+		return r;
+
+	rcount = count;
+
+	lock_kernel();
+
+	/* r is 0 here; it becomes non-zero only when the loop aborts */
+	while (count > 0) {
+		/* verify_area() does not rule out a fault during the copy */
+		if (__copy_from_user(&pfd, buffer, sizeof(pfd))) {
+			r = -EFAULT;
+			break;
+		}
+
+		dpfd = dp_find(dp, pfd.fd);
+
+		/*
+		 * The range check, including the test for negative
+		 * descriptors, must come before the fd array is indexed.
+		 */
+		if (pfd.fd < 0 ||
+		    pfd.fd >= current->files->max_fds ||
+		    current->files->fd[pfd.fd] == NULL) {
+			/* Be tolerant, maybe the close happened already */
+			pfd.events = POLLREMOVE;
+		}
+		/* See if we need to remove the file descriptor.  If it
+		 * already exists OR the event fields, otherwise insert
+		 */
+		if (pfd.events & POLLREMOVE) {
+			if (dpfd)
+				dp_delete(dp, dpfd);
+#ifdef DEBUG
+			delete++;
+#endif
+		} else if (dpfd) {
+			/* XXX dpfd->pfd.events |= pfd.events; */
+			dpfd->pfd.events = pfd.events;
+#ifdef DEBUG
+			change++;
+#endif
+		} else {
+			if ((r = dp_insert(dp, &pfd)))
+				break;
+#ifdef DEBUG
+			add++;
+#endif
+		}
+
+		buffer += sizeof(pfd);
+		count -= sizeof(pfd);
+	}
+
+	unlock_kernel();
+
+	if (dp->dp_max < dp->dp_entries) {
+		dp->dp_max = dp->dp_entries;
+		DNPRINTK(2, (KERN_INFO "/dev/poll: new max %d\n", dp->dp_max));
+	}
+
+	DNPRINTK(3, (KERN_INFO "write /dev/poll: %d entries (%d/%d/%d)\n",
+		     dp->dp_entries, add, delete, change));
+
+	if (r)
+		return r;
+
+	return (rcount);
+}
+
+/*
+ * ioctl on /dev/poll:
+ *  DP_ALLOC     allocate a result area for arg pollfds (for mmap)
+ *  DP_FREE      free that area again
+ *  DP_ISPOLLED  arg points to a pollfd; returns 1 and fills in events
+ *               if its fd is in the set, 0 otherwise
+ *  DP_POLL      arg points to a struct dvpoll; collect up to dp_nfds
+ *               results into dp_fds, or into the mmap area if dp_fds
+ *               is NULL.  Returns the number of results.
+ * Unknown commands return -EINVAL.
+ */
+static int
+ioctl_devpoll(struct inode *inode, struct file *file,
+	      unsigned int cmd, unsigned long arg)
+{
+	struct devpoll *dp = file->private_data;
+
+	switch (cmd) {
+	case DP_ALLOC:
+		if (arg > current->rlim[RLIMIT_NOFILE].rlim_cur)
+			return -EINVAL;
+		if (dp->dp_mmap)
+			return -EPERM;
+		dp->dp_mmap = rvmalloc(DP_MMAP_SIZE(arg));
+		if (dp->dp_mmap == NULL)
+			return -ENOMEM;
+		dp->dp_nfds = arg;
+
+		DPRINTK((KERN_INFO "allocated %d pollfds\n", dp->dp_nfds));
+
+		return 0;
+	case DP_FREE:
+		if (dp->dp_mmap == NULL)
+			return -EPERM;
+		/*
+		 * NOTE(review): nothing visible here removes an existing
+		 * user mapping of the area before it is freed - confirm
+		 * that a process cannot keep accessing the freed pages.
+		 */
+		rvfree(dp->dp_mmap, DP_MMAP_SIZE(dp->dp_nfds));
+		dp->dp_mmap = NULL;
+		dp->dp_mmaped = 0;
+
+		DPRINTK((KERN_INFO "freed %d pollfds\n", dp->dp_nfds));
+		dp->dp_nfds = 0;
+
+		return 0;
+	case DP_ISPOLLED: {
+		struct pollfd pfd;
+		struct dp_fd *dpfd;
+
+		if (copy_from_user(&pfd, (void *)arg, sizeof(pfd)))
+			return -EFAULT;
+		dpfd = dp_find(dp, pfd.fd);
+		if (dpfd == NULL)
+			return (0);
+
+		/* We poll this fd, return the events we poll on */
+		pfd.events = dpfd->pfd.events;
+		pfd.revents = 0;
+
+		if (copy_to_user((void *)arg, &pfd, sizeof(pfd)))
+			return -EFAULT;
+		return (1);
+	}
+	case DP_POLL: {
+		struct dvpoll dopoll;
+		int nfds, usemmap = 0;
+		unsigned long timeout;
+		poll_table *wait = NULL;
+		struct pollfd *rpfds;
+
+		if (copy_from_user(&dopoll, (void *)arg, sizeof(dopoll)))
+			return -EFAULT;
+
+		nfds = dopoll.dp_nfds;
+		if (nfds <= 0)
+			return -EINVAL;
+		/* nfds * sizeof(struct pollfd) below must not wrap around */
+		if ((unsigned long)nfds > ~0UL / sizeof(struct pollfd))
+			return -EINVAL;
+
+		if (dopoll.dp_fds == NULL) {
+			if (dp->dp_mmap == NULL || !dp->dp_mmaped)
+				return -EINVAL;
+			rpfds = (struct pollfd *)dp->dp_mmap;
+			/*
+			 * The results go to kernel memory: never store
+			 * more than the area was allocated for.
+			 */
+			if (nfds > dp->dp_nfds)
+				nfds = dp->dp_nfds;
+			usemmap = 1;
+		} else {
+			rpfds = dopoll.dp_fds;
+			if (verify_area(VERIFY_WRITE, rpfds,
+					nfds * sizeof(struct pollfd)))
+				return -EFAULT;
+			usemmap = 0;
+		}
+
+		/* dp_timeout is in milliseconds; convert to jiffies */
+		timeout = dopoll.dp_timeout;
+		if (timeout) {
+			/* Careful about overflow in the intermediate values */
+			if ((unsigned long)timeout < MAX_SCHEDULE_TIMEOUT / HZ)
+				timeout = (timeout*HZ+999)/1000+1;
+			else /* Negative or overflow */
+				timeout = MAX_SCHEDULE_TIMEOUT;
+
+			/* Initialize wait table */
+			if (!(wait = init_wait()))
+				return -ENOMEM;
+		}
+
+		nfds = dp_poll(dp, nfds, wait, timeout, rpfds, usemmap);
+
+		DNPRINTK(2, (KERN_INFO "poll time %ld -> %d\n", timeout, nfds));
+
+		if (wait)
+			free_wait(wait);
+
+		/* statistics, reported by close_devpoll() */
+		dp->dp_avg += dp->dp_entries;
+		dp->dp_count++;
+
+		return nfds;
+	}
+	default:
+		DPRINTK((KERN_INFO "ioctl(%x) /dev/poll\n", cmd));
+		break;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * mmap shared memory. the first half is an array of struct pollfd,
+ * followed by an array of ints to indicate which file descriptors
+ * changed status.
+ */
+
+static int
+mmap_devpoll(struct file *file, struct vm_area_struct *vma)
+{
+	struct devpoll *dp = file->private_data;
+	unsigned long page = 0, start, pos; /* Evil type to remap_page_range */
+	size_t size, mapsize;
+
+	DPRINTK((KERN_INFO "mmap /dev/poll: %lx %lx\n",
+		 vma->vm_start, vma->vm_offset));
+
+	if (vma->vm_offset != 0)
+		return -EINVAL;
+
+	/* Nothing to map until DP_ALLOC has allocated the area */
+	if (dp->dp_mmap == NULL)
+		return -EINVAL;
+
+	/* Calculate how much memory we can map */
+	size = PAGE_ALIGN(DP_MMAP_SIZE(dp->dp_nfds));
+	mapsize = PAGE_ALIGN(vma->vm_end - vma->vm_start);
+
+	/* Check if the requested size is within our size */
+	if (mapsize > size)
+		return -EINVAL;
+
+	start = vma->vm_start;
+	pos = (unsigned long) dp->dp_mmap;
+	while (mapsize > 0) {
+		/*
+		 * The area comes from vmalloc, so its pages are not
+		 * physically contiguous: look up and map one page at
+		 * a time.
+		 */
+		page = kvirt_to_pa(pos);
+		if (remap_page_range(start, page, PAGE_SIZE, PAGE_SHARED)) {
+			DPRINTK((KERN_ERR "mmap /dev/poll failed: %lx\n",
+				 page));
+			return -EAGAIN;
+		}
+		mapsize -= PAGE_SIZE;
+		start += PAGE_SIZE;
+		pos += PAGE_SIZE;
+	}
+
+	dp->dp_mmaped = 1;
+
+	DPRINTK((KERN_INFO "mmap /dev/poll: %lx %lx\n", page,
+		 (unsigned long) mapsize));
+	return 0;
+}
+
+/*
+ * Positional initializer; the unlabelled slots are filled by their
+ * position in struct file_operations.
+ * NOTE(review): the second-to-last NULL is presumably fsync - confirm
+ * against the 2.2 struct file_operations layout.
+ */
+struct file_operations devpoll_fops = {
+	NULL,		/* seek */
+	NULL,		/* read */
+	write_devpoll,
+	NULL,		/* readdir */
+	NULL,		/* poll */
+	ioctl_devpoll,
+	mmap_devpoll,
+	open_devpoll,
+	NULL,		/* flush */
+	close_devpoll,
+	NULL,
+	NULL,		/* fasync */
+};
+
+static struct miscdevice devpoll = {
+	DEVPOLL_MINOR, "devpoll", &devpoll_fops
+};
+
+/*
+ * Register the misc device.  Returns 0 on success or the error code
+ * returned by misc_register().
+ */
+int __init devpoll_init(void)
+{
+	int r;
+
+	r = misc_register(&devpoll);
+	if (r) {
+		printk(KERN_ERR "/dev/poll: misc_register failed (%d)\n", r);
+		return r;
+	}
+
+	printk(KERN_INFO "/dev/poll driver installed.\n");
+
+	return 0;
+}
+
+#ifdef MODULE
+
+int init_module(void)
+{
+	return devpoll_init();
+}
+
+void cleanup_module(void)
+{
+	misc_deregister(&devpoll);
+}
+#endif
--- /usr/src/linux-2.2.14/drivers/char/Config.in Wed Mar 29 22:47:34 2000 +++ drivers/char/Config.in Thu Jun 29 13:46:34 2000 @@ -111,6 +111,7 @@ tristate '/dev/nvram support' CONFIG_NVRAM +tristate '/dev/poll support' CONFIG_DEVPOLL bool 'Enhanced Real Time Clock Support' CONFIG_RTC if [ "$CONFIG_ALPHA_BOOK1" = "y" ]; then bool 
'Tadpole ANA H8 Support' CONFIG_H8 --- /usr/src/linux-2.2.14/drivers/char/Makefile Wed Mar 29 22:47:34 2000 +++ drivers/char/Makefile Tue Feb 29 19:06:37 2000 @@ -593,6 +593,14 @@ L_OBJS += dz.o endif +ifeq ($(CONFIG_DEVPOLL),y) +L_OBJS += devpoll.o +else + ifeq ($(CONFIG_DEVPOLL),m) + M_OBJS += devpoll.o + endif +endif + include $(TOPDIR)/Rules.make fastdep: --- /usr/src/linux-2.2.14/kernel/ksyms.c Wed Mar 29 22:47:50 2000 +++ kernel/ksyms.c Thu Jul 13 16:38:38 2000 @@ -192,6 +192,8 @@ EXPORT_SYMBOL(vfs_rmdir); EXPORT_SYMBOL(vfs_unlink); EXPORT_SYMBOL(vfs_rename); +EXPORT_SYMBOL(init_wait); +EXPORT_SYMBOL(free_wait); EXPORT_SYMBOL(__pollwait); EXPORT_SYMBOL(ROOT_DEV); EXPORT_SYMBOL(inode_generation_count); --- /usr/src/linux-2.2.14/fs/open.c Wed Mar 29 22:47:49 2000 +++ fs/open.c Thu Jul 13 17:19:44 2000 @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -829,6 +830,12 @@ retval = 0; if (filp->f_op && filp->f_op->flush) retval = filp->f_op->flush(filp); + if (filp->f_backmap) { + unsigned long flags; + write_lock_irqsave(&filp->f_dplock, flags); + poll_clean_backmap(&filp->f_backmap); + write_unlock_irqrestore(&filp->f_dplock, flags); + } if (dentry->d_inode) locks_remove_posix(filp, id); fput(filp); @@ -853,6 +860,12 @@ files->fd[fd] = NULL; put_unused_fd(fd); FD_CLR(fd, files->close_on_exec); + if (filp->f_backmap) { + unsigned long flags; + write_lock_irqsave(&filp->f_dplock, flags); + poll_remove_backmap(&filp->f_backmap, fd, files); + write_unlock_irqrestore(&filp->f_dplock, flags); + } error = filp_close(filp, files); } unlock_kernel(); --- /usr/src/linux-2.2.14/fs/select.c Wed Mar 29 22:47:05 2000 +++ fs/select.c Thu Jul 13 16:18:52 2000 @@ -39,7 +39,23 @@ * Linus noticed. 
-- jrs */ -static void free_wait(poll_table * p) +poll_table *init_wait(void) +{ + poll_table *wait_table; + + wait_table = (poll_table *) __get_free_page(GFP_KERNEL); + if (!wait_table) + return NULL; + + wait_table->nr = 0; + wait_table->entry = (struct poll_table_entry *)(wait_table + 1); + wait_table->next = NULL; + + return wait_table; +} + + +void free_wait(poll_table * p) { struct poll_table_entry * entry; poll_table *old; @@ -155,13 +171,9 @@ wait = wait_table = NULL; if (__timeout) { - wait_table = (poll_table *) __get_free_page(GFP_KERNEL); + wait_table = init_wait(); if (!wait_table) return -ENOMEM; - - wait_table->nr = 0; - wait_table->entry = (struct poll_table_entry *)(wait_table + 1); - wait_table->next = NULL; wait = wait_table; } @@ -388,7 +400,7 @@ { int i, fdcount, err, size; struct pollfd * fds, *fds1; - poll_table *wait_table = NULL, *wait = NULL; + poll_table *wait = NULL; lock_kernel(); /* Do a sanity check on nfds ... */ @@ -406,13 +418,9 @@ err = -ENOMEM; if (timeout) { - wait_table = (poll_table *) __get_free_page(GFP_KERNEL); - if (!wait_table) + wait = init_wait(); + if (!wait) goto out; - wait_table->nr = 0; - wait_table->entry = (struct poll_table_entry *)(wait_table + 1); - wait_table->next = NULL; - wait = wait_table; } size = nfds * sizeof(struct pollfd); @@ -440,7 +448,7 @@ kfree(fds); out: if (wait) - free_wait(wait_table); + free_wait(wait); unlock_kernel(); return err; } --- /usr/src/linux-2.2.14/fs/file_table.c Wed Mar 29 22:47:48 2000 +++ fs/file_table.c Mon Jul 17 15:57:12 2000 @@ -83,6 +83,7 @@ f->f_version = ++global_event; f->f_uid = current->fsuid; f->f_gid = current->fsgid; + f->f_dplock = RW_LOCK_UNLOCKED; put_inuse(f); return f; } --- /dev/null Tue May 5 16:32:27 1998 +++ include/linux/devpoll.h Wed Jul 26 15:03:33 2000 @@ -0,0 +1,126 @@ +/* $Id: devpoll.h,v 1.14 2000/07/26 19:03:33 provos Exp $ + * + * /dev/poll + * by Niels Provos + */ + +#ifndef _LINUX_DEVPOLL_H +#define _LINUX_DEVPOLL_H + +#include +#include + +/* 
+ * List definitions adapted from *BSD sys/queue.h.
+ * I found the Linux macros to be too cumbersome to work with.
+ *
+ * A list is a head holding a pointer to the first element; every
+ * element carries a DLIST_ENTRY field with a pointer to the next
+ * element and the address of the pointer that points at itself, so an
+ * element can be unlinked without knowing its list head.
+ */
+#define DLIST_HEAD(name, type)						\
+struct name {								\
+	struct type *lh_first;	/* first element */			\
+}
+
+#define DLIST_ENTRY(type)						\
+struct {								\
+	struct type *le_next;	/* next element */			\
+	struct type **le_prev;	/* address of previous next element */	\
+}
+
+/*
+ * List access methods
+ */
+#define DLIST_FIRST(head)		((head)->lh_first)
+#define DLIST_END(head)			NULL
+#define DLIST_NEXT(elm, field)		((elm)->field.le_next)
+
+/*
+ * List functions.
+ */
+#define DLIST_INIT(head) do {						\
+	DLIST_FIRST(head) = DLIST_END(head);				\
+} while (0)
+
+#define DLIST_INSERT_HEAD(head, elm, field) do {			\
+	if (((elm)->field.le_next = (head)->lh_first) != NULL)		\
+		(head)->lh_first->field.le_prev = &(elm)->field.le_next;\
+	(head)->lh_first = (elm);					\
+	(elm)->field.le_prev = &(head)->lh_first;			\
+} while (0)
+
+#define DLIST_REMOVE(elm, field) do {					\
+	if ((elm)->field.le_next != NULL)				\
+		(elm)->field.le_next->field.le_prev =			\
+		    (elm)->field.le_prev;				\
+	*(elm)->field.le_prev = (elm)->field.le_next;			\
+} while (0)
+
+/* Bit numbers for dp_fd.flags, used with set_bit()/test_bit() */
+#define DPH_DIRTY	0	/* entry is dirty - bit */
+#define DPH_BACKMAP	1	/* file has an fd back map - bit*/
+
+/* One polled file descriptor; lives in a hash chain of struct devpoll */
+struct dp_fd {
+	DLIST_ENTRY(dp_fd)	next;
+	struct pollfd		pfd;	/* request; revents caches the last result */
+	unsigned long		flags;	/* for hinting; bitops need a long */
+};
+
+DLIST_HEAD(fd_list, dp_fd);
+
+/* Per-open state of /dev/poll: a hash table of dp_fd keyed by fd */
+struct devpoll {
+	struct fd_list	*dp_tab;
+	int		dp_entries;	/* Entries in hash table */
+	int		dp_max;		/* statistics */
+	int		dp_avg;		/* more */
+	int		dp_count;
+	int		dp_cached;
+	int		dp_calls;
+	int		dp_bucket_bits;
+	int		dp_bucket_mask;
+	int		dp_nfds;	/* Number of poll fds */
+	int		dp_mmaped;	/* Are we mmapped */
+	u_char		*dp_mmap;	/* Memory that has been allocated */
+	rwlock_t	dp_lock;
+};
+
+/* Match solaris */
+
+struct dvpoll {
+	struct pollfd *	dp_fds;		/* Leave this ZERO for mmap */
+	int		dp_nfds;
+	int		dp_timeout;
+};
+
+
+#define DEVPOLL_MINOR	125		/* Minor device # for /dev/poll */
+
+
+/* Size in bytes of a result area holding x pollfds */
+#define DP_MMAP_SIZE(x)	((x) * sizeof(struct pollfd))
+
+#define DP_ALLOC	_IOR('P', 1, int)
+#define DP_POLL		_IOWR('P', 2, struct dvpoll)
+#define DP_FREE		_IO('P', 3)
+#define DP_ISPOLLED	_IOWR('P', 4, struct pollfd)
+
+#ifdef __KERNEL__
+/* Function Prototypes */
+
+/*
+ * Mark every dp_fd recorded on the backmap list *map as dirty.
+ * The list is walked under the read side of lock.  Nothing is done
+ * if map or lock is NULL; the socket code stores the two pointers
+ * with separate assignments, so either one may be seen unset.
+ */
+extern inline void
+dp_add_hint(struct poll_backmap **map, rwlock_t *lock)
+{
+	struct poll_backmap *entry;
+	struct dp_fd *dpfd;
+
+	if (!map || !lock)
+		return;
+
+	read_lock(lock);
+	entry = *map;
+	while (entry) {
+		dpfd = entry->arg;
+		set_bit(DPH_DIRTY, &dpfd->flags); /* atomic */
+		entry = entry->next;
+	}
+	read_unlock(lock);
+}
+#endif /* __KERNEL__ */
+
+#endif
+
--- /usr/src/linux-2.2.14/include/linux/fs.h Wed Mar 29 23:42:12 2000 +++ include/linux/fs.h Wed Jul 26 14:40:05 2000 @@ -164,6 +164,7 @@ #include #include #include +#include extern void update_atime (struct inode *inode); #define UPDATE_ATIME(inode) update_atime (inode) @@ -424,6 +425,10 @@ int f_error; unsigned long f_version; + + /* used by /dev/poll hinting */ + struct poll_backmap *f_backmap; + rwlock_t f_dplock; /* needed for tty driver, and maybe others */ void *private_data; --- /usr/src/linux-2.2.14/include/linux/poll.h Wed Mar 29 23:42:13 2000 +++ include/linux/poll.h Wed Jul 26 14:40:11 2000 @@ -3,11 +3,18 @@ #include +/* These defines are MI and should not be in asm/poll.h, somebody needs to + * clean that up. 
+ */
+#define POLLREMOVE	0x1000
+#define POLLHINT	0x2000
+
 #ifdef __KERNEL__
 #include
 #include
 #include
+#include
 #include
@@ -23,6 +30,13 @@
 	struct poll_table_entry * entry;
 } poll_table;
+struct poll_backmap {
+	struct poll_backmap *next;
+	void *arg;			/* the struct dp_fd tracking this fd */
+	struct files_struct *files;	/* files which has this file as */
+	int fd;				/* file descriptor number fd */
+};
+
 #define __MAX_POLL_TABLE_ENTRIES ((PAGE_SIZE - sizeof (poll_table)) / sizeof (struct poll_table_entry))
 extern void __pollwait(struct file * filp, struct wait_queue ** wait_address, poll_table *p);
@@ -100,7 +114,100 @@
 	memset(fdset, 0, FDS_BYTES(nr));
 }
+/*
+ * Add a record for (current->files, fd, arg) to the backmap list *entry
+ * unless an identical record is already present.  arg is the struct
+ * dp_fd that tracks fd.  Nothing is done if entry is NULL.  Allocation
+ * failure is silently ignored (the function returns void).
+ */
+extern inline void
+poll_backmap(int fd, void *arg, struct poll_backmap ** entry)
+{
+	struct poll_backmap *tmp;
+
+	if (!entry)
+		return;
+
+	/*
+	 * See if we have an entry in the backmap already, in general
+	 * we expect this linked list to be very short.
+	 */
+	tmp = *entry;
+	while (tmp != NULL) {
+		if (tmp->files == current->files && tmp->fd == fd &&
+		    arg == tmp->arg)
+			return;
+		tmp = tmp->next;
+	}
+
+	/*
+	 * Size the allocation by the record, not by the pointer type of
+	 * entry.  GFP_ATOMIC because dp_insert() calls us with rwlocks
+	 * held and interrupts disabled, so we must not sleep.
+	 */
+	tmp = (struct poll_backmap *) kmalloc(sizeof(*tmp), GFP_ATOMIC);
+	if (tmp == NULL)
+		return;
+
+	tmp->arg = arg;
+	tmp->files = current->files;
+	tmp->fd = fd;
+	tmp->next = *entry;
+
+	*entry = tmp;
+}
+
+/*
+ * Unlink and free every record on *map that belongs to (files, fd).
+ */
+extern inline void
+poll_remove_backmap(struct poll_backmap **map, int fd,
+		    struct files_struct *files)
+{
+	struct poll_backmap *tmp = *map, *old = NULL;
+
+	while (tmp != NULL) {
+		if (tmp->files == files && tmp->fd == fd) {
+			struct poll_backmap *next = tmp->next;
+			if (old == NULL)
+				*map = next;
+			else
+				old->next = next;
+			kfree (tmp);
+			tmp = next;
+		} else {
+			old = tmp;
+			tmp = tmp->next;
+		}
+	}
+}
+
+/*
+ * Free every record on *map and leave the list empty.
+ * NOTE(review): the printk calls look like leftover debugging output;
+ * they are kept unchanged here.
+ */
+extern inline void
+poll_clean_backmap(struct poll_backmap **map)
+{
+	struct poll_backmap *tmp = *map, *old;
+
+	printk("poll_clean_backmap: map %p\n", map);
+	printk("poll_clean_backmap: *map %p\n", *map);
+
+	while (tmp) {
+		printk("poll_clean_backmap: tmp %p\n", tmp);
+		old = tmp;
+		tmp = tmp->next;
+		kfree (old);
+	}
+
+	*map = NULL;
+}
+
 extern int do_select(int n, fd_set_bits *fds, long *timeout);
+extern poll_table *init_wait(void);
+extern void free_wait(poll_table *p);
 #endif /* KERNEL */
--- /usr/src/linux-2.2.14/include/net/sock.h Wed Mar 29 23:43:22 2000 +++ include/net/sock.h Wed Jul 26 14:41:20 2000 @@ -524,6 +524,10 @@ /* Identd */ struct socket *socket; + /* For Poll hinting */ + void *backmap; + void *dplock; + /* RPC layer private data */ void *user_data; --- /usr/src/linux-2.2.14/net/core/datagram.c Wed Mar 29 22:46:32 2000 +++ net/core/datagram.c Thu Jul 13 16:29:02 2000 @@ -218,7 +218,9 @@ unsigned int mask; poll_wait(file, sk->sleep, wait); - mask = 0; + sk->backmap = &file->f_backmap; + sk->dplock = &file->f_dplock; + mask = POLLHINT; /* exceptional events? */ if (sk->err || !skb_queue_empty(&sk->error_queue)) --- /usr/src/linux-2.2.14/net/core/sock.c Wed Mar 29 22:46:27 2000 +++ net/core/sock.c Thu Jul 13 18:28:12 2000 @@ -7,7 +7,7 @@ * handler for protocols to use and generic option handler. * * - * Version: $Id: sock.c,v 1.80 1999/05/08 03:04:34 davem Exp $ + * Version: $Id: sock.c,v 1.3 2000/07/13 22:28:12 provos Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -106,6 +106,7 @@ #include #include #include +#include #include #include @@ -974,13 +975,16 @@ void sock_def_wakeup(struct sock *sk) { - if(!sk->dead) + if(!sk->dead) { + dp_add_hint(sk->backmap, sk->dplock); wake_up_interruptible(sk->sleep); + } } void sock_def_error_report(struct sock *sk) { if (!sk->dead) { + dp_add_hint(sk->backmap, sk->dplock); wake_up_interruptible(sk->sleep); sock_wake_async(sk->socket,0); } @@ -989,6 +993,7 @@ void sock_def_readable(struct sock *sk, int len) { if(!sk->dead) { + dp_add_hint(sk->backmap, sk->dplock); wake_up_interruptible(sk->sleep); sock_wake_async(sk->socket,1); } @@ -1001,6 +1006,7 @@ */ if(!sk->dead && ((atomic_read(&sk->wmem_alloc) << 1) <= sk->sndbuf)) { + dp_add_hint(sk->backmap, sk->dplock); wake_up_interruptible(sk->sleep); /* Should agree with poll, otherwise some programs break */ @@ -1030,6 +1036,9 @@ sk->state = TCP_CLOSE; sk->zapped = 1; sk->socket = sock; + + sk->backmap = NULL; + sk->dplock = NULL; if(sock) { --- /usr/src/linux-2.2.14/net/ipv4/af_inet.c Wed Mar 29 22:47:00 2000 +++ net/ipv4/af_inet.c Thu Jul 13 18:28:12 2000 @@ -5,7 +5,7 @@ * * PF_INET protocol family socket handler. * - * Version: $Id: af_inet.c,v 1.87.2.5 1999/08/08 08:43:10 davem Exp $ + * Version: $Id: af_inet.c,v 1.3 2000/07/13 22:28:12 provos Exp $ * * Authors: Ross Biro, * Fred N. van Kempen, @@ -490,6 +490,8 @@ } sock->sk = NULL; sk->socket = NULL; + sk->backmap = NULL; + sk->dplock = NULL; sk->prot->close(sk, timeout); } return(0); --- /usr/src/linux-2.2.14/net/ipv4/tcp.c Wed Mar 29 22:47:51 2000 +++ net/ipv4/tcp.c Thu Jul 13 18:28:12 2000 @@ -5,7 +5,7 @@ * * Implementation of the Transmission Control Protocol(TCP). * - * Version: $Id: tcp.c,v 1.140.2.5 1999/09/23 19:21:16 davem Exp $ + * Version: $Id: tcp.c,v 1.4 2000/07/13 22:28:12 provos Exp $ * * Authors: Ross Biro, * Fred N. 
van Kempen, @@ -415,6 +415,7 @@ #include #include #include +#include #include #include @@ -558,8 +559,10 @@ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); poll_wait(file, sk->sleep, wait); + sk->backmap = &file->f_backmap; + sk->dplock = &file->f_dplock; if (sk->state == TCP_LISTEN) - return tcp_listen_poll(sk, wait); + return tcp_listen_poll(sk, wait) | POLLHINT; mask = 0; if (sk->err) @@ -600,7 +603,7 @@ if (tp->urg_data & URG_VALID) mask |= POLLPRI; } - return mask; + return mask | POLLHINT; } /* @@ -612,6 +615,7 @@ if (sk->dead) return; + dp_add_hint(sk->backmap, sk->dplock); wake_up_interruptible(sk->sleep); if (sock_wspace(sk) >= tcp_min_write_space(sk)) @@ -867,8 +871,10 @@ if (PSH_NEEDED) TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; - if (--tp->partial_writers > 0) + if (--tp->partial_writers > 0) { + dp_add_hint(sk->backmap, sk->dplock); wake_up_interruptible(sk->sleep); + } continue; } --- /usr/src/linux-2.2.14/net/unix/af_unix.c Wed Mar 29 22:47:01 2000 +++ net/unix/af_unix.c Thu Jul 13 18:28:13 2000 @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * - * Version: $Id: af_unix.c,v 1.76.2.2 1999/08/07 10:56:48 davem Exp $ + * Version: $Id: af_unix.c,v 1.3 2000/07/13 22:28:13 provos Exp $ * * Fixes: * Linus Torvalds : Assorted bug cures. @@ -103,6 +103,7 @@ #include #include #include +#include #include #include @@ -1483,7 +1484,9 @@ unsigned int mask; poll_wait(file, sk->sleep, wait); - mask = 0; + sk->backmap = &file->f_backmap; + sk->dplock = &file->f_dplock; + mask = POLLHINT; /* exceptional events? 
*/ if (sk->err) @@ -1513,6 +1516,7 @@ { if (sk->dead) return; + dp_add_hint(sk->backmap, sk->dplock); wake_up_interruptible(sk->sleep); if (sk->sndbuf - (int)atomic_read(&sk->wmem_alloc) >= MIN_WRITE_SPACE) sock_wake_async(sk->socket, 2); --- /usr/src/linux-2.2.14/drivers/char/mem.c Wed Mar 29 22:47:35 2000 +++ drivers/char/mem.c Wed Jul 26 14:31:33 2000 @@ -64,6 +64,9 @@ #ifdef CONFIG_USB_OHCI_HCD int ohci_hcd_init(void); #endif +#ifdef CONFIG_DEVPOLL +int devpoll_init(void); +#endif static ssize_t do_write_mem(struct file * file, void *p, unsigned long realp, const char * buf, size_t count, loff_t *ppos) @@ -687,6 +690,9 @@ #endif #ifdef CONFIG_PHONE telephony_init(); +#endif +#ifdef CONFIG_DEVPOLL + devpoll_init(); #endif return 0; }