/* 
 * Mach Operating System
 * Copyright (c) 1988 Carnegie-Mellon University
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */

/*
 *	File:	inode_pager.c
 *
 *	"Swap" pager that pages to/from Unix inodes.  Also
 *	handles demand paging from files.
 *
 * HISTORY
 * $Log:	inode_pager.c,v $
 * Revision 2.15  89/01/15  16:39:41  rpd
 * 	Updated includes for the new mach/ directory.
 * 	Use decl_simple_lock_data, simple_lock_addr.
 * 	[89/01/15  15:27:58  rpd]
 * 
 * Revision 2.14  89/01/12  11:16:20  rpd
 * 	Added <kern/std_types.h>.
 * 
 * Revision 2.13  89/01/10  23:33:43  rpd
 * 	Check return status of inode_pager_default allocation.
 * 	[89/01/10  13:36:37  rpd]
 * 	
 * 	Use object_copyin/object_copyout instead of port_copyin/port_copyout.
 * 	[89/01/09  14:50:15  rpd]
 * 
 * Revision 2.12  88/12/19  02:57:32  mwyoung
 * 	Remove more lint.
 * 	[88/12/18            mwyoung]
 * 	
 * 	Identify our task.
 * 	[88/12/12            mwyoung]
 * 	
 * 	Don't iput when encountering a previous paging error in
 * 	inode_pager_timeout, because the iget hasn't been done yet.
 * 	[88/12/06            mwyoung]
 * 	
 * 	MACH_XP: Allow paging to filesystems with large block sizes (by using
 * 	whole filesystem pages for a virtual page anyway).
 * 	
 * 	Change iswap_allocate() to account for wrong block sizes and
 * 	nearly-full filesystems.  Take preferences more seriously.
 * 	Print warnings when paging space is found in later (and less
 * 	desirable) passes.
 * 	[88/12/05            mwyoung]
 * 	
 * 	Use new XPR tags.
 * 	[88/11/22  01:15:22  mwyoung]
 * 	
 * 	Still check for client count in memory_object_terminate handler.
 * 	Restructure that routine to get the required operations out of
 * 	the way first.
 * 	[88/11/17  01:49:45  mwyoung]
 * 	
 * 	Prevent memory_object_terminate from destroying a memory_object
 * 	when another task has send rights.
 * 	
 * 	Forge a memory_object_terminate message when releasing a
 * 	memory_object (acquired through inode_pager_setup()) that
 * 	hasn't been used for mapping.  To do so, the reference must
 * 	be held, but this reference shouldn't prevent destruction.
 * 	Thus, these "dead" references are counted.  But this means
 * 	that memory_object_terminate can't throw away the inode_pager_t
 * 	if there are dead references; therefore, we mark it for
 * 	destruction by the last dead reference.
 * 	
 * 	All this owes to the lack of NO_MORE_SENDERS technology and the
 * 	fact that references are created behind the inode_pager's back
 * 	(in inode_pager_setup()).
 * 	[88/11/10            mwyoung]
 * 	
 * 	Added an empty handler for memory_object_copy().
 * 	[88/11/09            mwyoung]
 * 	
 * 	Fix the non-XP form of inode_uncache_try() also.
 * 	[88/11/06            mwyoung]
 * 	
 * 	Unify the versions of inode_read and inode_write somewhat by
 * 	defining the pager_return_t type internally for MACH_XP.
 * 	[88/11/04            mwyoung]
 * 	
 * 	Clean up inode_pager_t->pager and inode_pager_t->pager_global
 * 	during termination.
 * 	[88/11/04            mwyoung]
 * 	
 * 	Correct copy strategy setting in caching-related calls.  Allow
 * 	strategy to be boot/runtime settable for testing.
 * 	[88/11/03  19:14:07  mwyoung]
 * 
 * Revision 2.11  88/11/14  15:05:28  gm0w
 * 	10-14-88  Michael Young (mwyoung) at Carnegie Mellon
 * 	Recover lost line in inode_pager_terminate_object.
 * 	[88/11/14  14:57:39  mrt]
 * 
 * Revision 2.10  88/10/18  03:42:25  mwyoung
 * 	Eliminate uses of vm_object_uncache in non-MACH_XP clauses.
 * 	[88/10/15            mwyoung]
 * 	
 * 	Documented the data structures and protocols for future reference.
 * 	
 * 	Converted to use all new IPC and memory management primitives, so that
 * 	compatibility code can be destroyed.  Corrected port reference handling.
 * 	
 * 	Allow more than one client to use a memory object.  This can happen
 * 	either because of a race condition during shutdown, or because a client
 * 	has passed on access to the memory object to another host.  [In the
 * 	latter case, we make no consistency guarantees, but try not to crash :-).]
 * 	[88/10/02  17:08:22  mwyoung]
 * 	
 * 	Use the pager_request port for all uncaching operations...
 * 	vm_object_lookup only accepts the control port now.
 * 	
 * 	Add inode_uncache_try(), so that all caching knowledge is buried
 * 	in the inode_pager.  It's unclear why inode_pager_active() isn't
 * 	sufficient to find out, but I don't care to find out why right yet.
 * 	[88/09/18            mwyoung]
 * 	
 * 	Add some tracing code.
 * 	[88/09/18  19:01:22  mwyoung]
 * 	
 * 	Don't use iget() within iswap_allocate(), as it may take a lock
 * 	on a directory inode.  There's no reason that we need a real
 * 	inode anyway.
 * 	[88/08/31            mwyoung]
 * 
 * Revision 2.9  88/08/25  18:24:55  mwyoung
 * 	Corrected include file references.
 * 	[88/08/22            mwyoung]
 * 	
 * 	Use memory_object_data_error when throwing away a pagein
 * 	request, so that the offending thread can get an exception.
 * 	[88/08/20  03:05:31  mwyoung]
 * 	
 * 	MACH_XP: Make a feeble effort to deal with running out of paging
 * 	space -- remember errors on a per-object basis, and throw away
 * 	future requests for error-ridden objects.  [A better solution
 * 	might put aside messages when an error occurs.]
 * 	[88/08/16  04:17:51  mwyoung]
 * 	
 * 	Initialize inode_pager_t->paging inside inode_pager_create.
 * 	[88/08/16  00:32:02  mwyoung]
 * 	
 * 	Add inode_pager_release() to get rid of whatever
 * 	inode_pager_setup() gives out.
 * 	Add size argument to memory_object_create.
 * 	Handle new memory_object_terminate call.
 * 	Remove port_copyout of memory_manager_default to kernel_task.
 * 	[88/08/11  18:52:17  mwyoung]
 * 
 * Revision 2.8  88/08/06  19:24:11  rpd
 * Eliminated use of kern/mach_ipc_defs.h.
 * 
 * Revision 2.7  88/07/23  01:21:49  rpd
 * Changed more occurrences of port_enable to xxx_port_enable.
 * 
 * Revision 2.6  88/07/20  21:09:51  rpd
 * Use old forms of port_allocate and port_enable.
 * 
 * 15-Mar-88  Michael Young (mwyoung) at Carnegie-Mellon University
 *	Attempt to balance pagein requests with pageouts.
 *
 *  8-Mar-88  Michael Young (mwyoung) at Carnegie-Mellon University
 *	Actually free port hash table records.
 *
 * 24-Feb-88  David Golub (dbg) at Carnegie-Mellon University
 *	Handle IO errors on paging (non-XP only).
 *
 * 18-Jan-88  Michael Young (mwyoung) at Carnegie-Mellon University
 *	Delinted.
 *
 *  6-Jan-88  Michael Young (mwyoung) at Carnegie-Mellon University
 *	Use pager_data_unavailable when inode_read fails in inode_pagein!.
 *
 *  6-Dec-87  Michael Young (mwyoung) at Carnegie-Mellon University
 *	Move pager_cache() call and text bit handling inside
 *	inode_pager_setup().
 *
 * 24-Nov-87  Michael Young (mwyoung) at Carnegie-Mellon University
 *	Make all external calls other than inode manipulations go
 *	through IPC.
 *
 *	Condensed history:
 *		Try to avoid deadlock when allocating data structures
 *		 (avie, mwyoung).
 *		Try to printf rather than panic when faced with errors (avie).
 *		"No buffer code" enhancements. (avie)
 *		External paging version. (mwyoung, bolosky)
 *		Allow pageout to ask whether a page has been
 *		 written out.  (dbg)
 *		Keep only a pool of in-core inodes.  (avie)
 *		Use readahead when able. (avie)
 *		Require that inode operations occur on the master
 *		 processor (avie, rvb, dbg).
 *		Combine both "program text" and "normal file" handling
 *		 into one. (avie, mwyoung)
 *		Allocate paging inodes on mounted filesystems (mja);
 *		 allow preferences to be supplied (mwyoung).
 *
 * 12-Mar-86  David Golub (dbg) at Carnegie-Mellon University
 *	Created.
 */

#include <mach_nbc.h>
#include <mach_xp.h>

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/map.h>
#include <sys/dir.h>
#include <sys/user.h>
#include <sys/fs.h>
#include <sys/buf.h>
#include <sys/uio.h>
#include <sys/inode.h>
#include <sys/mount.h>

#include <mach/std_types.h>
#include <mach/mach_types.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <kern/parallel.h>
#include <sys/zalloc.h>

#if	MACH_XP
#include <vm/vm_object.h>
#include <sys/task.h>
#include <vm/vm_param.h>
#include <sys/kern_obj.h>
#include <sys/kern_port.h>
#include <kern/ipc_copyin.h>
#include <kern/ipc_copyout.h>
#include <sys/queue.h>
#endif	MACH_XP
#include <sys/port.h>
#include <vm/inode_pager.h>
#include <sys/mfs.h>

#if	MACH_XP
#include <vm/memory_object.h>
#else	MACH_XP
#include <vm/vm_pager.h>
#endif	MACH_XP

#include <sys/xpr.h>

#ifndef	private
#define	private
#endif	private

boolean_t	inode_pager_debug = FALSE;


/*
 *	Inode_pager data structure protocols:
 *
 *	The basic data structure is the inode_pager_t, which
 *	represents a particular (on-disk) inode, and thus a
 *	particular memory object.
 *
 *	Because of other demands for in-memory inodes, the
 *	inode_pager cannot consume too many inode structures.
 *	The inode_pager_iget() routine is used to get a real
 *	inode (including one inode reference) for a particular
 *	inode_pager_t; the inode_pager_iput() routine allows
 *	the inode to be recycled.  [The inode_pager_iget() may
 *	be called multiple times, through different threads.
 *	The count of inode_pager_iget() references is maintained
 *	internally, and does not get reflected in the actual
 *	inode reference count (other than the original call).]
 *
 *	Inode_pager objects can be created through either of
 *	two mechanisms:
 *		User mapping request:
 *			a user requests a memory object that
 *			corresponds to a particular inode;
 *		Kernel temporary object:
 *			the kernel provides a memory object that
 *			it uses for paging temporary data.
 *
 *	To satisfy a user request, the inode_pager must first find
 *	the memory_object associated with that inode if it has
 *	already been created.  [The inode must be resident, because
 *	the requestor must have a reference.]  Thus, the inode
 *	structure has been modified to contain the memory_object port.
 *
 *	This field in the inode is also used to prevent the
 *	destruction/truncation of inodes used strictly for paging.
 *
 *	Currently, the routines in the inode_pager are called
 *	from either of two sources:
 *		Inode_pager task:
 *			A separate singly-threaded task is created
 *			to service the memory management interface.
 *		Kernel-state user threads:
 *			Mapping and cache control requests are made
 *			from the kernel context of the client thread.
 *	The kernel-context threads, as well as outside use of the inode
 *	data structure, require the inode_pager to synchronize as
 *	though it were multi-threaded.
 *
 *	The routines that act within the inode_pager task make all
 *	Mach kernel function requests through the RPC interface, and
 *	thus use the non-KERNEL data types for data objects (tasks,
 *	memory objects, ports).  Currently, the value of task_self()
 *	for the inode_pager task is cached in the inode_pager_self
 *	variable.
 *
 *	Despite being a separate task, the inode_pager runs within
 *	the kernel virtual address space, so that it may directly access
 *	inode data structures.  Memory allocation may also be done
 *	using the internal kernel functions.
 *
 *	The kernel-context threads also complicate port management.
 *	In addition to maintaining the port names known to the inode_pager
 *	task (including conversion between port names and inode_pager_t's),
 *	the data structures must contain the global names for the
 *	memory_object and memory_object_control ports.
 *
 *	Port rights and references are maintained as follows:
 *		Memory object port:
 *			The inode_pager task has all rights, and
 *			keeps one reference for the global name stored
 *			with the inode_pager_t structure.  [The port
 *			recorded in the inode is the global name, and is
 *			copied from the	inode_pager_t, sharing that
 *			reference.]
 *		Memory object control port:
 *			The inode_pager task has only send rights,
 *			and keeps one reference for the global name
 *			it stores.  [As with the memory_object port,
 *			the global name is copied into the inode itself,
 *			so that control functions can be instigated by
 *			kernel-context client threads.]
 *		Memory object name port:
 *			The inode_pager task has only send rights.
 *			The name port is not even recorded.
 *	Regardless how the object is created, the control and name
 *	ports are created by the kernel and passed through the memory
 *	management interface.
 *
 *	The inode_pager assumes that access to its memory objects
 *	will not be propagated to more than one host, and therefore
 *	provides no consistency guarantees beyond those made by the
 *	kernel.
 *
 *	In the event that more than one host attempts to use an inode
 *	memory object, the inode_pager will only record the last set
 *	of port names.  [This can happen with only one host if a new
 *	mapping is being established while termination of all previous
 *	mappings is taking place.]  Currently, the inode_pager assumes
 *	that its clients adhere to the initialization and termination
 *	protocols in the memory management interface; otherwise, port
 *	rights or out-of-line memory from erroneous messages may be
 *	allowed to accumulate.
 *
 *	As mentioned above, the inode_pager can also provide the backing
 *	storage for temporary memory objects.  Thus, it must adhere to
 *	the restrictions placed on default memory managers for those
 *	temporary objects (and currently, for other objects as well).
 *	
 *	[The phrase "currently" has been used above to denote aspects of
 *	the implementation that could be altered without changing the rest
 *	of the basic documentation.]
 */

/*
 *	Basic inode pager structure
 */

/*
 *	Residency state of the inode behind an inode_pager_t.
 *	Transitions are made under istruct_lock by inode_pager_iput
 *	and inode_pager_iget.
 */
typedef enum {
		IS_OUT,		/* inode released; only (dev, fs, ino) is recorded */
		IS_IN_MEMORY,	/* inode.ip refers to an in-memory inode */
		IS_BUSY		/* being released or re-fetched with the lock
				 * dropped; inode_pager_iget sleeps until it changes
				 */
	} inode_pager_state_t;

/*
 *	One record per memory object managed by the inode_pager.
 *
 *	The `inode' union is interpreted according to `state':
 *	inode.ip is used while the state is IS_IN_MEMORY, and
 *	inode.fs_ip holds the saved (dev, fs, ino) triple once the
 *	in-memory inode has been released (IS_OUT).
 */
typedef struct istruct {
#if	!MACH_XP
	boolean_t	is_device;	/* Must be first -- see vm_pager.h */
#endif	!MACH_XP
	queue_chain_t	lru_links;	/* LRU queue links */
	int		client_count;	/* How many memory_object_init's
					 * have we received
					 */
	int		dead_references;/* Number of references that don't
					 * prevent destruction
					 */
	boolean_t	free_me;	/* Should the last dead reference
					 * free the structure?
					 * A poor man's reference count.
					 */
	int		use_count;	/* How many times is the inode
					 * in use?  (inode_pager_iget
					 * increments, inode_pager_iput
					 * decrements)
					 */
	inode_pager_state_t state;	/* istruct state */
	memory_object_t	pager;		/* Pager port */
	port_t		pager_request;	/* Known request port */
	port_t		pager_name;	/* Known name port */
	memory_object_t	pager_global;	/* XXX global name */
	port_t		pager_request_global; /* XXX (global) request port */
	int		errors;		/* Pageout error count */
	int		paging_shift;	/* How to adjust for fs block size:
					 * number of halvings needed to bring
					 * the fs block size down to PAGE_SIZE
					 * (0 unless used for paging)
					 */

	unsigned int
	/* boolean_t */	queued:1,	/* on LRU queue? */
			cached:1,	/* Can be cached? */
			paging:1;	/* inode used for paging */
	union {
		struct {
			dev_t		dev;	/* device */
			struct fs	*fs;	/* file system */
			ino_t		ino;	/* inode */
		} fs_ip;		/* identity saved when the inode is released */
		struct inode	*ip;	/* in memory inode */
	} inode;
} *inode_pager_t;

/* The "no record" value returned by failed lookups and allocations. */
#define	INODE_PAGER_NULL	((inode_pager_t) 0)

#if	MACH_XP
task_t		inode_pager_task;	/* task handed to port_allocate,
					 * port_set_add, object_copyin and
					 * object_copyout for pager ports
					 */
port_t		inode_pager_self;	/* cached task_self() value for the
					 * inode_pager task (see protocol
					 * notes above)
					 */
port_set_name_t	inode_pager_enabled_set;/* port set that each newly allocated
					 * memory object port is added to
					 */

/* NOTE(review): the users of this lock are not visible in this part of the file. */
decl_simple_lock_data(,inode_pager_init_lock)

/*
 *	Stuff for keeping a hash table of ports for inode_pager
 *	backed objects.
 *
 *	Each inode_port_entry associates one port name with the
 *	inode_pager_t registered under it, and is chained into the
 *	bucket selected by inode_port_hash().
 *
 *	[INODE_PAGER_NULL is defined once, next to the inode_pager_t
 *	typedef; it is deliberately not repeated here.]
 */

#define		INODE_PORT_HASH_COUNT	127	/* number of hash buckets */
typedef struct inode_port_entry {
		queue_chain_t	links;		/* bucket chain links */
		port_t		name;		/* port name (lookup key) */
		inode_pager_t	pager_rec;	/* record registered for it */
} *inode_port_entry_t;

private
queue_head_t	inode_port_hashtable[INODE_PORT_HASH_COUNT];	/* one chain per bucket */
private
zone_t		inode_port_hash_zone;	/* zone supplying inode_port_entry records */


/*
 *	Bucket index for a port name: the low 24 bits of the name,
 *	taken as an int, modulo the number of buckets.
 */
#define		inode_port_hash(name_port) \
			(((int)(name_port) & 0xffffff) % INODE_PORT_HASH_COUNT)

private
void		inode_port_hash_insert(name_port, rec)
	port_t		name_port;
	inode_pager_t	rec;
{
	register
	inode_port_entry_t	new_entry =
		(inode_port_entry_t) zalloc(inode_port_hash_zone);

	new_entry->name = name_port;
	new_entry->pager_rec = rec;
	queue_enter(&inode_port_hashtable[inode_port_hash(name_port)],
			new_entry, inode_port_entry_t, links);
}

private
void		inode_port_hash_init()
{
	register	i;

	inode_port_hash_zone = zinit((vm_size_t) sizeof(struct inode_port_entry),
			(vm_size_t) 10 * ninode * sizeof(struct inode_port_entry),
			PAGE_SIZE, FALSE, "inode_pager port hash");

	for (i = 0; i < INODE_PORT_HASH_COUNT; i++) 
		queue_init(&inode_port_hashtable[i]);
}

/*
 *	Routine:	inode_pager_lookup
 *	Purpose:
 *		Find the inode_pager_t registered under the given
 *		port name.
 *	Returns:
 *		The record of the first matching entry in the bucket,
 *		or INODE_PAGER_NULL if the name is not in the table.
 */
private
inode_pager_t	inode_pager_lookup(name_port)
	register port_t name_port;
{
	register queue_t		chain;
	register inode_port_entry_t	cur;

	chain = (queue_t) &inode_port_hashtable[inode_port_hash(name_port)];

	for (cur = (inode_port_entry_t) queue_first(chain);
	     !queue_end(chain, &cur->links);
	     cur = (inode_port_entry_t) queue_next(&cur->links)) {
		if (cur->name == name_port)
			return(cur->pager_rec);
	}

	return(INODE_PAGER_NULL);
}

/*
 *	Routine:	inode_pager_lookup_external
 *	Purpose:
 *		Find the inode_pager_t for a port given by a caller
 *		outside the inode_pager task.
 *	Implementation:
 *		A reference is taken on the port, which is then handed
 *		to object_copyout for inode_pager_task; the value that
 *		object_copyout stores back is used as the hash key.
 *		[Presumably object_copyout consumes the reference taken
 *		here -- confirm against the IPC code.]
 *	Note:
 *		The port is dereferenced by port_reference, so callers
 *		must not pass a null port.
 */
private
inode_pager_t	inode_pager_lookup_external(name_port)
	port_t		name_port;
{
	port_t		translated = name_port;

	port_reference((kern_port_t) name_port);
	object_copyout(inode_pager_task, (kern_obj_t) name_port,
		       MSG_TYPE_PORT, &translated);

	return(inode_pager_lookup(translated));
}


private
void		inode_port_hash_delete(name_port)
	register name_port;
{
	register queue_t bucket = (queue_t) &inode_port_hashtable[inode_port_hash(name_port)];
	register inode_port_entry_t entry = (inode_port_entry_t) queue_first(bucket);

	while (!queue_end(bucket,&entry->links)) {
		if (entry->name == name_port) {
			queue_remove(bucket, entry, inode_port_entry_t,links);
			zfree(inode_port_hash_zone, (vm_offset_t) entry);
			break;
		}
		entry = (inode_port_entry_t)queue_next(&entry->links);
	}
}
#else	MACH_XP

/*
 *	Without MACH_XP there is no port hash table: both lookups
 *	simply reinterpret the pager value as the inode_pager_t.
 *	[inode_pager_create stores the record's own address as the
 *	pager in this configuration.]
 */
#define	inode_pager_lookup(pager)		((inode_pager_t) pager)
#define	inode_pager_lookup_external(pager)	((inode_pager_t) pager)

#endif	MACH_XP

/*
 *	"No buffer code" support.  When MACH_NBC is configured, NNBC
 *	is forced to 0 here, so the section below is compiled out
 *	unless NNBC is given a positive value some other way.
 */
#if	MACH_NBC > 0
#define	NNBC	0	/* Doesn't work yet? */
#endif	MACH_NBC > 0

#if	NNBC > 0
#include <sys/conf.h>
#include <vm/vm_pageout.h>

/* Buffer get/put helpers; defined outside this part of the file. */
extern struct buf *ipb_get();
extern void ipb_put();
#endif	NNBC > 0

/*
 *	Basic inode pager data structures
 */

int			istruct_qcount;		/* # in the queue */
int			istruct_qmax;		/* max to put in queue */
int			istruct_released;	/* # not in memory */
zone_t			istruct_zone;		/* zone supplying struct istruct records */
queue_head_t		istruct_queue;		/* LRU reclaim list; oldest entry first */
decl_simple_lock_data(,	istruct_lock)		/* held around queue, count and state updates */

#if	MACH_XP
/*
 *	Routine:	inode_uncache
 *	Purpose:
 *		Ask the kernel to stop caching the memory object
 *		associated with this inode, if there is one.
 *	Implementation:
 *		The object is found through the inode's pager_request
 *		port; nothing is done if that port is null.  The
 *		attributes are set with the arguments TRUE, FALSE,
 *		MEMORY_OBJECT_COPY_DELAY [presumably "ready", "may not
 *		be cached", delayed copy -- see
 *		memory_object_set_attributes].
 */
void		inode_uncache(ip)
	struct inode	*ip;
{
	if (ip->vm_info->pager_request == PORT_NULL)
		return;

	memory_object_set_attributes(
		vm_object_lookup(ip->vm_info->pager_request),
		TRUE, FALSE, MEMORY_OBJECT_COPY_DELAY);
}
/*
 *	Routine:	inode_uncache_try
 *	Purpose:
 *		Remove the inode's memory object from the cache, but
 *		only if nobody else is using it.
 *	Returns:
 *		FALSE	if the inode has no pager_request port, or the
 *			object's reference count is not exactly one
 *			(the reference from the lookup is then dropped);
 *		TRUE	if there is no object for the port, or the
 *			object's attributes were reset.
 */
boolean_t	inode_uncache_try(ip)
	struct inode	*ip;
{
	vm_object_t	object;

	if (ip->vm_info->pager_request == PORT_NULL)
		return(FALSE);

	object = vm_object_lookup(ip->vm_info->pager_request);

	/*
	 *	If there's no memory object for this port, it
	 *	must have been memory_object_terminate'd, and
	 *	thus is not in the cache.
	 */
	if (object == VM_OBJECT_NULL)
		return(TRUE);

	/*
	 *	Someone besides us holds the object: leave it alone.
	 */
	if (object->ref_count != 1) {
		vm_object_deallocate(object);
		return(FALSE);
	}

	memory_object_set_attributes(object, TRUE, FALSE, MEMORY_OBJECT_COPY_DELAY);
	return(TRUE);
}
#endif	MACH_XP

/*
 *	Routine:	inode_pager_iput
 *	Function:
 *		Give back one use of this inode_pager_t (the
 *		counterpart of inode_pager_iget).
 *	Implementation:
 *		The use count is decremented.  A structure used for
 *		paging is moved to the tail of the LRU reclaim list
 *		once its use count reaches zero.  While the list is
 *		over its limit, structures are taken from the head:
 *		each has its (dev, fs, ino) identity saved and its
 *		in-memory inode released with irele.
 */
private
void		inode_pager_iput(is)
	register inode_pager_t	is;
{
	register inode_pager_t	victim;
	register struct inode	*victim_ip;

	simple_lock(&istruct_lock);

	is->use_count--;
	assert(is->use_count >= 0);

	/*
	 *	Only inodes used for paging are ever queued (?)
	 */
	if (is->paging) {
		/*
		 *	A structure that is already on the reclaim list
		 *	(i.e., not brand new) leaves its old position.
		 */
		if (is->queued) {
			queue_remove(&istruct_queue, is, inode_pager_t, lru_links);
			istruct_qcount--;
		}

		/*
		 *	It goes (back) on the list only when this
		 *	module has no remaining uses of it.
		 */
		is->queued = (is->use_count == 0);
		if (is->queued) {
			queue_enter(&istruct_queue, is, inode_pager_t, lru_links);
			istruct_qcount++;
		}
	}

	/*
	 *	Trim the list back to its quota, oldest entries first.
	 */
	while ((istruct_qcount > istruct_qmax) &&
	       !queue_empty(&istruct_queue)) {
		victim = (inode_pager_t) queue_first(&istruct_queue);
		queue_remove(&istruct_queue, victim, inode_pager_t, lru_links);
		istruct_qcount--;
		istruct_released++;
		victim->queued = FALSE;

		/*
		 *	Remember which inode this was.  The triple
		 *	shares storage with inode.ip, so the pointer
		 *	is fetched before the triple is written.
		 */
		victim_ip = victim->inode.ip;
		victim->inode.fs_ip.dev = victim_ip->i_dev;
		victim->inode.fs_ip.fs = victim_ip->i_fs;
		victim->inode.fs_ip.ino = victim_ip->i_number;

		/*
		 *	The queue lock cannot be held across irele, so
		 *	the structure is marked busy for the duration;
		 *	anyone waiting in inode_pager_iget is awakened
		 *	once the state is settled.
		 */
		victim->state = IS_BUSY;
		simple_unlock(&istruct_lock);
		irele(victim_ip);
		simple_lock(&istruct_lock);
		victim->state = IS_OUT;
		thread_wakeup((int) victim);
	}

	simple_unlock(&istruct_lock);
}

/*
 *	inode_pager_iget:
 *
 *	Get the in-memory inode corresponding to the specified paging
 *	space, and guarantee that it will remain in memory (until
 *	further action is taken, i.e. the matching inode_pager_iput).
 *
 *	The structure's use count is raised and it is taken off the
 *	reclaim list.  If the inode is already in memory it is simply
 *	returned; otherwise it is fetched again with iget from the
 *	saved (dev, fs, ino) triple, and the global pager ports are
 *	copied back into it.
 *
 *	The inode is returned unlocked.  Panics if given a null
 *	structure or if iget fails.
 */
private
struct inode *inode_pager_iget(is)
	inode_pager_t	is;
{
	register struct inode *ip;

	if (is == INODE_PAGER_NULL) {
		panic("inode_pager_iget: null");
		return((struct inode *) 0);
	}

	simple_lock(&istruct_lock);

	/*
	 *	Pin the structure so that it cannot be reclaimed.
	 */
	is->use_count++;
	if (is->queued) {
		queue_remove(&istruct_queue, is, inode_pager_t, lru_links);
		istruct_qcount--;
		is->queued = FALSE;
	}

	/*
	 *	Wait out any release or fetch already in progress.
	 */
	while (is->state == IS_BUSY) {
		thread_sleep((int) is, simple_lock_addr(istruct_lock), FALSE);
		simple_lock(&istruct_lock);
	}

	/*
	 *	Easy case: the inode never left.
	 */
	if (is->state == IS_IN_MEMORY) {
		ip = is->inode.ip;
		simple_unlock(&istruct_lock);
		return(ip);
	}

	/*
	 *	The inode was released.  Mark the structure busy, so
	 *	the lock can be dropped around iget, and bring it back.
	 */
	is->state = IS_BUSY;
	simple_unlock(&istruct_lock);

	ip = iget(is->inode.fs_ip.dev, is->inode.fs_ip.fs,
			is->inode.fs_ip.ino);
	if (ip == (struct inode *) 0)
		panic("inode_pager_iget: unable");

	ip->vm_info->pager = is->pager_global;
	ip->vm_info->pager_request = is->pager_request_global;
	iunlock(ip);

	simple_lock(&istruct_lock);
	istruct_released--;
	is->inode.ip = ip;
	is->state = IS_IN_MEMORY;
	thread_wakeup((int) is);
	simple_unlock(&istruct_lock);

	return(ip);
}

/*
 *	inode_pager_create
 *
 *	Create an istruct corresponding to the given ip.
 *
 *	Arguments:
 *		ip	the (in-memory) inode to be represented
 *		canwait	non-MACH_XP only: whether the allocation may
 *			block (unused under MACH_XP)
 *		paging	whether the inode is used for paging
 *
 *	Returns the new structure, or INODE_PAGER_NULL if none
 *	could be allocated.
 *
 *	The structure takes its own reference on the inode
 *	(iincr_chk), and the initial use is given back through
 *	inode_pager_iput before returning.
 *
 *	This may potentially cause other incore inodes to be
 *	released (but remembered by the istruct).
 */
private
inode_pager_t	inode_pager_create(ip, canwait, paging)
	register struct inode	*ip;
	boolean_t		canwait;
	boolean_t		paging;
{
	register inode_pager_t	is;

	/*
	 *	XXX This can still livelock -- if the
	 *	pageout daemon needs an inode_pager record
	 *	it won't get one until someone else
	 *	refills the zone.
	 */

#if	MACH_XP
#ifdef	lint
	canwait++;
#endif	lint
	is = (struct istruct *) zalloc(istruct_zone);
#else	MACH_XP
	if (canwait || (vm_page_free_count > 10))
		is = (struct istruct *) zalloc(istruct_zone);
	else
		is = (struct istruct *) zget(istruct_zone);
#endif	MACH_XP

	if (is == INODE_PAGER_NULL)
		return(is);

#if	MACH_XP
	is->pager_global = MEMORY_OBJECT_NULL;
	is->pager_request_global = PORT_NULL;
	is->pager = MEMORY_OBJECT_NULL;
	is->pager_request = PORT_NULL;
#else	MACH_XP
	is->is_device = FALSE;
	is->pager_request_global = is->pager_global = (memory_object_t) is;
#endif	MACH_XP
	is->client_count = 0;
	is->dead_references = 0;
	is->free_me = FALSE;
	is->use_count = 1;
	is->paging = paging;
	ip->vm_info->pager = is->pager_global;
	ip->vm_info->pager_request = is->pager_request_global;
	is->state = IS_IN_MEMORY;
	is->inode.ip = ip;
	is->queued = FALSE;
	is->errors = 0;

	/*
	 *	The record comes straight from the zone, so give the
	 *	remaining fields known values too, rather than leaving
	 *	whatever the previous owner stored there.  Callers that
	 *	care (e.g., inode_pager_setup for "cached") overwrite
	 *	these afterwards.
	 */
	is->cached = FALSE;
	is->pager_name = PORT_NULL;

	/*
	 *	For paging inodes, count how many times the filesystem
	 *	block size must be halved to fit in a page.
	 */
	is->paging_shift = 0;
	if (paging) {
		int		block_size;

		for (block_size = ip->i_fs->fs_bsize;
		     block_size > PAGE_SIZE;
		     block_size >>= 1)
			is->paging_shift++;
	}

	iincr_chk(ip);

	/*
	 *	Give back the initial use; for a paging inode this
	 *	puts the structure on the reclaim list.
	 */
	inode_pager_iput(is);

	XPR(XPR_INODE_PAGER, ("inode_pager_create: returning %x", (int) is));
	return(is);
}

/*
 *	Routine:	inode_pager_setup
 *	Purpose:
 *		Returns a memory object (that may be used in
 *		a call to vm_map) representing the given inode.
 *	Arguments:
 *		ip		the inode; a null pointer yields
 *				MEMORY_OBJECT_NULL
 *		is_text		if set, the inode is marked ITEXT
 *		can_cache	whether a newly created object may
 *				be cached
 *	Side effects:
 *		When the memory object returned by this call
 *		is no longer needed (e.g., it has been mapped
 *		into the desired address space), it should be
 *		deallocated using inode_pager_release.
 *	NOTE:
 *		This call does not run in the context of the 
 *		inode_pager task, and therefore must translate
 *		the ports it gets itself.
 */
memory_object_t	inode_pager_setup(ip, is_text, can_cache)
	struct inode	*ip;
	boolean_t	is_text;
	boolean_t	can_cache;
{
	/*
	 *	Reject a null inode before calling unix_master(), so
	 *	that every unix_master() in this routine is matched by
	 *	the unix_release() at the bottom.
	 */
	if (ip == (struct inode *) 0)
		return(MEMORY_OBJECT_NULL);

	unix_master();

	if (is_text)
		ip->i_flag |= ITEXT;

	if (ip->vm_info->pager == MEMORY_OBJECT_NULL) {
#if	MACH_XP
		inode_pager_t	is = inode_pager_create(ip, TRUE, FALSE);

		if (is != INODE_PAGER_NULL) {
			if (port_allocate(inode_pager_task, &is->pager) != KERN_SUCCESS)
				panic("inode_pager_setup: can't allocate port");
			if (port_set_add(inode_pager_task,
					inode_pager_enabled_set,
					is->pager)
			    != KERN_SUCCESS)
				panic("inode_pager_setup: can't enable");

			inode_port_hash_insert(is->pager, is);

			/*
			 *	Get the global name for the port.
			 */

			(void) object_copyin(inode_pager_task, is->pager,
					     MSG_TYPE_PORT, FALSE,
					     (kern_obj_t *) &is->pager_global);

			/*
			 *	Save the global name in the inode.
			 *
			 *	[This step comes last so that we've got the association
			 *	entirely set up before the pager value can be grabbed
			 *	from the inode structure by someone else while we're
			 *	blocked in the port operations.]
			 */

			ip->vm_info->pager = is->pager_global;

			is->cached = can_cache;
		}
#else	MACH_XP
		(void) inode_pager_create(ip, TRUE, FALSE);
		if (can_cache)
			pager_cache(vm_object_lookup(ip->vm_info->pager), TRUE);
#endif	MACH_XP
	}

#if	MACH_XP
	assert(ip->vm_info->pager != MEMORY_OBJECT_NULL);

	/*
	 *	This reference belongs to the caller, and is given
	 *	back through inode_pager_release.
	 */
	port_reference(ip->vm_info->pager);
#endif	MACH_XP

	unix_release();
	return(ip->vm_info->pager);
}

/*
 *	Routine:	inode_pager_no_more_senders
 *	Purpose:
 *		Tell whether there are senders (other than the
 *		number of given dead references) to the
 *		given object.
 */
boolean_t	inode_pager_no_more_senders(object, dead_references)
	memory_object_t	object;		/* GLOBAL name of memory object */
	int		dead_references;
{
	kern_obj_t	port_thingy = (kern_obj_t) object;
	int		count;

	if (object == PORT_NULL)
		return(TRUE);

	simple_lock(&port_thingy->obj_data_lock);
	count = port_thingy->obj_references;
	simple_unlock(&port_thingy->obj_data_lock);

	/*
	 *	If the inode_pager has the only port rights,
	 *	then there should be the following references:
	 *		1 (send, receive, owner) for inode_pager_task
	 *		1 port set membership
	 *		1 for an inode_pager_t->pager_global field
	 */

	return((count - dead_references) <= 3);
}

/*
 *	Routine:	inode_pager_release
 *	Purpose:
 *		Relinquish any references or rights that were
 *		associated with the result of a call to
 *		inode_pager_setup.
 *	Arguments:
 *		object	the memory object returned by
 *			inode_pager_setup; MEMORY_OBJECT_NULL is
 *			accepted and ignored
 *	NOTE:
 *		This call, like inode_pager_setup, does not run
 *		in the context of the inode_pager.
 */
void		inode_pager_release(object)
	memory_object_t	object;
{
#if	MACH_XP
	inode_pager_t	is;

	/*
	 *	A null object has nothing to release.  It must be
	 *	rejected before the lookup, because the lookup takes
	 *	a port reference on whatever it is given.
	 */
	if (object == MEMORY_OBJECT_NULL)
		return;

	is = inode_pager_lookup_external(object);

	/*
	 *	If we have the only reference to the object,
	 *	and chose not to map it, we have to forge
	 *	the termination.  [Note that this is the
	 *	external form of memory_object_terminate,
	 *	and results in an IPC event that will be
	 *	processed in the inode_pager task context,
	 *	not ours.  We're in no hurry.]
	 *
	 *	We have to hold a reference in order to send
	 *	the termination message.  Unfortunately, that
	 *	reference might prevent the inode_pager task
	 *	from actually performing the intended destruction.
	 *	Thus, we attach to the inode_pager_t a count of
	 *	references that	don't affect destruction.
	 *
	 *	XXX If NO_MORE_SENDERS support becomes available,
	 *	we don't have to check this manually.
	 *	XXX Alternatively, if we could atomically make
	 *	the memory_object_terminate call and lose our
	 *	reference, then this can be greatly simplified...
	 *	but we don't want to make that sort of assumption
	 *	(i.e., that after a message is queued, the caller
	 *	won't be put to sleep) about the IPC system.
	 */

	unix_master();

	if (is != INODE_PAGER_NULL) {
		int		dead;

		/*
		 *	Our own reference is counted as "dead" for
		 *	as long as we still hold it.
		 */
		dead = ++is->dead_references;
		if (inode_pager_no_more_senders(object, dead))
			memory_object_terminate(object, PORT_NULL, PORT_NULL);
	}

	port_release(object);

	/*
	 *	If the structure was marked for destruction while
	 *	we held our dead reference, the last such reference
	 *	frees it.
	 */
	if (is != INODE_PAGER_NULL) {
		if (--is->dead_references == 0 && is->free_me)
			zfree(istruct_zone, (vm_offset_t) is);
	}

	unix_release();
#else	MACH_XP
#ifdef	lint
	object++;
#endif	lint
#endif	MACH_XP
}

/*
 *	Routine:	inode_pager_active
 *	Purpose:
 *		Reports the "paging" flag of the inode_pager_t that
 *		stands behind the given memory object.
 *		[The original description reads: indicates whether
 *		the given memory object is in use as a text file; if
 *		not, it can be flushed from the memory cache and
 *		written as a file.  NOTE(review): that wording does
 *		not obviously match the flag returned -- confirm
 *		with the callers.]
 *	Returns:
 *		FALSE for a null pager, or when the lookup fails (a
 *		message is printed in that case); otherwise the flag.
 *	NOTE:
 *		This call does not run in the context of the inode_pager.
 */
boolean_t	inode_pager_active(pager)
	memory_object_t	pager;
{
	inode_pager_t	is;

	if (pager == MEMORY_OBJECT_NULL)
		return(FALSE);

	is = inode_pager_lookup_external(pager);
	if (is == INODE_PAGER_NULL) {
		printf("inode_pager_active: lookup of pager 0x%x failed\n", pager);
		return(FALSE);
	}

	return(is->paging);
}

#if	MACH_XP
/*
 *	Result codes returned by inode_read and inode_write.  The
 *	type is defined here for MACH_XP only; presumably the other
 *	configuration gets it from <vm/vm_pager.h> -- confirm.
 */
#define	PAGER_SUCCESS		0	/* page read or written */
#define	PAGER_ABSENT		1	/* pager does not have page */
#define	PAGER_ERROR		2	/* pager unable to read or write page */

typedef	int		pager_return_t;
#endif	MACH_XP

pager_return_t	inode_read();	/* forward */
pager_return_t	inode_write();	/* forward */

#if	MACH_XP
/*
 *	Make all calls involving the kernel interface go through IPC.
 */

#include <mach/mach_user_internal.h>

/*
 *	Rename all of the functions in the pager interface,
 *	to avoid confusing them with the kernel interface.
 *
 *	From this point on, a routine written under one of the
 *	memory_object_* names below is compiled under the inode_*
 *	name on the right.  Note that data_request and data_unlock
 *	both map to inode_pagein.
 */

#define	memory_object_init		inode_pager_init_pager
#define	memory_object_terminate		inode_pager_terminate_object
#define	memory_object_data_request	inode_pagein
#define	memory_object_data_unlock	inode_pagein
#define	memory_object_data_write	inode_data_write
#define	memory_object_data_initialize	inode_data_initialize
#define	memory_object_create		inode_pager_create_pager
#define	memory_object_copy		inode_pager_copy
#define	memory_object_lock_completed	inode_pager_lock_completed


/*
 *	Statistics: bumped once per page handled by inode_pager_timeout
 *	(pagein) and by the data_write handler (pageout).
 */
int		inode_pager_pagein_count = 0;
int		inode_pager_pageout_count = 0;

/*
 *	Single page-sized buffer that inode_pager_timeout reads into
 *	before handing the data to the kernel; allocated in inode_pager().
 */
vm_offset_t	inode_pager_input_buffer;

/*
 *	One queued pagein request.  The fields simply record the
 *	arguments of the memory_object_data_request call so that it can
 *	be serviced later by inode_pager_timeout.
 */
typedef struct data_request {
	queue_chain_t	others;			/* links on data_request_queue */
	memory_object_t	pager;			/* memory object being paged */
	port_t		reply_to;		/* port the reply calls are sent on */
	vm_offset_t	offset;			/* offset within the memory object */
	vm_size_t	length;			/* requested length (checked == PAGE_SIZE) */
	vm_prot_t	protection_required;	/* recorded; not consulted in this file chunk */
} *data_request_t;
	
queue_head_t	data_request_queue;	/* pending pagein requests, FIFO */
zone_t		data_request_zone;	/* zone backing struct data_request */

/*
 *	Option used for the server loop's message receive: set to
 *	RCV_TIMEOUT while requests are queued, MSG_OPTION_NONE otherwise.
 */
msg_option_t	inode_pager_receive_option = MSG_OPTION_NONE;

kern_return_t	memory_object_data_request(pager, reply_to, offset,
				   length, protection_required)
	memory_object_t	pager;
	port_t		reply_to;
	vm_offset_t	offset;
	vm_size_t	length;
	vm_prot_t	protection_required;
{
	register
	data_request_t	entry =
			 (data_request_t)zalloc(data_request_zone);
		
	if (inode_pager_debug)
		printf("%s: pager=%d, offset=0x%x, length=0x%x\n",
			"memory_object_data_request(inode_pager)",
			pager, offset, length);

	if (length != PAGE_SIZE)
		panic("inode_pagein: bad length");

	/*
	 *	Queue this request until we're not busy.
	 */

	entry->pager = pager;
	entry->reply_to = reply_to;
	entry->offset = offset;
	entry->length = length;
	entry->protection_required = protection_required;
	queue_enter(&data_request_queue, entry,
			data_request_t, others);

	XPR(XPR_INODE_PAGER_DATA, ("memory_object_data_request(inode_pager): pager 0x%x, offset 0x%x",
				entry->pager, entry->offset));

	/*
	 *	Time out when there's nothing else to do.
	 */

	inode_pager_receive_option = RCV_TIMEOUT;

	/*
	 *	XXX May be necessary to check for queue buildup.
	 */

	return(KERN_SUCCESS);
}

/*
 *	inode_pager_timeout:
 *
 *	Service at most one queued pagein request.
 *
 *	With an empty queue, the receive option is reset to
 *	MSG_OPTION_NONE and nothing else happens.  Otherwise the first
 *	request is dequeued, the page is read from the inode into
 *	inode_pager_input_buffer, and the reply port is answered with
 *	data_provided, data_unavailable or data_error according to what
 *	inode_read reported.  The request structure is always freed.
 *	A non-successful result is only logged.
 */
void		inode_pager_timeout()
{
	inode_pager_t	is;
	register
	struct inode	*ip;
	register
	data_request_t	entry;
	kern_return_t	result = KERN_SUCCESS;

	/*
	 *	If there are no pending pagein requests
	 */

	if (queue_empty(&data_request_queue)) {
		/*
		 *	Then wait synchronously for the next
		 *	request.
		 */

		inode_pager_receive_option = MSG_OPTION_NONE;
		return;
	}

	/*
	 *	Process exactly one pagein request, but continue
	 *	to accept timeouts on message receive operations.
	 */

	queue_remove_first(&data_request_queue, entry,
			   data_request_t, others);

	XPR(XPR_INODE_PAGER_DATA, ("inode_pager_timeout: pager 0x%x, offset 0x%x",
				entry->pager, entry->offset));

	if (inode_pager_debug)
		printf("inode_pager_timeout: pager=%d, offset=%d\n",
			entry->pager, entry->offset);

	if ((is = inode_pager_lookup(entry->pager)) == INODE_PAGER_NULL) {
		/*
		 *	This object was terminated while this
		 *	request was queued.  Just ignore it.
		 *	[A pagein request may also have been aborted
		 *	while it was queued.  It doesn't hurt to
		 *	provide data anyway, even for a totally unrelated
		 *	object.]
		 *
		 *	Note that no reply is sent on this path; the
		 *	non-success result is reported by the printf
		 *	at the end of the routine.
		 */

		result = KERN_NOT_RECEIVER;
		goto cleanup;
	}

	/*
	 *	An object that has already seen a paging error gets
	 *	an error reply without touching the inode (no iget has
	 *	been done yet, so there is nothing to iput).
	 */
	if (is->errors) {
		printf("inode_pager_timeout: dropping request because of");
		printf(" previous paging errors\n");
		result = memory_object_data_error(entry->reply_to,
				entry->offset, PAGE_SIZE,
				KERN_FAILURE);

		goto cleanup;
	}
		
	unix_master();
	ip = inode_pager_iget(is);
	ilock(ip);

	/*
	 *	The object offset is converted to a file offset by
	 *	shifting left by the per-object paging_shift.
	 */
	switch(inode_read(ip, inode_pager_input_buffer,
			      (entry->offset << is->paging_shift))) {

		case PAGER_SUCCESS:
			result = memory_object_data_provided(
					entry->reply_to, entry->offset,
					inode_pager_input_buffer, PAGE_SIZE,
					VM_PROT_NONE);
			break;
		case PAGER_ABSENT:
			result = memory_object_data_unavailable(
					entry->reply_to, entry->offset,
					PAGE_SIZE);
			break;
		case PAGER_ERROR:
			result = memory_object_data_error(
					entry->reply_to, entry->offset,
					PAGE_SIZE, KERN_FAILURE);
			break;
		default:
			panic("inode_pagein: bogus return from inode_read");
	}

	iunlock(ip);
	inode_pager_iput(is);

	inode_pager_pagein_count++;

	unix_release();

 cleanup:;

	/* The request structure is released on every path. */
	zfree(data_request_zone, (vm_offset_t) entry);

	if (result != KERN_SUCCESS)
		printf("inode_pager_timeout: bad result (%d)\n", result);
}

/*
 * memory_object_data_initialize: check whether we already have each page, and
 * write it if we do not.  The implementation is far from optimized, and
 * also assumes that the inode_pager is single-threaded.
 */
kern_return_t	memory_object_data_initialize(pager, pager_request, offset, addr, data_cnt)
	memory_object_t	pager;
	port_t		pager_request;
	register
	vm_offset_t	offset;
	register
	pointer_t	addr;
	vm_size_t	data_cnt;
{
	inode_pager_t	is;
	vm_offset_t	amount_sent;

	if (inode_pager_debug)
		printf("%s: pager=%d, offset=0x%x, length=0x%x\n",
			"memory_object_data_initialize(inode_pager)",
			pager, offset, data_cnt);

	unix_master();
	is = inode_pager_lookup(pager);

	for (amount_sent = 0;
	     amount_sent < data_cnt;
	     amount_sent += PAGE_SIZE) {
		if (inode_has_page(pager, (offset + amount_sent) << is->paging_shift))
			memory_object_data_write(pager, pager_request,
				offset, addr, data_cnt);
	}

	unix_release();

	return(KERN_SUCCESS);
}

/*
 * memory_object_data_write: split up the stuff coming in from a memory_object_data_write call
 * into individual pages and pass them off to inode_write.
 */
kern_return_t	memory_object_data_write(pager, pager_request, offset, addr, data_cnt)
	memory_object_t	pager;
	port_t		pager_request;
	register
	vm_offset_t	offset;
	register
	pointer_t	addr;
	vm_size_t	data_cnt;
{
	register
	vm_size_t	amount_sent;
	register struct inode	*ip;
	inode_pager_t	is;

#ifdef	lint
	pager_request++;
#endif	lint

	XPR(XPR_INODE_PAGER_DATA, ("memory_object_data_write(inode_pager): pager 0x%x, offset 0x%x",
				pager, offset));

	if (inode_pager_debug)
		printf("%s: pager=%d, offset=0x%x, length=0x%x\n",
			"pager_data_write(inode_pager)",
			pager, offset, data_cnt);

	if ((data_cnt % PAGE_SIZE) != 0) {
		printf("(inode_pager)memory_object_data_write: not a multiple of a page");
		data_cnt = round_page(data_cnt);
	}

	unix_master();
	is = inode_pager_lookup(pager);
	ip = inode_pager_iget(is);

	for (amount_sent = 0;
	     amount_sent < data_cnt;
	     amount_sent += PAGE_SIZE) {
		vm_size_t	size = PAGE_SIZE;

		ilock(ip);

#if	MACH_NBC
		if (!is->paging) {
			/*
			 *	Ensure that a paging operation doesn't
			 *	accidently extend a "mapped" file.
			 */
			if (offset + size > ip->vm_info->inode_size)
				size = ip->vm_info->inode_size - offset;
		}
#endif	MACH_NBC

		if (inode_write(ip, addr + amount_sent, size,
				 (offset + amount_sent) << is->paging_shift) != PAGER_SUCCESS) {
			printf("inode_pageout: write error, error = %d\n", u.u_error);
			u.u_error = 0;
			is->errors++;
		}

		iunlock(ip);

		inode_pager_pageout_count++;
	}

	inode_pager_iput(is);
	unix_release();

	if (vm_deallocate(inode_pager_self, addr, data_cnt) != KERN_SUCCESS)
		panic("inode_data_write: deallocate failed");

	/*
	 *	After each successful pageout, allow a single
	 *	pending pagein to complete.
	 */

	inode_pager_timeout();

	return(KERN_SUCCESS);
}

/*
 *	memory_object_copy:
 *
 *	Not supported by the inode pager: any call panics.  The
 *	KERN_FAILURE return only matters if panic() returns.  The
 *	lint-only statements merely reference the unused arguments.
 */
kern_return_t	memory_object_copy(old_memory_object, old_memory_control,
					offset, length,
					new_memory_object)
	memory_object_t	old_memory_object;
	memory_object_control_t
			old_memory_control;
	vm_offset_t	offset;
	vm_size_t	length;
	memory_object_t	new_memory_object;
{
#ifdef	lint
	old_memory_object++; old_memory_control++; offset++; length++; new_memory_object++;
#endif	lint
	panic("(inode_pager)memory_object_copy: called");
	return KERN_FAILURE;
}

#else	MACH_XP

/*
 *	inode_pagein:
 *
 *	Fill page "m" from the inode backing its object.  The pager
 *	structure is taken from the page's object, the inode is locked
 *	for the duration of the read, and the result of inode_read is
 *	passed straight back to the caller.
 */
pager_return_t inode_pagein(m)
	vm_page_t	m;		/* page to read */
{
	inode_pager_t		pager_info;
	register struct inode	*inode;
	pager_return_t		status;

	unix_master();

	/*
	 *	Find the inode behind this page's object and hold
	 *	its lock while reading.
	 */
	pager_info = (inode_pager_t) m->object->pager;
	inode = inode_pager_iget(pager_info);

	ilock(inode);
	status = inode_read(inode, m, m->offset + m->object->paging_offset);
	iunlock(inode);

	inode_pager_iput(pager_info);
	unix_release();

	return(status);
}

/*
 *	inode_pageout:
 *
 *	Write page "m" to the inode backing its object.  On success
 *	the page is marked clean and its modify bit is cleared; on
 *	failure a message is logged.  The result of inode_write is
 *	returned to the caller.
 */
pager_return_t	inode_pageout(m)
	vm_page_t	m;
{
	inode_pager_t		pager_info;
	register struct inode	*inode;
	vm_offset_t		file_offset;
	vm_size_t		nbytes;
	pager_return_t		status;

	unix_master();

	pager_info = (inode_pager_t) m->object->pager;
	inode = inode_pager_iget(pager_info);
	file_offset = m->offset + m->object->paging_offset;

	ilock(inode);

	nbytes = PAGE_SIZE;
#if	MACH_NBC
	/*
	 *	For a "mapped" file (one not used for paging), never
	 *	let a pageout grow the file beyond the size kept in
	 *	the vm_info structure; trim the write to that size.
	 */
	if (!pager_info->paging &&
	    (file_offset + nbytes > inode->vm_info->inode_size)) {
		nbytes = inode->vm_info->inode_size - file_offset;
	}
#endif	MACH_NBC

	status = inode_write(inode, VM_PAGE_TO_PHYS(m), nbytes, file_offset);

	if (status != PAGER_SUCCESS) {
		printf("inode_pageout: failed!\n");
	} else {
		/* XXX - wrong place for both of these */
		m->clean = TRUE;
		pmap_clear_modify(VM_PAGE_TO_PHYS(m));
	}

	inode_pager_iput(pager_info);
	iunlock(inode);

	unix_release();
	return(status);
}
#endif	MACH_XP

/*
 *	Statistics kept by inode_read (buffer-pool path only): number of
 *	block reads issued with read-ahead (breada) versus without (bread).
 */
int		inode_read_aheads = 0;
int		inode_read_individuals = 0;

/*
 *	inode_read:
 *
 *	Read one page worth of data from inode "ip", starting at byte
 *	offset "f_offset" in the file.  With MACH_XP the destination is
 *	the virtual address "buffer"; otherwise it is the physical page
 *	"m".
 *
 *	Returns:
 *		PAGER_SUCCESS	data was copied (a page extending past
 *				end of file was zeroed first)
 *		PAGER_ABSENT	the page starts at or beyond end of file,
 *				or the block lookup yields a negative
 *				disk block number
 *		PAGER_ERROR	the block lookup or the read failed
 *
 *	The caller's u.u_error is preserved across the bmap() call.
 *	The callers visible in this file hold the inode lock.
 */
#if	MACH_XP
pager_return_t	inode_read(ip, buffer, f_offset)
	register struct inode	*ip;
	vm_offset_t		buffer;
	vm_offset_t		f_offset;
#else	MACH_XP
pager_return_t	inode_read(ip, m, f_offset)
	register struct inode	*ip;
	vm_page_t	m;
	vm_offset_t	f_offset;	/* byte offset within file block */
#endif	MACH_XP
{
	vm_offset_t	p_offset;	/* byte offset within physical page */
	dev_t		dev;
	register struct fs	*fs;
	daddr_t		lbn, bn;
	int		size;
	long		bsize;
	int		csize, on, n, save_error, err;
	u_long		diff;
	struct buf	*bp;

	/*
	 *	ZERO_PAGE clears the whole destination page.  Note that
	 *	the macro body already ends in a semicolon.
	 */
#if	MACH_XP
#define	ZERO_PAGE	bzero((caddr_t) buffer, PAGE_SIZE);
#else	MACH_XP
#define	ZERO_PAGE	vm_page_zero_fill(m);
#endif	MACH_XP

	/*
	 *	Get the inode and the offset within it to read from.
	 */

	p_offset = 0;

	dev = ip->i_dev;
	fs = ip->i_fs;
	bsize = fs->fs_bsize;
	csize = PAGE_SIZE;		/* bytes still to be read */

	/*
	 * Be sure that data not in the file is zero filled.
	 * The easiest way to do this is to zero the entire
	 * page now.
	 */

	if (ip->i_size < (f_offset + csize)) {
		ZERO_PAGE;
	}

	/*
	 *	Read from the inode until we've filled the page.
	 */
	do {
		/*
		 *	Find block and offset within it for our data.
		 */
		lbn = lblkno(fs, f_offset);	/* logical block number */
		on  = blkoff(fs, f_offset);	/* byte offset within block */

		/*
		 *	Find the size we can read - don't go beyond the
		 *	end of a block.
		 */
		n = MIN((unsigned)(bsize - on), csize);
		/* diff is only meaningful once i_size > f_offset is known */
		diff = ip->i_size - f_offset;
		if (ip->i_size <= f_offset) {
			if (p_offset == 0) {
				/*
				 * entire block beyond end of file -
				 * doesn't exist
				 */
				return(PAGER_ABSENT);
			}
			/*
			 * block partially there - zero the rest of it
			 * (already done by the ZERO_PAGE above)
			 */
			break;
		}
		if (diff < n)
			n = diff;

		/*
		 *	Read the index to find the disk block to read
		 *	from.  If there is no block, report that we don't
		 *	have this data.
		 *
		 *	!!! Assumes that:
		 *		1) Any offset will be on a fragment boundary
		 *		2) The inode has whole page
		 */
		save_error = u.u_error;
		u.u_error = 0;
		/* changes u.u_error! */
		bn = fsbtodb(fs,
			bmap(ip, lbn, B_READ, (int)(on+n) ));
		err = u.u_error;
		u.u_error = save_error;

		if (err) {
			printf("IO error on pagein: error = %d.\n",err);
			return(PAGER_ERROR);
		}

		/* a negative block number is treated as "no data here" */
		if ((long)bn < 0)
			return(PAGER_ABSENT);

		size = blksize(fs, ip, lbn);

#if	NNBC > 0
		/*
		 *	Read through a private buffer header.
		 *	NOTE(review): this path refers to "m", which is
		 *	only a parameter when MACH_XP is off.
		 */
		bp = ipb_get();		/* grab a buffer */
		grab_memory(bp->b_un.b_addr, m->object, f_offset - on, size);
		bp->b_flags = B_BUSY | B_READ;
		bp->b_bcount = size;
		bp->b_blkno = bn;
		bp->b_dev = dev;
		(*bdevsw[major(dev)].d_strategy)(bp);
		u.u_ru.ru_inblock++;
		biowait(bp);
		/*
		 *	If result is unaligned, then copy to page.
		 *	(The page wouldn't have been mapped, since
		 *	the mapping into the buffer only occurs
		 *	on aligned pages).
		 */
		if (((on % PAGE_SIZE) != 0) || (p_offset % PAGE_SIZE != 0)) {
			copy_to_phys(bp->b_un.b_addr+on,
				VM_PAGE_TO_PHYS(m) + p_offset,
				n);
		}
		release_memory(bp->b_un.b_addr, m->object, f_offset - on,
					size);
		ipb_put(bp);		/* release the buffer */
#else	NNBC > 0
		/*
		 *	Read the block through the buffer pool,
		 *	then copy it to the physical memory already
		 *	allocated for this page.
		 */

		/* sequential with the previous read: use read-ahead */
		if ((ip->i_lastr + 1) == lbn) {
			inode_read_aheads++;
			bp = breada(dev, bn, size, rablock, rasize);
		} else {
			inode_read_individuals++;
			bp = bread(dev, bn, size);
		}
		ip->i_lastr = lbn;

		/* never copy more than the read actually delivered */
		n = MIN(n, size - bp->b_resid);
		if (bp->b_flags & B_ERROR) {
			brelse(bp);
			printf("IO error on pagein (bread)\n");
			return(PAGER_ERROR);
		}
#if	MACH_XP
		bcopy(bp->b_un.b_addr+on, (caddr_t) buffer + p_offset,
			(unsigned int) n);
#else	MACH_XP
		copy_to_phys(bp->b_un.b_addr+on,
				VM_PAGE_TO_PHYS(m) + p_offset,
				n);
#endif	MACH_XP
#endif	NNBC > 0

		/*
		 *	Account for how much we've read this time
		 *	around.
		 */
		csize -= n;
		p_offset += n;
		f_offset += n;

#if	NNBC > 0
#else	NNBC > 0
		/*
		 *	Mark the buffer B_AGE once the block has been
		 *	consumed to its end or end of file was reached,
		 *	then release it.
		 */
		if (n + on == bsize || f_offset == ip->i_size)
			bp->b_flags |= B_AGE;
		brelse(bp);
#endif	NNBC > 0

	} while (csize > 0 && n != 0);	/* n == 0 guards against no progress */

	return(PAGER_SUCCESS);
}

/*
 *	inode_write:
 *
 *	Write "csize" bytes from "addr" to inode "ip", starting at byte
 *	offset "f_offset" in the file, one filesystem block (or part of
 *	one) at a time.  With MACH_XP "addr" is used as a virtual address
 *	(bcopy); otherwise it is handed to copy_from_phys.
 *
 *	Returns PAGER_SUCCESS, or PAGER_ERROR if the block lookup or the
 *	buffer read fails.  The routine takes unix_master itself and
 *	releases it on every return path.  The caller's u.u_error is
 *	preserved across the bmap() call.  The inode size is extended
 *	when the write reaches beyond the current i_size.
 */
pager_return_t	inode_write(ip, addr, csize, f_offset)
	register struct inode	*ip;
	vm_offset_t	addr;
	vm_size_t	csize;
	vm_offset_t	f_offset;	/* byte offset within file block */
{
	vm_offset_t	p_offset;	/* byte offset within physical page */
	dev_t		dev;
	register struct fs	*fs;
	daddr_t		lbn, bn;
	int		size;
	long		bsize;
	int		on, n, save_error, err;
	struct buf	*bp;
#if	MACH_NBC
	extern int	nbc_debug;
#endif	MACH_NBC

	unix_master();

	p_offset = 0;

	dev = ip->i_dev;
	fs = ip->i_fs;
	bsize = fs->fs_bsize;

#if	MACH_NBC
	if ((nbc_debug & 0x8) && ip->vm_info->object) {
		uprintf("inode_write: ip 0x%x, f_offset = %d, size = %d.\n",
				ip, f_offset, csize);
	}
#endif	MACH_NBC

	do {
		lbn = lblkno(fs, f_offset);	/* logical block number */
		on  = blkoff(fs, f_offset);	/* byte offset within block */

		/* don't go beyond the end of this block */
		n   = MIN((unsigned)(bsize - on), csize);

		save_error = u.u_error;
		u.u_error = 0;
		/* changes u.u_error! */

		/*
		 *	The B_XXX argument to the bmap() call is used
		 *	by the NBC system to direct inode flushing.
		 */

		bn = fsbtodb(fs, bmap(ip, lbn, B_WRITE | B_XXX, (int)(on+n) ));
		err = u.u_error;
		u.u_error = save_error;

		if (err || (long) bn < 0) {
			printf("IO error on pageout: error = %d.\n",err);
			unix_release();
			return(PAGER_ERROR);
		}

		/*
		 *	Grow the file if this piece ends past the
		 *	current end.  (This happens before the data is
		 *	actually copied.)
		 */
		if (f_offset + n > ip->i_size) {
			ip->i_size = f_offset + n;
#if	MACH_NBC
			if (nbc_debug & 0x8) {
				uprintf("inode extended to %d bytes\n",
					ip->i_size);
			}
#endif	MACH_NBC
		}

		size = blksize(fs, ip, lbn);

		/*
		 *	A piece covering a whole block uses getblk;
		 *	a partial block uses bread -- presumably so
		 *	the untouched part of the block is preserved
		 *	(TODO confirm against the buffer cache code).
		 */
		if (n == bsize)
			bp = getblk(dev, bn, size);
		else
			bp = bread(dev, bn, size);

		n = MIN(n, size - bp->b_resid);
		if (bp->b_flags & B_ERROR) {
			brelse(bp);
			printf("IO error on pageout (bread)\n");
			unix_release();
			return(PAGER_ERROR);
		}
#if	MACH_XP
		bcopy((caddr_t) (addr + p_offset), bp->b_un.b_addr+on, 
			(unsigned int) n);
#else	MACH_XP
		copy_from_phys(addr + p_offset, bp->b_un.b_addr+on, n);
#endif	MACH_XP

		/* account for what was written this time around */
		csize -= n;
		p_offset += n;
		f_offset += n;

		/*
		 *	A block filled to its end is marked B_AGE and
		 *	handed to bawrite; otherwise bdwrite is used.
		 */
		if (n + on == bsize) {
			bp->b_flags |= B_AGE;
			bawrite(bp);
		}
		else
			bdwrite(bp);
		ip->i_flag |= IUPD|ICHG;

	} while (csize != 0 && n != 0);	/* n == 0 guards against no progress */

	unix_release();
	return(PAGER_SUCCESS);
}

/*
 *	inode_has_page:
 *
 *	Report whether the inode backing a memory object holds data
 *	for the page at the given file offset.
 *
 *	Parameters:
 *		pager		memory object (paging object) to test
 *		offset		byte offset in the backing file to test
 *
 *	Returns:
 *		FALSE	the offset is at or beyond end of file, the
 *			block map has no block there, or (MACH_XP only)
 *			the memory object is unknown
 *		TRUE	a block exists, or the block lookup failed
 *			(the error is logged)
 *
 *	Assumptions:
 *		This is only used on shadowing (copy) objects.
 *		If part of the page has been found, we assume that the
 *		entire page is in the inode.
 *
 *	The caller's u.u_error is preserved.  Every return path
 *	releases what it acquired (master processor binding, inode
 *	pager reference, inode lock).
 */
boolean_t inode_has_page(pager, offset)
	memory_object_t	pager;
	vm_offset_t	offset;
{
	register struct inode	*ip;
	vm_offset_t	f_offset;	/* byte offset within file block */
	register struct fs	*fs;
	daddr_t		lbn, bn;
	long		bsize;
	int		csize, on, n, save_error, err;
	u_long		diff;
	inode_pager_t	is;
	boolean_t	present;

	/*
	 * For now, we do all inode hacking on the master cpu.
	 */

	unix_master();

#if	MACH_XP
	if ((is = inode_pager_lookup(pager)) == INODE_PAGER_NULL) {
		/* balance the unix_master() above before bailing out */
		unix_release();
		return(FALSE);
	}
#else	MACH_XP
	if ((is = inode_pager_lookup_external(pager)) == INODE_PAGER_NULL)
		panic("inode_has_page: failed lookup");
#endif	MACH_XP

	ip = inode_pager_iget(is);
	f_offset = offset;

	ilock(ip);

	fs = ip->i_fs;
	bsize = fs->fs_bsize;
	csize = PAGE_SIZE;

	/*
	 *	Find block and offset within it for our data.
	 */
	lbn = lblkno(fs, f_offset);	/* logical block number */
	on  = blkoff(fs, f_offset);	/* byte offset within block */

	/*
	 *	Find the size we can read - don't go beyond the
	 *	end of a block.
	 */
	n = MIN((unsigned)(bsize - on), csize);
	diff = ip->i_size - f_offset;
	if (ip->i_size <= f_offset) {
		/*
		 * entire block beyond end of file -
		 * doesn't exist
		 */
		present = FALSE;
		goto done;
	}

	if (diff < n)
		n = diff;

	/*
	 *	Read the index to find the disk block to read
	 *	from.  If there is no block, report that we don't
	 *	have this data.
	 *
	 *	!!! Assumes that:
	 *		1) Any offset will be on a fragment boundary
	 *		2) The inode won't have just part of a page
	 */
	save_error = u.u_error;
	u.u_error = 0;
	/* changes u.u_error! */
	bn = fsbtodb(fs,
		bmap(ip, lbn, B_READ, (int)(on+n) ));
	err = u.u_error;
	u.u_error = save_error;

	if (err) {
		/* on a lookup failure, claim the page is present */
		printf("IO error on has_page: error = %d.\n",err);
		present = TRUE;
		goto done;
	}

	/*
	 *	A negative block number means the page is not in the
	 *	inode.  Otherwise we know that we have at least part
	 *	of the page; assume it is all there.
	 */
	present = ((long)bn < 0) ? FALSE : TRUE;

 done: ;
	inode_pager_iput(is);
	iunlock(ip);
	unix_release();
	return(present);
}


/*
 *	Swapping preferences table, paired with the mount table.
 */

/*
 *	One entry per mount table slot (same index as mount[]); set by
 *	inode_swap_preference and consulted by iswap_allocate.
 */
struct {
	boolean_t	swap_preferred;	/* considered in iswap_allocate's first pass */
	boolean_t	swap_never;	/* iswap_allocate always skips this filesystem */
} iswap_table[NMOUNT];

/*
 *	Routine:	inode_swap_preference
 *	Function:
 *		Allow user to express a preference over filesystems
 *		used for paging, by recording the "prefer" and "never"
 *		flags in the iswap_table slot(s) paired with the
 *		matching mount table entries.
 *	Arguments:
 *		The device in question is specified by (block device)
 *		major/minor number.
 *	Returns:
 *		KERN_NO_ACCESS if the caller is not the super-user;
 *		KERN_INVALID_ARGUMENT for NODEV, or when a mount slot
 *		for the device has no superblock buffer;
 *		KERN_SUCCESS otherwise (including when no mount slot
 *		matches the device).
 *	In/out conditions:
 *		The specified filesystem must already be mounted.
 */
kern_return_t inode_swap_preference(device, prefer, never)
	dev_t		device;
	boolean_t	prefer;
	boolean_t	never;
{
	register int		slot;

	if (!suser())
		return(KERN_NO_ACCESS);

	if (device == NODEV)
		return(KERN_INVALID_ARGUMENT);

	unix_master();

	for (slot = 0; slot < NMOUNT; slot++) {
		/* only slots for the requested device are of interest */
		if (mount[slot].m_dev != device)
			continue;

		/* a slot without a superblock buffer is rejected */
		if (mount[slot].m_bufp == NULL) {
			unix_release();
			return(KERN_INVALID_ARGUMENT);
		}

		iswap_table[slot].swap_preferred = prefer;
		iswap_table[slot].swap_never = never;
	}

	unix_release();
	return(KERN_SUCCESS);
}
	
/*
 *	Routine:	iswap_allocate
 *	Function:
 *		Allocate an inode for paging out a kernel-created
 *		memory object.
 *
 *	Implementation:
 *		Looks through the mounted filesystems for the
 *		one with the most free space.  First, only "preferred"
 *		filesystems are considered, then those that are
 *		not prohibited.
 *
 *		In order to use the inode's disk block map to
 *		determine whether a page has ever been written,
 *		filesystems for which the block size is a
 *		perfect divisor of the page size are preferred.
 *
 *	In/out conditions:
 *		The inode is returned locked.
 */
/*
 *	iswap_allocate:
 *
 *	Pick the mounted, writable filesystem with the most free space
 *	under progressively relaxed criteria (up to five passes), then
 *	allocate a fresh inode on it for use as paging space.
 *
 *	"bsize" is the block size the caller would like (the callers
 *	in this file pass PAGE_SIZE).  Returns the new inode, or a null
 *	pointer if no filesystem qualified or the allocation failed.
 */
struct inode	*iswap_allocate(bsize)
	vm_size_t	bsize;
{
	struct inode	*ip;
	int		pass;
	int 		mostf;
	int 		mostidx, midx;
	struct mount	*mp;
	struct fs	*fs;
	/*
	 *	Warning text indexed by (pass - 2) after the search loop,
	 *	i.e. by (successful pass - 1), because the loop increments
	 *	"pass" once more before its condition fails.  Entries are
	 *	cleared once printed.
	 *	NOTE(review): the last entry looks unreachable -- when
	 *	nothing is found the code jumps to "done" before the
	 *	table is consulted.  Confirm whether that is intended.
	 */
	static
	char		*warnings[] = {
				(char *) 0,	/* Nothing */
				(char *) 0,	/* None preferred */
				"swapping to filesystem with smaller blocksize",
				"swapping to filesystem with larger blocksize",
				"SWAPPING BEYOND FREE RESERVE (REBOOT RECOMMENDED)",
				"UNABLE TO FIND SWAP SPACE (REBOOT RECOMMENDED)"
			};


	/*
	 *	Look thru all mounted file systems for the one with
	 *	the most free space that meets the right criteria.
	 *	During each pass, the criteria are relaxed somewhat.
	 */

	unix_master();

	mostidx = -1;
	mostf   = 0;

	for (pass = 1; (pass <= 5) && (mostidx == -1); pass++) {
		for (midx = 0; midx < NMOUNT; midx++) {
			int ffrags;
	
			mp = &mount[midx];	
			/* skip unused mount table slots */
			if (mp->m_bufp == NULL || mp->m_dev == NODEV)
				continue;

			/*
			 *	Never write on read-only filesystems
			 */

			fs = mp->m_bufp->b_un.b_fs;

			if (fs->fs_ronly)
				continue;

			/*
			 *	Abide by advisory information...
			 *	never swap on things so marked.
			 */

			if (iswap_table[midx].swap_never)
				continue;
	
			/*
			 *	Take preferences pretty seriously --
			 *	ignore other heuristics.
			 */

			if (!iswap_table[midx].swap_preferred) {
				/* pass 1 considers preferred filesystems only */
				if (pass == 1)
					continue;

				/*
				 *	We prefer to page only to filesystems
				 *	where the block size exactly matches
				 *	the page size.
				 *
				 *	Filesystems that have smaller block
				 *	sizes are better -- they just do more
				 *	I/O's per page.
				 *
				 *	If all else fails, we can use a larger
				 *	block size at the expense of disk space.
				 */

				/* smaller blocks are accepted from pass 3 on */
				if ((pass <= 2) && (fs->fs_bsize < bsize))
					continue;

				/* larger blocks are accepted from pass 4 on */
				if ((pass <= 3) && (fs->fs_bsize > bsize))
					continue;
			}
#if	!MACH_XP
			/* without MACH_XP, larger blocks are never usable */
			if (fs->fs_bsize > bsize) continue;
#endif	!MACH_XP

			/*
			 *	See how much free space is available...
			 *	allow for the filesystem's free reserve,
			 *	unless we have no other options.
			 */

			ffrags = freespace(fs, (pass <= 4) ? fs->fs_minfree : 0);
			if (ffrags > mostf) {
				mostf = ffrags;
				mostidx = midx;
			}
		}
	}

	ip = (struct inode *) 0;

	if (mostidx == -1)
		goto done;

	if (warnings[pass-2] != (char *) 0) {
		printf("inode_pager (iswap_allocate): %s\n", warnings[pass-2]);
		/* Only issue a given warning once */
		warnings[pass-2] = (char *) 0;
	}

	/**/ {
	struct inode root_dummy;

	/*
	 *	Avoid actually looking up an inode in the
	 *	mounted filesystem, because that requires
	 *	taking an inode lock on a directory.
	 *
	 *	The only fields that get used in the "parent inode"
	 *	are the filesystem and device fields.  [This is
	 *	of course taken from knowledge of the implementation.]
	 *
	 *	The only hazard here is that the filesystem can
	 *	be unmounted while we're attempting to allocate
	 *	the inode.  This could be corrected by synchronizing
	 *	unmount operations with the inode_pager.
	 */

	root_dummy.i_fs = mount[mostidx].m_bufp->b_un.b_fs;
	root_dummy.i_dev = mount[mostidx].m_dev;
	ip = ialloc(&root_dummy, (ino_t)0, 0);
	/**/ }

	if (ip == (struct inode *) 0)
		goto done;

	/*
	 *	Set the new inode up as an unlinked regular file
	 *	owned by uid/gid 0.
	 */
	ip->i_flag |= IACC|IUPD|ICHG;
	ip->i_nlink = 0;	/* fsck will remove */
	ip->i_uid = 0;		/* which user? */
	ip->i_gid = 0;
	ip->i_mode = IFREG;

 done: ;
	unix_release();
	return(ip);
}

#if	MACH_XP
port_t		inode_pager_default = PORT_NULL;

/*
 *	Routine:	memory_object_create
 *	Purpose:
 *		Handle requests for memory objects from the
 *		kernel: allocate a paging inode and an inode_pager
 *		structure for it, associate them with the ports
 *		supplied by the kernel, and start receiving on the
 *		new memory object port.
 *	Returns:
 *		KERN_FAILURE		the request did not arrive on
 *					the default pager port
 *		KERN_INVALID_ARGUMENT	page size is not PAGE_SIZE
 *		KERN_RESOURCE_SHORTAGE	no inode or no inode_pager
 *					structure could be obtained
 *		KERN_SUCCESS		otherwise
 *	Notes:
 *		Because we only give out the default memory
 *		manager port to the kernel, we don't have to
 *		be so paranoid about the contents.
 *
 *		The inode obtained from iswap_allocate is released
 *		with iput on both the success path and the
 *		inode_pager_create failure path.
 */
kern_return_t	memory_object_create(old_pager, new_pager, new_size, new_pager_request, new_pager_name, new_page_size)
	port_t		old_pager;
	port_rcv_t	new_pager;
	vm_size_t	new_size;
	port_t		new_pager_request;
	port_t		new_pager_name;
	vm_size_t	new_page_size;
{
	inode_pager_t	is;
	struct inode	*ip;

#ifdef	lint
	/* XXX Eventually, use new_size to optimize for small objects */
	new_size++;
#endif	lint

	if (inode_pager_debug)
		printf("%s: new_pager=%d, new_request=%d, new_name=%d\n",
			"memory_object_data_create(inode_pager)",
			new_pager, new_pager_request, new_pager_name);

	if (old_pager != inode_pager_default) {
		printf("memory_object_create(inode_pager): non-kernel caller!\n");

		/*
		 * XXX Should throw away spurious port rights --
		 * use port_status to avoid giving away important ports
		 */
		return(KERN_FAILURE);
	}

	if (new_page_size != PAGE_SIZE) {
		printf("memory_object_create(inode_pager): wrong page size\n");
		return(KERN_INVALID_ARGUMENT);
	}

	if ((ip = iswap_allocate(PAGE_SIZE)) == (struct inode *) 0) {
		printf("memory_object_create(inode_pager): unable to find an inode");
		printf(" [REBOOT SUGGESTED]\n");
		return(KERN_RESOURCE_SHORTAGE);
	}

	if ((is = inode_pager_create(ip, TRUE, TRUE)) == INODE_PAGER_NULL) {
		printf("memory_object_create(inode_pager): unable to allocate");
		printf(" inode_pager structure [REBOOT SUGGESTED]\n");
		/*
		 *	Don't strand the inode we just allocated;
		 *	releasing it here will free it.
		 */
		iput(ip);
		return(KERN_RESOURCE_SHORTAGE);
	}

	is->client_count++;

	/*
	 *	Determine the global name, and wedge it
	 *	into the inode structure
	 */

	(void) object_copyin(inode_pager_task, new_pager,
			     MSG_TYPE_PORT, FALSE,
			     (kern_obj_t *) &is->pager_global);
	ip->vm_info->pager = is->pager_global;

	/*
	 *	Set up associations between these ports
	 *	and this inode_pager structure
	 */

	is->pager = new_pager;
	is->pager_request = new_pager_request;
	is->pager_request_global = PORT_NULL;
	is->pager_name = new_pager_name;
	inode_port_hash_insert(new_pager, is);
	iput(ip);

	/* start receiving requests on the new memory object port */
	if (port_set_add(inode_pager_self, inode_pager_enabled_set, new_pager)
	    != KERN_SUCCESS)
		panic("memory_object_create: couldn't enable");

	return(KERN_SUCCESS);
}

memory_object_copy_strategy_t inode_copy_strategy = MEMORY_OBJECT_COPY_DELAY;

/*
 *	memory_object_init:
 *
 *	Record the request and name ports supplied for an existing
 *	memory object, store the global name of the request port in
 *	the inode's vm_info, and set the object's attributes (ready,
 *	caching as recorded in is->cached, inode_copy_strategy).
 *
 *	Returns KERN_FAILURE for a wrong page size or an unknown
 *	memory object, KERN_SUCCESS otherwise.
 */
kern_return_t	memory_object_init(pager, pager_request, pager_name, pager_page_size)
	port_t		pager;
	port_t		pager_request;
	port_t		pager_name;
	vm_size_t	pager_page_size;
{
	inode_pager_t	is;
	struct inode	*ip;

	if (inode_pager_debug)
		printf("%s: pager=%d, request=%d, name=%d\n",
			"memory_object_data_init(inode_pager)",
			pager, pager_request, pager_name);

	if (pager_page_size != PAGE_SIZE) {
		printf("memory_object_init: wrong page size");
		return(KERN_FAILURE);
	}

	if ((is = inode_pager_lookup(pager)) == INODE_PAGER_NULL) {
		printf("memory_object_init: bogus pager");
		return(KERN_FAILURE);
	}

	/*
	 *	We have to accept the possibility that another
	 *	memory_object_init call has already been performed
	 *	(either because the object got remapped before the
	 *	termination completed, or because of a malicious
	 *	client).
	 *
	 *	If we're faced with multiple clients, we only save
	 *	the state for the latest one.  [This means that
	 *	attempts to uncache the object may fail.]
	 *	XXX We could try to handle this by only allowing
	 *	the latest kernel to cache the data, or no kernel
	 *	to cache it should there be more than one.
	 */

	/* the count is bumped whether or not the warning is printed */
	if (is->client_count++ > 0)
		printf("memory_object_init(inode_pager): multiple clients!\n");

	is->pager_request = pager_request;
	is->pager_name = pager_name;

	/* drop the reference held for any previous request port */
	if (is->pager_request_global != PORT_NULL)
		port_release(is->pager_request_global);

	(void) object_copyin(inode_pager_task, pager_request,
			     MSG_TYPE_PORT, FALSE,
			     (kern_obj_t *) &is->pager_request_global);

	ip = inode_pager_iget(is);
	ip->vm_info->pager_request = is->pager_request_global;
	inode_pager_iput(is);

	(void) memory_object_set_attributes(pager_request, TRUE,
					is->cached, inode_copy_strategy);

	return(KERN_SUCCESS);
}

/*
 *	memory_object_terminate:
 *
 *	Handle termination of a memory object.  The port rights that
 *	arrive with the request are always deallocated.  If the object
 *	is known and inode_pager_no_more_senders agrees that nobody
 *	else can reach it, the inode reference, the port references,
 *	the port hash entry, the memory object port and the LRU queue
 *	entry are released, and the inode_pager structure is freed
 *	(or marked free_me when dead references are still outstanding).
 *
 *	Always returns KERN_SUCCESS.
 */
kern_return_t	memory_object_terminate(pager, pager_request, pager_name)
	port_t		pager;
	port_all_t	pager_request;
	port_all_t	pager_name;
{
	inode_pager_t	is = inode_pager_lookup(pager);
	struct inode	*ip;

	/*
	 *	Throw away the port rights, regardless whether this
	 *	request made any sense at all.  In order for the
	 *	message to be accepted, they must have been port_all_t's.
	 *	Therefore, they can't be any of the ports we already owned.
	 */

	if (pager_name != PORT_NULL)
		port_deallocate(inode_pager_self, pager_name);
	if (pager_request != PORT_NULL)
		port_deallocate(inode_pager_self, pager_request);

	/*
	 *	Only clean up if the object has not grown other
	 *	references while we've been running.
	 *
	 *	We move to the master processor to prevent other references
	 *	from appearing (and so that the dead reference count is
	 *	safe).
	 */

	if (is == INODE_PAGER_NULL)
		return(KERN_SUCCESS);

	is->client_count--;

	unix_master();

	/* somebody can still reach the object: leave it alone */
	if (!inode_pager_no_more_senders(is->pager_global, is->dead_references)) {
		unix_release();
		return(KERN_SUCCESS);
	}

	/* unexpected leftover counts are only reported, not acted on */
	if (is->client_count != 0)
		printf("(inode_pager)memory_object_terminate: client count\n");
	if (is->use_count != 0)
		printf("(inode_pager)memory_object_terminate: use count");

	/*
	 *	Release the inode reference.
	 */

	ip = inode_pager_iget(is);
	ip->i_flag &= ~ITEXT;
	ip->vm_info->pager = MEMORY_OBJECT_NULL;
	irele(ip);

	/*
	 *	Release the port references held in the inode_pager
	 *	data structure.
	 */

	if (is->pager_global != PORT_NULL) {
		port_release(is->pager_global);
		is->pager_global = PORT_NULL;
	}
	if (is->pager_request_global != PORT_NULL) {
		port_release(is->pager_request_global);
		is->pager_request_global = PORT_NULL;
	}

	/*
	 *	Remove the memory object port association, and then
	 *	destroy the port itself.
	 */

	inode_port_hash_delete(is->pager);
	port_deallocate(inode_pager_self, is->pager);

	/*
	 *	Remove the structure from the LRU queue.
	 */

	simple_lock(&istruct_lock);
	if (is->queued) {
		queue_remove(&istruct_queue, is, inode_pager_t, lru_links);
		istruct_qcount--;
	}
	simple_unlock(&istruct_lock);

	/*
	 *	Free the structure
	 *
	 *	If there are dying references, they'll need to
	 *	access the inode_pager structure (to decrement
	 *	that field), so we leave it to the last reference.
	 */

	if (is->dead_references == 0)
		zfree(istruct_zone, (vm_offset_t) is);
	 else
	 	is->free_me = TRUE;

	unix_release();
	return(KERN_SUCCESS);
}

/*
 *	inode_pager_terminate:
 *
 *	Installed as the server loop's TERMINATE_FUNCTION.  It is not
 *	expected to be called; it only logs the port it was given.
 */
void		inode_pager_terminate(pager)
	port_t		pager;
{
	printf("inode_pager_terminate: unexpected, port=0x%x\n", pager);
}

/*
 *	memory_object_lock_completed:
 *
 *	Not expected by the inode pager: the call is logged and
 *	KERN_FAILURE is returned.  The lint-only statements merely
 *	reference the unused arguments.
 */
kern_return_t	memory_object_lock_completed(memory_object, pager_request_port,
					offset,length)
	memory_object_t	memory_object;
	port_t		pager_request_port;
	vm_offset_t	offset;
	vm_size_t	length;
{
#ifdef	lint
	memory_object++; pager_request_port++; offset++; length++;
#endif	lint

	printf("memory_object_lock_completed(inode_pager): called\n");
	return(KERN_FAILURE);
}

/*
 *	Include the server loop
 */

#define	SERVER_LOOP		inode_pager_server_loop
#define	SERVER_NAME		"inode_pager"
#define	TERMINATE_FUNCTION	inode_pager_terminate
#define	SERVER_DISPATCH(in,out)	\
		(inode_pager_server(in, out) || \
		 inode_pager_default_server(in, out))
#define	RECEIVE_OPTION		inode_pager_receive_option
#define	TIMEOUT_FUNCTION	inode_pager_timeout()
#define	LOCAL_PORT		inode_pager_enabled_set

#include <kern/server_loop.c>

#define	memory_object_server		inode_pager_server
#include <mach/memory_object_server.c>
#define	memory_object_default_server	inode_pager_default_server
#include <mach/memory_object_default_server.c>

/*
 *	inode_pager:
 *
 *	Main routine of the inode pager task.  Names the task, sets up
 *	the port hash, allocates the default pager port and the enabled
 *	port set, publishes the default port as memory_manager_default
 *	(waking anyone sleeping on it), allocates and wires the pagein
 *	buffer, initializes the pending-request zone and queue, and
 *	then enters the server loop.  Any setup failure panics.
 */
void		inode_pager()
{
	extern void task_name();

	task_name("inode_pager");

	/*
	 *	Initialize the name port hashing stuff.
	 */

	inode_port_hash_init();

	inode_pager_task = current_task();

	inode_pager_task->reply_port = PORT_NULL;
	inode_pager_self = task_self();

	/*
	 *	We are the default pager.
	 *	Initialize the "default pager" port.
	 */

	simple_lock(&inode_pager_init_lock);
	if (port_allocate(inode_pager_self, &inode_pager_default) != KERN_SUCCESS)
		panic("inode_pager: can't allocate default port");
	if (port_set_backlog(inode_pager_self, inode_pager_default, 2) != KERN_SUCCESS)
		panic("inode_pager: can't set backlog on default pager");

	/* all ports we receive on are gathered in one port set */
	if (port_set_allocate(inode_pager_self, &inode_pager_enabled_set)
	    != KERN_SUCCESS)
		panic("inode_pager: cannot create enabled port set");

	if (port_set_add(inode_pager_self,
			inode_pager_enabled_set,
			inode_pager_default)
	    != KERN_SUCCESS)
		panic("inode_pager: cannot enable default port");

	/*
	 *	Publish the default pager port and wake up anyone
	 *	sleeping on memory_manager_default.
	 */
	(void) object_copyin(inode_pager_task, inode_pager_default,
			     MSG_TYPE_PORT, FALSE,
			     (kern_obj_t *) &memory_manager_default);
	thread_wakeup((int) &memory_manager_default);
	simple_unlock(&inode_pager_init_lock);

	/*
	 *	Allocate the buffer for pagein.
	 *	[We wire down the buffer for now.]
	 */

	if (vm_allocate(inode_pager_self, &inode_pager_input_buffer,
			PAGE_SIZE, TRUE) != KERN_SUCCESS)
		panic("inode_pagein: cannot allocate a buffer!");
	vm_map_pageable(inode_pager_task->map, inode_pager_input_buffer, 
			inode_pager_input_buffer + PAGE_SIZE, FALSE);

	/*
	 *	Initialize the pending pagein requests data structures.
	 */

	data_request_zone = zinit((vm_size_t) sizeof(struct data_request),
				(vm_size_t) 4096 *
					(vm_size_t) sizeof(struct data_request),
				(vm_size_t) sizeof(struct data_request),
				FALSE, "inode pager pending data requests");
	queue_init(&data_request_queue);

	SERVER_LOOP();
}

#else	MACH_XP

memory_object_t inode_alloc(size)
	vm_size_t	size;
{
	struct inode	*ip;
	inode_pager_t 	is;

#ifdef	lint
	size++;
#endif	lint

	unix_master();

	/*
	 *	Get a new inode, then turn it into a paging
	 *	space.
	 */
	if ((ip = iswap_allocate(PAGE_SIZE)) == (struct inode *) 0) {
		unix_release();
		return(MEMORY_OBJECT_NULL);
	}
	if ((is = inode_pager_create(ip, FALSE, TRUE)) == INODE_PAGER_NULL) {
		iput(ip);	/* will free inode */
		unix_release();
		return(MEMORY_OBJECT_NULL);
	}
	iput(ip);
	unix_release();
	return((memory_object_t) is);
}

boolean_t inode_dealloc(pager)
	memory_object_t	pager;
{
	register struct inode	*ip;
	inode_pager_t	is = (inode_pager_t) pager;

	unix_master();

	XPR(XPR_INODE_PAGER, ("inode_dealloc: pager %x", pager));

	ip = inode_pager_iget(is);

	if (is->use_count > 1) {
		printf("ip = 0x%x, is = 0x%x, use_count = %d\n", ip,
					is, is->use_count);
	}
	ip->i_flag &= ~ITEXT;
	ip->vm_info->pager = MEMORY_OBJECT_NULL; /* so irele will free */
	irele(ip);

	simple_lock(&istruct_lock);
	if (is->queued) {
		queue_remove(&istruct_queue, is, inode_pager_t, lru_links);
		istruct_qcount--;
	}
	simple_unlock(&istruct_lock);
	zfree(istruct_zone, (vm_offset_t) is);
	unix_release();
}

/*
 *	inode_uncache:
 *
 *	Remove an inode from the object cache.
 *
 *	If the inode is locked on entry it is unlocked for the
 *	duration of the flush and locked again before returning, so
 *	callers must tolerate the inode having been unlocked
 *	temporarily.
 */
void inode_uncache(ip)
	register struct inode	*ip;
{
	register boolean_t	held_lock;

	unix_master();

	/*
	 *	Uncaching can deallocate an object; that may have to
	 *	wait for the pageout daemon, which may itself be waiting
	 *	for this inode's lock.  Drop the inode lock now and take
	 *	it back afterwards.  Proper synchronization here relies
	 *	on the Unix master restriction.
	 */
	held_lock = (ip->i_flag & ILOCKED);
	if (held_lock) {
		IUNLOCK(ip);
	}

	/*
	 *	Given all the different and weird ways various forms
	 *	of exec mis-use this pager, we cannot tell whether an
	 *	inode is "text" or "inode", so just try flushing both.
	 */

#if	MACH_NBC
	mfs_uncache(ip);
#endif	MACH_NBC
	pager_cache(vm_object_lookup(ip->vm_info->pager), FALSE);

	if (held_lock) {
		ILOCK(ip);
	}

	unix_release();
}
boolean_t	inode_uncache_try(ip)
	register struct inode	*ip;
{
	vm_object_t	o;
	boolean_t	result;

	unix_master();

	if ((o = vm_object_lookup(ip->vm_info->pager)) != VM_OBJECT_NULL)
		inode_uncache(ip);

	result = ((o == VM_OBJECT_NULL) || (o->ref_count == 1));
	vm_object_deallocate(o);

	unix_release();
	return(result);
}
#endif	MACH_XP

#if	NNBC > 0
/*
 *	Map the inode's pages into memory specified by the buffer.
 */
/*
 *	grab_memory:
 *
 *	Enter the pages of "object" covering [offset, offset + size)
 *	into the kernel pmap, read/write, at consecutive page
 *	addresses starting at "virt".  Pages not yet present in the
 *	object are allocated; pages already present are taken off
 *	whichever paging queue their flags say they are on.
 */
grab_memory(virt, object, offset, size)
	vm_offset_t		virt;
	register vm_object_t	object;
	vm_offset_t		offset;
	vm_size_t		size;
{
	register vm_page_t	pg;
	register vm_offset_t	va, pos, limit;

	limit = offset + size;

	vm_object_lock(object);
	vm_page_lock_queues();

	/*
	 *	Wait until the free list could cover the whole request.
	 *	Both locks are dropped around the wait.
	 *
	 *	NOTE: the inode is still locked here, so if pageout
	 *	tries paging out to the inode we are reading from we
	 *	deadlock. XXX
	 */
	while (vm_page_free_count < atop(size)) {
		vm_page_unlock_queues();
		vm_object_unlock(object);
		VM_WAIT;
		vm_object_lock(object);
		vm_page_lock_queues();
	}

	for (pos = offset, va = virt;
	     pos < limit;
	     pos += PAGE_SIZE, va += PAGE_SIZE) {
		pg = vm_page_lookup(object, pos);
		if (pg != VM_PAGE_NULL) {
			/*
			 *	Already in the object: unlink it from
			 *	the paging queue(s) it is flagged for.
			 */
			if (pg->inactive) {
				queue_remove(&vm_page_queue_inactive, pg,
						vm_page_t, pageq);
			}
			if (pg->active) {
				queue_remove(&vm_page_queue_active, pg,
						vm_page_t, pageq);
			}
		} else {
			pg = vm_page_alloc(object, pos);
			if (pg == VM_PAGE_NULL) {
				panic("grab memory: no reserved page");
			}
			/*
			 *	Flag (and count) the new page as
			 *	inactive so that release_memory will
			 *	put it on the inactive queue.
			 */
			pg->inactive = TRUE;
			vm_page_inactive_count++;
		}
		pmap_enter(kernel_pmap, va, VM_PAGE_TO_PHYS(pg),
			VM_PROT_READ|VM_PROT_WRITE, TRUE);
	}

	vm_page_unlock_queues();
	vm_object_unlock(object);
}

/*
 *	release_memory:
 *
 *	Counterpart of grab_memory.  Removes the kernel mappings for
 *	[virt, virt + size), then for each page of "object" in
 *	[offset, offset + size): clears its modify bit, puts it on the
 *	paging queue(s) its inactive/active flags call for, and issues
 *	PAGE_WAKEUP on it.
 *
 *	NOTE(review): the result of vm_page_lookup is used without a
 *	null check, so every page in the range is expected to be
 *	present in the object.
 */
release_memory(virt, object, offset, size)
	vm_offset_t		virt;
	register vm_object_t	object;
	vm_offset_t		offset;
	vm_size_t		size;
{
	register vm_page_t	pg;
	register vm_offset_t	pos, limit;

	pmap_remove(kernel_pmap, virt, virt + size);

	limit = offset + size;

	vm_object_lock(object);
	vm_page_lock_queues();
	for (pos = offset; pos < limit; pos += PAGE_SIZE) {
		pg = vm_page_lookup(object, pos);
		pmap_clear_modify(VM_PAGE_TO_PHYS(pg));

		if (pg->inactive) {
			queue_enter(&vm_page_queue_inactive, pg,
					vm_page_t, pageq);
		}
		if (pg->active) {
			queue_enter(&vm_page_queue_active, pg,
					vm_page_t, pageq);
		}

		PAGE_WAKEUP(pg);
	}
	vm_page_unlock_queues();
	vm_object_unlock(object);
}
/*
 *	Mapped I/O data structures.
 *
 *	A fixed pool of IPB_MAXBUFS buffer headers.  inode_pager_bootstrap
 *	gives each header its own MAXBSIZE-byte window of the virtual
 *	range starting at ipb_va and threads all of them onto ipb_list
 *	through b_forw.  ipb_get and ipb_put take headers from and
 *	return headers to that list.
 */

#define	IPB_MAXBUFS	20		/* number of buffers to use */

vm_offset_t	ipb_va;			/* beginning reserved virtual addr */
struct buf	ipb_buffers[IPB_MAXBUFS]; /* the buffers (static!) */
struct buf	*ipb_list;		/* list of free buffers */
boolean_t	ipb_waiting;		/* anyone waiting for a buffer? */
/* Lock taken by ipb_get/ipb_put around ipb_list and ipb_waiting. */
decl_simple_lock_data(,ipb_lock)

/*
 *	ipb_get:
 *
 *	Take a buffer header off the free list, sleeping on the
 *	address of ipb_list until one is available.  The returned
 *	header has been unlinked from the list.
 */
struct buf *ipb_get()
{
	register struct buf	*free_bp;

	simple_lock(&ipb_lock);
	for (;;) {
		free_bp = ipb_list;
		if (free_bp != 0)
			break;

		/*
		 *	List is empty: note that someone is waiting,
		 *	sleep, then take the lock again and re-check.
		 */
		ipb_waiting = TRUE;
		thread_sleep((int) &ipb_list, &ipb_lock, FALSE);
		simple_lock(&ipb_lock);
	}
	ipb_list = free_bp->b_forw;
	simple_unlock(&ipb_lock);

	return(free_bp);
}

/*
 *	ipb_put:
 *
 *	Return a buffer header to the free list and, if any thread
 *	has flagged itself as waiting in ipb_get, wake the sleepers.
 *
 *	thread_wakeup is called with the event alone, as elsewhere
 *	in this file; the extra lock/interruptible arguments belong
 *	to thread_sleep only.  ipb_waiting is cleared once the wakeup
 *	is issued so that later puts do not wake needlessly; a woken
 *	thread that still finds the list empty sets the flag again
 *	before going back to sleep.
 */
ipb_put(bp)
	register struct buf	*bp;
{
	simple_lock(&ipb_lock);
	bp->b_forw = ipb_list;
	ipb_list = bp;
	if (ipb_waiting) {
		ipb_waiting = FALSE;
		thread_wakeup((int) &ipb_list);
	}
	simple_unlock(&ipb_lock);
}
#endif	NNBC > 0

void 	inode_pager_bootstrap()
{
	register vm_size_t	size;
	register int		i;
#if	NNBC > 0
	register struct buf	*bp;
#endif	NNBC > 0

	/*
	 *	Initialize zone of paging structures.
	 */

	size = (vm_size_t) sizeof(struct istruct);
	istruct_zone = zinit(size,
			(vm_size_t) size*ninode*10,
			(vm_size_t) 10*1024,
			FALSE, "inode pager structures");
	queue_init(&istruct_queue);
	simple_lock_init(&istruct_lock);
	istruct_qcount = 0;
	istruct_qmax = ninode/4;
	istruct_released = 0;

	for (i = 0; i < NMOUNT; i++) {
		iswap_table[i].swap_preferred = FALSE;
		iswap_table[i].swap_never = FALSE;
	}
	iswap_table[0].swap_never = TRUE;

#if	NNBC > 0
	ipb_va = kmem_alloc_pageable(kernel_map, MAXBSIZE * IPB_MAXBUFS);
	ipb_waiting = FALSE;
	simple_lock_init(&ipb_lock);
	ipb_list = (struct buf *) 0;
	bp = ipb_buffers;
	for (i = 0; i < IPB_MAXBUFS; i++) {
		bp->b_forw = ipb_list;	/* insert into list */
		ipb_list = bp;
		bp->b_un.b_addr = (caddr_t) ipb_va + MAXBSIZE*i; /* note VA */
		bp++;
	}
#endif	NNBC > 0
}
