/* 
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved.  The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 *	File:	mfs_prim.c
 *	Author:	Avadis Tevanian, Jr.
 *
 *	Copyright (C) 1987, Avadis Tevanian, Jr.
 *
 *	Support for mapped file system implementation.
 *
 * HISTORY
 * $Log:	mfs_prim.c,v $
 * Revision 2.3  89/01/15  21:24:23  rpd
 * 	Updated includes to the new style.
 * 	Use decl_simple_lock_data.
 * 
 * 29-Jan-88  David Golub (dbg) at Carnegie-Mellon University
 *	Corrected calls to inode_pager_setup and kmem_alloc.
 *
 * 15-Sep-87  Michael Young (mwyoung) at Carnegie-Mellon University
 *	De-linted.
 *
 * 18-Jun-87  Michael Young (mwyoung) at Carnegie-Mellon University
 *	Make most of this file dependent on MACH_NBC.
 *
 * 30-Apr-87  Avadis Tevanian (avie) at Carnegie-Mellon University
 *	Created.
 */

#include <mach_nbc.h>

#include <sys/lock.h>
#include <sys/mfs.h>

#include <sys/param.h>		/* all */
#include <sys/systm.h>		/* for */
#include <sys/mount.h>		/* inode.h */
#include <sys/dir.h>		/* Sure */
#include <sys/user.h>		/* is */
#include <sys/inode.h>		/* ugly */

#include <vm/vm_kern.h>
#include <vm/memory_object.h>
#include <vm/vm_param.h>

/*
 *	Private variables and macros.
 */

/*
 *	vm_info_queue holds the vm_info structures whose mappings may be
 *	reclaimed: map_inode and mfs_put add entries with queue_enter and
 *	remap_inode takes its victim with queue_first.  vm_info_lock_data
 *	guards the queue itself as well as the "queued" and "use_count"
 *	fields, which are only changed while it is held.
 */
queue_head_t		vm_info_queue;		/* lru list of structures */
decl_simple_lock_data(,	vm_info_lock_data)	/* lock for lru list */

/* Take and release the lock on the LRU list. */
#define	vm_info_lock()		simple_lock(&vm_info_lock_data)
#define	vm_info_unlock()	simple_unlock(&vm_info_lock_data)

#if	MACH_NBC
/*
 *	mfs_alloc_lock_data serializes allocation and removal of address
 *	space in mfs_map.  mfs_alloc_wanted is set by remap_inode when it
 *	has to wait for space and is cleared by mfs_map_remove when it
 *	issues the wakeup.  mfs_alloc_blocks counts how many times
 *	remap_inode had to wait (statistic only).
 */
lock_data_t		mfs_alloc_lock_data;
boolean_t		mfs_alloc_wanted;
long			mfs_alloc_blocks = 0;

/* Take and release the mfs_map allocation lock (always as a writer). */
#define mfs_alloc_lock()	lock_write(&mfs_alloc_lock_data)
#define mfs_alloc_unlock()	lock_write_done(&mfs_alloc_lock_data)

vm_map_t	mfs_map;	/* from kmem_suballoc(kernel_map) in mfs_init */

/*
 *	mfs_map_size is the number of bytes of VM to use for file mapping.
 *	It should be set by machine dependent code (before the call to
 *	mfs_init) if the default is inappropriate.
 *
 *	mfs_max_window is the largest window size that will be given to
 *	a file mapping.  If it is still zero when mfs_init runs, mfs_init
 *	sets it to mfs_map_size / 20; in either case mfs_init raises it to
 *	CHUNK_SIZE if it is smaller.  This too may be set by machine
 *	dependent code if the default is not appropriate.
 */

vm_size_t	mfs_map_size = 8*1024*1024;	/* size in bytes */
vm_size_t	mfs_max_window = 0;		/* largest window to use */

/*
 *	CHUNK_SIZE is the smallest window remap_inode will map and the
 *	lower bound mfs_init enforces on mfs_max_window.
 */
#define CHUNK_SIZE	(64*1024)	/* XXX */
#endif	MACH_NBC

/*
 *	mfs_init:
 *
 *	Initialize the mfs module.
 */

mfs_init()
{
	register struct vm_info	*vp;
	register struct inode	*ip;
	int			i;
#if	MACH_NBC
	int			min, max;
#endif	MACH_NBC

	queue_init(&vm_info_queue);
	simple_lock_init(&vm_info_lock_data);
#if	MACH_NBC
	lock_init(&mfs_alloc_lock_data, TRUE);
	mfs_alloc_wanted = FALSE;
	mfs_map = kmem_suballoc(kernel_map, &min, &max, mfs_map_size, TRUE);
	if (mfs_max_window == 0)
		mfs_max_window = mfs_map_size / 20;
	if (mfs_max_window < CHUNK_SIZE)
		mfs_max_window = CHUNK_SIZE;
#endif	MACH_NBC

	vp = (struct vm_info *) kmem_alloc(kernel_map,
				(vm_size_t)ninode*sizeof(struct vm_info));
	ip = inode;
	for (i = 0; i < ninode; i++)
		vm_info_init(ip++, vp++);
}

/*
 *	vm_info_init:
 *
 *	Reset a vm_info structure to the "nothing mapped, nobody using
 *	it" state and attach it to the given inode.
 */
vm_info_init(ip, vp)
	struct inode *ip;
	register struct vm_info	*vp;
{
	lock_init(&vp->lock, TRUE);	/* sleep lock */

	/* No window and no backing object yet. */
	vp->va = 0;
	vp->offset = 0;
	vp->size = 0;
	vp->inode_size = 0;
	vp->object = VM_OBJECT_NULL;

	/* Nobody has the file mapped or in use. */
	vp->map_count = 0;
	vp->use_count = 0;
	vp->mapped = FALSE;
	vp->queued = FALSE;

	ip->vm_info = vp;
}

#if	MACH_NBC
/*
 *	map_inode:
 *
 *	Indicate that the specified inode should be mapped into VM.
 *	A reference count is maintained for each mapped file.
 *
 *	The map count is incremented on every call, including the calls
 *	that return early because the file is already mapped or is still
 *	cached.  On the first real mapping a pager and the VM object
 *	backing it are set up, the vm_info is marked mapped, small files
 *	are mapped in their entirety, and the vm_info is put on the LRU
 *	list.
 */
map_inode(ip)
	register struct inode	*ip;
{
	register struct vm_info	*vp;
	memory_object_t	pager;
	decl_simple_lock_data(extern,vm_alloc_lock)

	vp = ip->vm_info;
	/* Count this mapping; only the first one has any work to do. */
	if (vp->map_count++ > 0)
		return;		/* file already mapped */

	/* Count was zero but the mapping state was kept around: reuse it. */
	if (vp->mapped)
		return;		/* file was still cached */

	pager = vp->pager = (memory_object_t) inode_pager_setup(ip, FALSE, TRUE);
				/* not a TEXT file, can cache */
	/*
	 *	Lookup what object is actually holding this file's
	 *	pages so we can flush them when necessary.  This
	 *	would be done differently in an out-of-kernel implementation.
	 *
	 *	Note that the lookup keeps a reference to the object which
	 *	we must release elsewhere.
	 *
	 *	The lookup and the create-and-enter fallback are done under
	 *	vm_alloc_lock as a single step.
	 */
	simple_lock(&vm_alloc_lock);
	vp->object = vm_object_lookup(pager);
	if (vp->object == VM_OBJECT_NULL) {
		/* No object is associated with this pager yet: make one. */
		vp->object = vm_object_allocate(0);
		vm_object_enter(vp->object, pager);
		vm_object_setpager(vp->object, pager, (vm_offset_t) 0, FALSE);
	}
	simple_unlock(&vm_alloc_lock);

	/* Start with no window mapped; remember the current file size. */
	vp->va = 0;
	vp->size = 0;
	vp->offset = 0;
	vp->mapped = TRUE;
	vp->inode_size = ip->i_size;

	/*
	 *	If the file is less than the maximum window size then
	 *	just map the whole file now.
	 */

	if (vp->inode_size > 0 && vp->inode_size < mfs_max_window)
		remap_inode(ip, 0, vp->inode_size);

	/* Make the new mapping visible on the LRU list. */
	vm_info_lock();
	queue_enter(&vm_info_queue, vp, struct vm_info *, lru_links);
	vp->queued = TRUE;
	vm_info_unlock();
}

/*
 *	unmap_inode:
 *
 *	Called when an inode is closed.  Drops one mapping reference
 *	from a mapped file.  When the last reference goes away and the
 *	file has no links left, its mapping resources are released;
 *	a file that still has links stays cached on the assumption
 *	that someone will map it again soon.
 */
unmap_inode(ip)
	register struct inode	*ip;
{
	register struct vm_info	*info = ip->vm_info;

	if (!info->mapped)
		return;	/* not a mapped file */

	info->map_count--;

	/*
	 *	Nothing more to do while other mappers remain, or while
	 *	the file can still be reached by name.
	 */
	if (info->map_count > 0 || ip->i_nlink != 0)
		return;

	mfs_memfree(info);
}

/*
 *	remap_inode:
 *
 *	Remap the specified inode (due to extension of the file perhaps).
 *	Upon return, it should be possible to access data in the file
 *	starting at the "start" address for "size" bytes.
 *
 *	ip	inode whose vm_info window is to be replaced
 *	start	file offset the new window must cover; the window itself
 *		begins at trunc_page(start)
 *	size	number of bytes wanted from "start"; the window is rounded
 *		out to page boundaries and is never smaller than CHUNK_SIZE
 *
 *	Any existing window is removed first.  The routine does not
 *	return until the allocation succeeds: when mfs_map is full it
 *	frees the mapping at the head of the LRU list, or sleeps until
 *	mfs_map_remove reports freed space if the list is empty.  Any
 *	failure other than KERN_NO_SPACE panics.  Always returns TRUE.
 */
remap_inode(ip, start, size)
	register struct inode	*ip;
	vm_offset_t		start;
	register vm_size_t	size;
{
	register struct vm_info	*vp;
	vm_offset_t		addr, offset;
	kern_return_t		ret;

	vp = ip->vm_info;
	/*
	 *	Remove old mapping (making its space available).
	 */
	if (vp->size > 0)
		mfs_map_remove(vp->va, vp->va + vp->size);

	/*
	 *	Page-align the window and enforce the minimum window size.
	 */
	offset = trunc_page(start);
	size = round_page(start + size) - offset;
	if (size < CHUNK_SIZE)
		size = CHUNK_SIZE;
	do {
		/* Each attempt starts its search at the bottom of mfs_map. */
		addr = vm_map_min(mfs_map);
		mfs_alloc_lock();
		/* NOTE(review): the TRUE argument is presumably "anywhere" -- confirm. */
		ret = vm_allocate_with_pager(mfs_map, &addr, size, TRUE, vp->pager, offset);
		/*
		 *	If there was no space, see if we can free up mappings
		 *	on the LRU list.  If not, just wait for someone else
		 *	to free their memory.
		 */
		if (ret == KERN_NO_SPACE) {
			register struct vm_info	*vp1;

			/* Detach the head of the LRU list, if there is one. */
			vm_info_lock();
			vp1 = VM_INFO_NULL;
			if (!queue_empty(&vm_info_queue)) {
				vp1 = (struct vm_info *)
						queue_first(&vm_info_queue);
				queue_remove(&vm_info_queue, vp1,
					struct vm_info *, lru_links);
				vp1->queued = FALSE;
			}
			vm_info_unlock();
			/*
			 *	If we found someone, free up its memory.
			 */
			if (vp1 != VM_INFO_NULL) {
				/*
				 *	mfs_memfree goes through mfs_map_remove,
				 *	which takes the allocation lock itself,
				 *	so it must be dropped around the call.
				 */
				mfs_alloc_unlock();
				mfs_memfree(vp1);
				mfs_alloc_lock();
			}
			else {
				/*
				 *	The wanted flag is set and the wait is
				 *	asserted before the allocation lock is
				 *	dropped; mfs_map_remove tests the flag
				 *	and issues the wakeup while holding that
				 *	same lock.
				 */
				mfs_alloc_wanted = TRUE;
				assert_wait(&mfs_map, FALSE);
				mfs_alloc_blocks++;	/* statistic only */
				mfs_alloc_unlock();
				thread_block();
				mfs_alloc_lock();
			}
		}
		else if (ret != KERN_SUCCESS) {
			printf("Unexpected error on file map, ret = %d.\n",
					ret);
			panic("remap_inode");
		}
		/* Every path through the loop body arrives here holding the lock. */
		mfs_alloc_unlock();
	} while (ret != KERN_SUCCESS);
	/*
	 *	Fill in variables corresponding to new mapping.
	 */
	vp->va = addr;
	vp->size = size;
	vp->offset = offset;
	return(TRUE);
}

/*
 *	mfs_trunc:
 *
 *	The specified inode is truncated to the specified size.
 */
mfs_trunc(ip, length)
	register struct inode	*ip;
	register int		length;
{
	register struct vm_info	*vp;
	register vm_size_t	size, rsize;

	vp = ip->vm_info;
	if (!vp->mapped)
		return;	/* not a mapped file */
	if (length > ip->i_size) {
		return;
	}

	lock_write(&vp->lock);

	/*
	 *	Unmap everything past the new end page.
	 *	Also flush any pages that may be left in the object using
	 *	ino_flush (is this necessary?).
	 */
	size = round_page(length);
	rsize = size - vp->offset;	/* size relative to mapped offset */
	if (vp->size > 0 && rsize < vp->size) {
		mfs_map_remove(vp->va + rsize, vp->va + vp->size);
		ino_flush(ip, size, vp->size - rsize);
		vp->size = rsize;		/* mapped size */
	}
	/*
	 *	If the new length isn't page aligned, zero the extra
	 *	bytes in the last page.
	 */
	if (length != size) {
		bzero(vp->va + vp->offset + length, (size - length));
	}
	vp->inode_size = length;	/* file size */
	lock_write_done(&vp->lock);
}

/*
 *	mfs_get:
 *
 *	Obtain locked access to a file.  "start" and "size" describe the
 *	range the caller is about to touch and act as a hint for where
 *	to place the window if one has to be (re)mapped.  On return the
 *	vm_info write lock is held and enough VM has been secured for
 *	remapping within that range (each window being bounded by the
 *	maximum window size).  Release with mfs_put.
 */
mfs_get(ip, start, size)
	register struct inode	*ip;
	vm_offset_t		start;
	register vm_size_t	size;
{
	register struct vm_info	*info = ip->vm_info;

	/*
	 *	Take the file off the LRU list while it is in use.  The
	 *	use count keeps mfs_put from requeueing it too early.
	 */
	vm_info_lock();
	if (info->queued) {
		queue_remove(&vm_info_queue, info, struct vm_info *, lru_links);
		info->queued = FALSE;
	}
	info->use_count++;
	vm_info_unlock();

	/*
	 *	Exclusive access to the file from here on.
	 */
	lock_write(&info->lock);

	/*
	 *	Clamp the request to the maximum window, then grow the
	 *	window now if the request does not fit the current one.
	 */
	if (size > mfs_max_window)
		size = mfs_max_window;

	if (size > info->size)
		remap_inode(ip, start, size);
}

/*
 *	mfs_put:
 *
 *	Give up the locked access obtained with mfs_get.  The last user
 *	to leave puts the file back on the LRU list; the vm_info write
 *	lock is then released.
 */
mfs_put(ip)
	register struct inode	*ip;
{
	register struct vm_info	*info = ip->vm_info;

	/*
	 *	Requeue on the LRU list once no one else is using the file.
	 */
	vm_info_lock();
	info->use_count--;
	if (info->use_count == 0) {
		queue_enter(&vm_info_queue, info, struct vm_info *, lru_links);
		info->queued = TRUE;
	}
	vm_info_unlock();

	/*
	 *	Allow the next user in.
	 */
	lock_write_done(&info->lock);
}

/*
 *	mfs_uncache:
 *
 *	Make sure no cached mapping is left for the specified inode.
 *	Only a file that is mapped but has a zero map count (i.e. is
 *	merely cached) is affected.
 */
mfs_uncache(ip)
	register struct inode	*ip;
{
	register struct vm_info	*info = ip->vm_info;

	/*
	 *	Leave alone anything that is not mapped at all or that
	 *	still has active mappers.
	 */
	if (!info->mapped || info->map_count != 0)
		return;

	mfs_memfree(info);
}

/*
 *	mfs_memfree:
 *
 *	Release the VM window held by a vm_info structure.  The
 *	structure is taken off the LRU list if it is on it, and then,
 *	with its write lock held, its address range is removed from
 *	mfs_map and its window is reset to empty.  If the map count is
 *	zero (the file was only cached) the structure is also marked
 *	unmapped and the object reference obtained in map_inode is
 *	dropped.
 */
mfs_memfree(vp)
	register struct vm_info	*vp;
{
	/* Dequeue under the LRU lock before waiting for the file lock. */
	vm_info_lock();
	if (vp->queued) {
		queue_remove(&vm_info_queue, vp, struct vm_info *, lru_links);
		vp->queued = FALSE;
	}
	vm_info_unlock();
	/* Wait for any current user (see mfs_get/mfs_put) to finish. */
	lock_write(&vp->lock);
	/* Called even when vp->size is 0, i.e. with an empty range. */
	mfs_map_remove(vp->va, vp->va + vp->size);
	vp->size = 0;
	vp->va = 0;
	if (vp->map_count == 0) {	/* cached only */
		vp->mapped = FALSE;
		/* lookup (in map_inode) gained a reference, so lose it */
		vm_object_deallocate(vp->object);
		vp->object = VM_OBJECT_NULL;
	}
	lock_write_done(&vp->lock);
}

/*
 *	mfs_cache_clear:
 *
 *	Clear the mapped file cache.  Note that the map_count is implicitly
 *	locked by the Unix file system code that calls this routine.
 */
mfs_cache_clear()
{
	register struct vm_info	*vp;

	vm_info_lock();
	vp = (struct vm_info *) queue_first(&vm_info_queue);
	while (!queue_end(&vm_info_queue, (queue_entry_t) vp)) {
		if (vp->map_count == 0) {
			vm_info_unlock();
			mfs_memfree(vp);
			/*
			 *	Since we unlocked, be sure to restart.
			 */
			vm_info_lock();
			vp = (struct vm_info *) queue_first(&vm_info_queue);
		}
		else {
			vp = (struct vm_info *) queue_next(&vp->lru_links);
		}
	}
	vm_info_unlock();
}

/*
 *	mfs_map_remove:
 *
 *	Remove specified address range from the mfs map and wake up anyone
 *	waiting for map space.
 *
 *	start	first address of the range in mfs_map
 *	end	address just past the range; like "start" it is an address
 *		handed straight to vm_map_remove, hence vm_offset_t
 *
 *	The removal, the test of mfs_alloc_wanted and the wakeup all
 *	happen under the allocation lock, the same lock remap_inode holds
 *	when it sets the flag and asserts its wait.
 */

mfs_map_remove(start, end)
	vm_offset_t	start;
	vm_offset_t	end;
{
	mfs_alloc_lock();
	vm_map_remove(mfs_map, start, end);
	if (mfs_alloc_wanted) {
		/* Clear the request before waking the waiters on &mfs_map. */
		mfs_alloc_wanted = FALSE;
		thread_wakeup(&mfs_map);
	}
	mfs_alloc_unlock();
}
#endif	MACH_NBC
