diff -urN linux-2.4.16-reiserfspatches-immutable/Makefile linux-2.4.16-reiserfspatches-immutable-ctx4/Makefile
--- linux-2.4.16-reiserfspatches-immutable/Makefile	Mon Dec 10 13:12:57 2001
+++ linux-2.4.16-reiserfspatches-immutable-ctx4/Makefile	Mon Dec 10 15:01:37 2001
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 4
 SUBLEVEL = 16
-EXTRAVERSION =
+EXTRAVERSION =ctx-4
 
 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
diff -urN linux-2.4.16-reiserfspatches-immutable/arch/i386/kernel/entry.S linux-2.4.16-reiserfspatches-immutable-ctx4/arch/i386/kernel/entry.S
--- linux-2.4.16-reiserfspatches-immutable/arch/i386/kernel/entry.S	Mon Dec 10 13:12:28 2001
+++ linux-2.4.16-reiserfspatches-immutable-ctx4/arch/i386/kernel/entry.S	Mon Dec 10 15:01:37 2001
@@ -622,6 +622,8 @@
 	.long SYMBOL_NAME(sys_ni_syscall)	/* Reserved for Security */
 	.long SYMBOL_NAME(sys_gettid)
 	.long SYMBOL_NAME(sys_readahead)	/* 225 */
+	.long SYMBOL_NAME(sys_new_s_context)
+	.long SYMBOL_NAME(sys_set_ipv4root)
 
 	.rept NR_syscalls-(.-sys_call_table)/4
 		.long SYMBOL_NAME(sys_ni_syscall)
diff -urN linux-2.4.16-reiserfspatches-immutable/arch/i386/kernel/ptrace.c linux-2.4.16-reiserfspatches-immutable-ctx4/arch/i386/kernel/ptrace.c
--- linux-2.4.16-reiserfspatches-immutable/arch/i386/kernel/ptrace.c	Mon Dec 10 13:12:42 2001
+++ linux-2.4.16-reiserfspatches-immutable-ctx4/arch/i386/kernel/ptrace.c	Mon Dec 10 15:01:37 2001
@@ -170,7 +170,7 @@
 	if (child)
 		get_task_struct(child);
 	read_unlock(&tasklist_lock);
-	if (!child)
+	if (!child || child->s_context != current->s_context)
 		goto out;
 	ret = -EPERM;
diff -urN linux-2.4.16-reiserfspatches-immutable/fs/exec.c linux-2.4.16-reiserfspatches-immutable-ctx4/fs/exec.c
--- linux-2.4.16-reiserfspatches-immutable/fs/exec.c	Mon Dec 10 13:12:37 2001
+++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/exec.c	Mon Dec 10 15:01:37 2001
@@ -685,7 +685,7 @@
 	kernel_cap_t new_permitted, working;
 	int do_unlock = 0;
 
-	new_permitted = cap_intersect(bprm->cap_permitted, cap_bset);
+	new_permitted = cap_intersect(bprm->cap_permitted, current->cap_bset);
 	working = cap_intersect(bprm->cap_inheritable,
 				current->cap_inheritable);
 	new_permitted = cap_combine(new_permitted, working);
diff -urN linux-2.4.16-reiserfspatches-immutable/fs/ext2/ialloc.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ext2/ialloc.c~
--- linux-2.4.16-reiserfspatches-immutable/fs/ext2/ialloc.c~	Thu Jan  1 01:00:00 1970
+++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ext2/ialloc.c~	Mon Dec 10 14:28:03 2001
@@ -0,0 +1,510 @@
+/*
+ *  linux/fs/ext2/ialloc.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  BSD ufs-inspired inode and directory allocation by
+ *  Stephen Tweedie (sct@dcs.ed.ac.uk), 1993
+ *  Big-endian to little-endian byte-swapping/bitmaps by
+ *        David S. Miller (davem@caip.rutgers.edu), 1995
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/locks.h>
+#include <linux/quotaops.h>
+
+
+/*
+ * ialloc.c contains the inodes allocation and deallocation routines
+ */
+
+/*
+ * The free inodes are managed by bitmaps.  A file system contains several
+ * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
+ * block for inodes, N blocks for the inode table and data blocks.
+ *
+ * The file system contains group descriptors which are located after the
+ * super block.  Each descriptor contains the number of the bitmap block and
+ * the free blocks count in the block.
The descriptors are loaded in memory + * when a file system is mounted (see ext2_read_super). + */ + + +/* + * Read the inode allocation bitmap for a given block_group, reading + * into the specified slot in the superblock's bitmap cache. + * + * Return buffer_head of bitmap on success or NULL. + */ +static struct buffer_head *read_inode_bitmap (struct super_block * sb, + unsigned long block_group) +{ + struct ext2_group_desc *desc; + struct buffer_head *bh = NULL; + + desc = ext2_get_group_desc(sb, block_group, NULL); + if (!desc) + goto error_out; + + bh = bread(sb->s_dev, le32_to_cpu(desc->bg_inode_bitmap), + sb->s_blocksize); + if (!bh) + ext2_error (sb, "read_inode_bitmap", + "Cannot read inode bitmap - " + "block_group = %lu, inode_bitmap = %lu", + block_group, (unsigned long) desc->bg_inode_bitmap); +error_out: + return bh; +} + +/* + * load_inode_bitmap loads the inode bitmap for a blocks group + * + * It maintains a cache for the last bitmaps loaded. This cache is managed + * with a LRU algorithm. + * + * Notes: + * 1/ There is one cache per mounted file system. + * 2/ If the file system contains less than EXT2_MAX_GROUP_LOADED groups, + * this function reads the bitmap without maintaining a LRU cache. + * + * Return the buffer_head of the bitmap or the ERR_PTR(error) + */ +static struct buffer_head *load_inode_bitmap (struct super_block * sb, + unsigned int block_group) +{ + int i, slot = 0; + struct ext2_sb_info *sbi = &sb->u.ext2_sb; + struct buffer_head *bh = sbi->s_inode_bitmap[0]; + + if (block_group >= sbi->s_groups_count) + ext2_panic (sb, "load_inode_bitmap", + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sbi->s_groups_count); + + if (sbi->s_loaded_inode_bitmaps > 0 && + sbi->s_inode_bitmap_number[0] == block_group && bh) + goto found; + + if (sbi->s_groups_count <= EXT2_MAX_GROUP_LOADED) { + slot = block_group; + bh = sbi->s_inode_bitmap[slot]; + if (!bh) + goto read_it; + if (sbi->s_inode_bitmap_number[slot] == slot) + goto found; + ext2_panic (sb, "load_inode_bitmap", + "block_group != inode_bitmap_number"); + } + + bh = NULL; + for (i = 0; i < sbi->s_loaded_inode_bitmaps && + sbi->s_inode_bitmap_number[i] != block_group; + i++) + ; + if (i < sbi->s_loaded_inode_bitmaps) + bh = sbi->s_inode_bitmap[i]; + else if (sbi->s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED) + sbi->s_loaded_inode_bitmaps++; + else + brelse (sbi->s_inode_bitmap[--i]); + + while (i--) { + sbi->s_inode_bitmap_number[i+1] = sbi->s_inode_bitmap_number[i]; + sbi->s_inode_bitmap[i+1] = sbi->s_inode_bitmap[i]; + } + +read_it: + if (!bh) + bh = read_inode_bitmap (sb, block_group); + sbi->s_inode_bitmap_number[slot] = block_group; + sbi->s_inode_bitmap[slot] = bh; + if (!bh) + return ERR_PTR(-EIO); +found: + return bh; +} + +/* + * NOTE! When we get the inode, we're the only people + * that have access to it, and as such there are no + * race conditions we have to worry about. The inode + * is not on the hash-lists, and it cannot be reached + * through the filesystem because the directory entry + * has been deleted earlier. + * + * HOWEVER: we must make sure that we get no aliases, + * which means that we have to call "clear_inode()" + * _before_ we mark the inode not in use in the inode + * bitmaps. Otherwise a newly created file might use + * the same inode number (not actually the same pointer + * though), and then we'd have two inodes sharing the + * same inode number and space on the harddisk. 
+ */
+void ext2_free_inode (struct inode * inode)
+{
+	struct super_block * sb = inode->i_sb;
+	int is_directory;
+	unsigned long ino;
+	struct buffer_head * bh;
+	struct buffer_head * bh2;
+	unsigned long block_group;
+	unsigned long bit;
+	struct ext2_group_desc * desc;
+	struct ext2_super_block * es;
+
+	ino = inode->i_ino;
+	ext2_debug ("freeing inode %lu\n", ino);
+
+	/*
+	 * Note: we must free any quota before locking the superblock,
+	 * as writing the quota to disk may need the lock as well.
+	 */
+	if (!is_bad_inode(inode)) {
+		/* Quota is already initialized in iput() */
+		DQUOT_FREE_INODE(inode);
+		DQUOT_DROP(inode);
+	}
+
+	lock_super (sb);
+	es = sb->u.ext2_sb.s_es;
+	is_directory = S_ISDIR(inode->i_mode);
+
+	/* Do this BEFORE marking the inode not in use or returning an error */
+	clear_inode (inode);
+
+	if (ino < EXT2_FIRST_INO(sb) ||
+	    ino > le32_to_cpu(es->s_inodes_count)) {
+		ext2_error (sb, "ext2_free_inode",
+			    "reserved or nonexistent inode %lu", ino);
+		goto error_return;
+	}
+	block_group = (ino - 1) / EXT2_INODES_PER_GROUP(sb);
+	bit = (ino - 1) % EXT2_INODES_PER_GROUP(sb);
+	bh = load_inode_bitmap (sb, block_group);
+	if (IS_ERR(bh))
+		goto error_return;
+
+	/* Ok, now we can actually update the inode bitmaps.. */
+	if (!ext2_clear_bit (bit, bh->b_data))
+		ext2_error (sb, "ext2_free_inode",
+			    "bit already cleared for inode %lu", ino);
+	else {
+		desc = ext2_get_group_desc (sb, block_group, &bh2);
+		if (desc) {
+			desc->bg_free_inodes_count =
+				cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1);
+			if (is_directory)
+				desc->bg_used_dirs_count =
+					cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1);
+		}
+		mark_buffer_dirty(bh2);
+		es->s_free_inodes_count =
+			cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
+		mark_buffer_dirty(sb->u.ext2_sb.s_sbh);
+	}
+	mark_buffer_dirty(bh);
+	if (sb->s_flags & MS_SYNCHRONOUS) {
+		ll_rw_block (WRITE, 1, &bh);
+		wait_on_buffer (bh);
+	}
+	sb->s_dirt = 1;
+error_return:
+	unlock_super (sb);
+}
+
+/*
+ * There are two policies for allocating an inode.  If the new inode is
+ * a directory, then a forward search is made for a block group with both
+ * free space and a low directory-to-inode ratio; if that fails, then of
+ * the groups with above-average free space, that group with the fewest
+ * directories already is chosen.
+ *
+ * For other inodes, search forward from the parent directory's block
+ * group to find a free inode.
+ */ + +static int find_group_dir(struct super_block *sb, int parent_group) +{ + struct ext2_super_block * es = sb->u.ext2_sb.s_es; + int ngroups = sb->u.ext2_sb.s_groups_count; + int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups; + struct ext2_group_desc *desc, *best_desc = NULL; + struct buffer_head *bh, *best_bh = NULL; + int group, best_group = -1; + + for (group = 0; group < ngroups; group++) { + desc = ext2_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + continue; + if (!best_desc || + (le16_to_cpu(desc->bg_free_blocks_count) > + le16_to_cpu(best_desc->bg_free_blocks_count))) { + best_group = group; + best_desc = desc; + best_bh = bh; + } + } + if (!best_desc) + return -1; + best_desc->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(best_desc->bg_free_inodes_count) - 1); + best_desc->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(best_desc->bg_used_dirs_count) + 1); + mark_buffer_dirty(best_bh); + return best_group; +} + +static int find_group_other(struct super_block *sb, int parent_group) +{ + int ngroups = sb->u.ext2_sb.s_groups_count; + struct ext2_group_desc *desc; + struct buffer_head *bh; + int group, i; + + /* + * Try to place the inode in its parent directory + */ + group = parent_group; + desc = ext2_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count)) + goto found; + + /* + * Use a quadratic hash to find a group with a + * free inode + */ + for (i = 1; i < ngroups; i <<= 1) { + group += i; + if (group >= ngroups) + group -= ngroups; + desc = ext2_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count)) + goto found; + } + + /* + * That failed: try linear search for a free inode + */ + group = parent_group + 1; + for (i = 2; i < ngroups; i++) { + if (++group >= ngroups) + group = 0; + desc = ext2_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count)) + goto found; + } + + return -1; + +found: + desc->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) - 1); + mark_buffer_dirty(bh); + return group; +} + +struct inode * ext2_new_inode (const struct inode * dir, int mode) +{ + struct super_block * sb; + struct buffer_head * bh; + struct buffer_head * bh2; + int group, i; + ino_t ino; + struct inode * inode; + struct ext2_group_desc * desc; + struct ext2_super_block * es; + int err; + + sb = dir->i_sb; + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + lock_super (sb); + es = sb->u.ext2_sb.s_es; +repeat: + if (S_ISDIR(mode)) + group = find_group_dir(sb, dir->u.ext2_i.i_block_group); + else + group = find_group_other(sb, dir->u.ext2_i.i_block_group); + + err = -ENOSPC; + if (group == -1) + goto fail; + + err = -EIO; + bh = load_inode_bitmap (sb, group); + if (IS_ERR(bh)) + goto fail2; + + i = ext2_find_first_zero_bit ((unsigned long *) bh->b_data, + EXT2_INODES_PER_GROUP(sb)); + if (i >= EXT2_INODES_PER_GROUP(sb)) + goto bad_count; + ext2_set_bit (i, bh->b_data); + + mark_buffer_dirty(bh); + if (sb->s_flags & MS_SYNCHRONOUS) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + + ino = group * EXT2_INODES_PER_GROUP(sb) + i + 1; + if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { + ext2_error (sb, "ext2_new_inode", + "reserved inode or inode > inodes count - " + "block_group = %d,inode=%ld", group, ino); + err = -EIO; + goto fail2; + } + + es->s_free_inodes_count = + 
cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1); + mark_buffer_dirty(sb->u.ext2_sb.s_sbh); + sb->s_dirt = 1; + inode->i_uid = current->fsuid; + if (test_opt (sb, GRPID)) + inode->i_gid = dir->i_gid; + else if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current->fsgid; + inode->i_mode = mode; + + inode->i_ino = ino; + inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->u.ext2_i.i_new_inode = 1; + inode->u.ext2_i.i_flags = dir->u.ext2_i.i_flags; + if (S_ISLNK(mode)) + inode->u.ext2_i.i_flags &= ~(EXT2_IMMUTABLE_FILE_FL|EXT2_IMMUTABLE_LINK_FL|EXT2_APPEND_FL); + inode->u.ext2_i.i_block_group = group; + if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) + inode->i_flags |= S_SYNC; + insert_inode_hash(inode); + inode->i_generation = event++; + mark_inode_dirty(inode); + + unlock_super (sb); + if(DQUOT_ALLOC_INODE(inode)) { + DQUOT_DROP(inode); + inode->i_flags |= S_NOQUOTA; + inode->i_nlink = 0; + iput(inode); + return ERR_PTR(-EDQUOT); + } + ext2_debug ("allocating inode %lu\n", inode->i_ino); + return inode; + +fail2: + desc = ext2_get_group_desc (sb, group, &bh2); + desc->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(desc->bg_free_inodes_count) + 1); + if (S_ISDIR(mode)) + desc->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(desc->bg_used_dirs_count) - 1); + mark_buffer_dirty(bh2); +fail: + unlock_super(sb); + make_bad_inode(inode); + iput(inode); + return ERR_PTR(err); + +bad_count: + ext2_error (sb, "ext2_new_inode", + "Free inodes count corrupted in group %d", + group); + /* Is it really ENOSPC? */ + err = -ENOSPC; + if (sb->s_flags & MS_RDONLY) + goto fail; + + desc = ext2_get_group_desc (sb, group, &bh2); + desc->bg_free_inodes_count = 0; + mark_buffer_dirty(bh2); + goto repeat; +} + +unsigned long ext2_count_free_inodes (struct super_block * sb) +{ +#ifdef EXT2FS_DEBUG + struct ext2_super_block * es; + unsigned long desc_count = 0, bitmap_count = 0; + int i; + + lock_super (sb); + es = sb->u.ext2_sb.s_es; + for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) { + struct ext2_group_desc *desc = ext2_get_group_desc (sb, i, NULL); + struct buffer_head *bh; + unsigned x; + + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_inodes_count); + bh = load_inode_bitmap (sb, i); + if (IS_ERR(bh)) + continue; + + x = ext2_count_free (bh, EXT2_INODES_PER_GROUP(sb) / 8); + printk ("group %d: stored = %d, counted = %lu\n", + i, le16_to_cpu(desc->bg_free_inodes_count), x); + bitmap_count += x; + } + printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n", + le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); + unlock_super (sb); + return desc_count; +#else + return le32_to_cpu(sb->u.ext2_sb.s_es->s_free_inodes_count); +#endif +} + +#ifdef CONFIG_EXT2_CHECK +/* Called at mount-time, super-block is locked */ +void ext2_check_inodes_bitmap (struct super_block * sb) +{ + struct ext2_super_block * es = sb->u.ext2_sb.s_es; + unsigned long desc_count = 0, bitmap_count = 0; + int i; + + for (i = 0; i < sb->u.ext2_sb.s_groups_count; i++) { + struct ext2_group_desc *desc = ext2_get_group_desc(sb, i, NULL); + struct buffer_head *bh; + unsigned x; + + if (!desc) + continue; + desc_count += le16_to_cpu(desc->bg_free_inodes_count); + bh = load_inode_bitmap (sb, i); + if (IS_ERR(bh)) + continue; + + x = ext2_count_free (bh, EXT2_INODES_PER_GROUP(sb) / 8); + 
if (le16_to_cpu(desc->bg_free_inodes_count) != x)
+			ext2_error (sb, "ext2_check_inodes_bitmap",
+				    "Wrong free inodes count in group %d, "
+				    "stored = %d, counted = %lu", i,
+				    le16_to_cpu(desc->bg_free_inodes_count), x);
+		bitmap_count += x;
+	}
+	if (le32_to_cpu(es->s_free_inodes_count) != bitmap_count)
+		ext2_error (sb, "ext2_check_inodes_bitmap",
+			    "Wrong free inodes count in super block, "
+			    "stored = %lu, counted = %lu",
+			    (unsigned long)le32_to_cpu(es->s_free_inodes_count),
+			    bitmap_count);
+}
+#endif
diff -urN linux-2.4.16-reiserfspatches-immutable/fs/ext2/inode.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ext2/inode.c~
--- linux-2.4.16-reiserfspatches-immutable/fs/ext2/inode.c~	Thu Jan  1 01:00:00 1970
+++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ext2/inode.c~	Mon Dec 10 14:28:03 2001
@@ -0,0 +1,1165 @@
+/*
+ *  linux/fs/ext2/inode.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  from
+ *
+ *  linux/fs/minix/inode.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *
+ *  Goal-directed block allocation by Stephen Tweedie
+ * 	(sct@dcs.ed.ac.uk), 1993, 1998
+ *  Big-endian to little-endian byte-swapping/bitmaps by
+ *        David S. Miller (davem@caip.rutgers.edu), 1995
+ *  64-bit file support on 64-bit platforms by Jakub Jelinek
+ * 	(jj@sunsite.ms.mff.cuni.cz)
+ *
+ *  Assorted race fixes, rewrite of ext2_get_block() by Al Viro, 2000
+ */
+
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/locks.h>
+#include <linux/smp_lock.h>
+#include <linux/sched.h>
+#include <linux/highuid.h>
+#include <linux/quotaops.h>
+#include <linux/module.h>
+
+MODULE_AUTHOR("Remy Card and others");
+MODULE_DESCRIPTION("Second Extended Filesystem");
+MODULE_LICENSE("GPL");
+
+
+static int ext2_update_inode(struct inode * inode, int do_sync);
+
+/*
+ * Called at each iput()
+ */
+void ext2_put_inode (struct inode * inode)
+{
+	ext2_discard_prealloc (inode);
+}
+
+/*
+ * Called at the last iput() if i_nlink is zero.
+ */
+void ext2_delete_inode (struct inode * inode)
+{
+	lock_kernel();
+
+	if (is_bad_inode(inode) ||
+	    inode->i_ino == EXT2_ACL_IDX_INO ||
+	    inode->i_ino == EXT2_ACL_DATA_INO)
+		goto no_delete;
+	inode->u.ext2_i.i_dtime	= CURRENT_TIME;
+	mark_inode_dirty(inode);
+	ext2_update_inode(inode, IS_SYNC(inode));
+	inode->i_size = 0;
+	if (inode->i_blocks)
+		ext2_truncate (inode);
+	ext2_free_inode (inode);
+
+	unlock_kernel();
+	return;
+no_delete:
+	unlock_kernel();
+	clear_inode(inode);	/* We must guarantee clearing of inode...
*/ +} + +void ext2_discard_prealloc (struct inode * inode) +{ +#ifdef EXT2_PREALLOCATE + lock_kernel(); + /* Writer: ->i_prealloc* */ + if (inode->u.ext2_i.i_prealloc_count) { + unsigned short total = inode->u.ext2_i.i_prealloc_count; + unsigned long block = inode->u.ext2_i.i_prealloc_block; + inode->u.ext2_i.i_prealloc_count = 0; + inode->u.ext2_i.i_prealloc_block = 0; + /* Writer: end */ + ext2_free_blocks (inode, block, total); + } + unlock_kernel(); +#endif +} + +static int ext2_alloc_block (struct inode * inode, unsigned long goal, int *err) +{ +#ifdef EXT2FS_DEBUG + static unsigned long alloc_hits = 0, alloc_attempts = 0; +#endif + unsigned long result; + + +#ifdef EXT2_PREALLOCATE + /* Writer: ->i_prealloc* */ + if (inode->u.ext2_i.i_prealloc_count && + (goal == inode->u.ext2_i.i_prealloc_block || + goal + 1 == inode->u.ext2_i.i_prealloc_block)) + { + result = inode->u.ext2_i.i_prealloc_block++; + inode->u.ext2_i.i_prealloc_count--; + /* Writer: end */ + ext2_debug ("preallocation hit (%lu/%lu).\n", + ++alloc_hits, ++alloc_attempts); + } else { + ext2_discard_prealloc (inode); + ext2_debug ("preallocation miss (%lu/%lu).\n", + alloc_hits, ++alloc_attempts); + if (S_ISREG(inode->i_mode)) + result = ext2_new_block (inode, goal, + &inode->u.ext2_i.i_prealloc_count, + &inode->u.ext2_i.i_prealloc_block, err); + else + result = ext2_new_block (inode, goal, 0, 0, err); + } +#else + result = ext2_new_block (inode, goal, 0, 0, err); +#endif + return result; +} + +typedef struct { + u32 *p; + u32 key; + struct buffer_head *bh; +} Indirect; + +static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v) +{ + p->key = *(p->p = v); + p->bh = bh; +} + +static inline int verify_chain(Indirect *from, Indirect *to) +{ + while (from <= to && from->key == *from->p) + from++; + return (from > to); +} + +/** + * ext2_block_to_path - parse the block number into array of offsets + * @inode: inode in question (we are only interested in its superblock) + * @i_block: block number to be parsed + * @offsets: array to store the offsets in + * + * To store the locations of file's data ext2 uses a data structure common + * for UNIX filesystems - tree of pointers anchored in the inode, with + * data blocks at leaves and indirect blocks in intermediate nodes. + * This function translates the block number into path in that tree - + * return value is the path length and @offsets[n] is the offset of + * pointer to (n+1)th node in the nth one. If @block is out of range + * (negative or too large) warning is printed and zero returned. + * + * Note: function doesn't find node addresses, so no IO is needed. All + * we need to know is the capacity of indirect blocks (taken from the + * inode->i_sb). + */ + +/* + * Portability note: the last comparison (check that we fit into triple + * indirect block) is spelled differently, because otherwise on an + * architecture with 32-bit longs and 8Kb pages we might get into trouble + * if our filesystem had 8Kb blocks. We might use long long, but that would + * kill us on x86. Oh, well, at least the sign propagation does not matter - + * i_block would have to be negative in the very beginning, so we would not + * get there at all. 
+ */ + +static int ext2_block_to_path(struct inode *inode, long i_block, int offsets[4]) +{ + int ptrs = EXT2_ADDR_PER_BLOCK(inode->i_sb); + int ptrs_bits = EXT2_ADDR_PER_BLOCK_BITS(inode->i_sb); + const long direct_blocks = EXT2_NDIR_BLOCKS, + indirect_blocks = ptrs, + double_blocks = (1 << (ptrs_bits * 2)); + int n = 0; + + if (i_block < 0) { + ext2_warning (inode->i_sb, "ext2_block_to_path", "block < 0"); + } else if (i_block < direct_blocks) { + offsets[n++] = i_block; + } else if ( (i_block -= direct_blocks) < indirect_blocks) { + offsets[n++] = EXT2_IND_BLOCK; + offsets[n++] = i_block; + } else if ((i_block -= indirect_blocks) < double_blocks) { + offsets[n++] = EXT2_DIND_BLOCK; + offsets[n++] = i_block >> ptrs_bits; + offsets[n++] = i_block & (ptrs - 1); + } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { + offsets[n++] = EXT2_TIND_BLOCK; + offsets[n++] = i_block >> (ptrs_bits * 2); + offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); + offsets[n++] = i_block & (ptrs - 1); + } else { + ext2_warning (inode->i_sb, "ext2_block_to_path", "block > big"); + } + return n; +} + +/** + * ext2_get_branch - read the chain of indirect blocks leading to data + * @inode: inode in question + * @depth: depth of the chain (1 - direct pointer, etc.) + * @offsets: offsets of pointers in inode/indirect blocks + * @chain: place to store the result + * @err: here we store the error value + * + * Function fills the array of triples and returns %NULL + * if everything went OK or the pointer to the last filled triple + * (incomplete one) otherwise. Upon the return chain[i].key contains + * the number of (i+1)-th block in the chain (as it is stored in memory, + * i.e. little-endian 32-bit), chain[i].p contains the address of that + * number (it points into struct inode for i==0 and into the bh->b_data + * for i>0) and chain[i].bh points to the buffer_head of i-th indirect + * block for i>0 and NULL for i==0. In other words, it holds the block + * numbers of the chain, addresses they were taken from (and where we can + * verify that chain did not change) and buffer_heads hosting these + * numbers. + * + * Function stops when it stumbles upon zero pointer (absent block) + * (pointer to last triple returned, *@err == 0) + * or when it gets an IO error reading an indirect block + * (ditto, *@err == -EIO) + * or when it notices that chain had been changed while it was reading + * (ditto, *@err == -EAGAIN) + * or when it reads all @depth-1 indirect blocks successfully and finds + * the whole chain, all way to the data (returns %NULL, *err == 0). + */ +static Indirect *ext2_get_branch(struct inode *inode, + int depth, + int *offsets, + Indirect chain[4], + int *err) +{ + kdev_t dev = inode->i_dev; + int size = inode->i_sb->s_blocksize; + Indirect *p = chain; + struct buffer_head *bh; + + *err = 0; + /* i_data is not going away, no lock needed */ + add_chain (chain, NULL, inode->u.ext2_i.i_data + *offsets); + if (!p->key) + goto no_block; + while (--depth) { + bh = bread(dev, le32_to_cpu(p->key), size); + if (!bh) + goto failure; + /* Reader: pointers */ + if (!verify_chain(chain, p)) + goto changed; + add_chain(++p, bh, (u32*)bh->b_data + *++offsets); + /* Reader: end */ + if (!p->key) + goto no_block; + } + return NULL; + +changed: + *err = -EAGAIN; + goto no_block; +failure: + *err = -EIO; +no_block: + return p; +} + +/** + * ext2_find_near - find a place for allocation with sufficient locality + * @inode: owner + * @ind: descriptor of indirect block. 
+ *
+ *	This function returns the preferred place for block allocation.
+ *	It is used when the heuristic for sequential allocation fails.
+ *	Rules are:
+ *	  + if there is a block to the left of our position - allocate near it.
+ *	  + if pointer will live in indirect block - allocate near that block.
+ *	  + if pointer will live in inode - allocate in the same cylinder group.
+ *	Caller must make sure that @ind is valid and will stay that way.
+ */
+
+static inline unsigned long ext2_find_near(struct inode *inode, Indirect *ind)
+{
+	u32 *start = ind->bh ? (u32*) ind->bh->b_data : inode->u.ext2_i.i_data;
+	u32 *p;
+
+	/* Try to find previous block */
+	for (p = ind->p - 1; p >= start; p--)
+		if (*p)
+			return le32_to_cpu(*p);
+
+	/* No such thing, so let's try location of indirect block */
+	if (ind->bh)
+		return ind->bh->b_blocknr;
+
+	/*
+	 * It is going to be referred to from the inode itself? OK, just put
+	 * it into the same cylinder group then.
+	 */
+	return (inode->u.ext2_i.i_block_group *
+		EXT2_BLOCKS_PER_GROUP(inode->i_sb)) +
+	       le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_first_data_block);
+}
+
+/**
+ *	ext2_find_goal - find a preferred place for allocation.
+ *	@inode: owner
+ *	@block: block we want
+ *	@chain: chain of indirect blocks
+ *	@partial: pointer to the last triple within a chain
+ *	@goal: place to store the result.
+ *
+ *	Normally this function finds the preferred place for block allocation,
+ *	stores it in *@goal and returns zero. If the branch had been changed
+ *	under us we return -EAGAIN.
+ */
+
+static inline int ext2_find_goal(struct inode *inode,
+				 long block,
+				 Indirect chain[4],
+				 Indirect *partial,
+				 unsigned long *goal)
+{
+	/* Writer: ->i_next_alloc* */
+	if (block == inode->u.ext2_i.i_next_alloc_block + 1) {
+		inode->u.ext2_i.i_next_alloc_block++;
+		inode->u.ext2_i.i_next_alloc_goal++;
+	}
+	/* Writer: end */
+	/* Reader: pointers, ->i_next_alloc* */
+	if (verify_chain(chain, partial)) {
+		/*
+		 * try the heuristic for sequential allocation,
+		 * failing that at least try to get decent locality.
+		 */
+		if (block == inode->u.ext2_i.i_next_alloc_block)
+			*goal = inode->u.ext2_i.i_next_alloc_goal;
+		if (!*goal)
+			*goal = ext2_find_near(inode, partial);
+		return 0;
+	}
+	/* Reader: end */
+	return -EAGAIN;
+}
+
+/**
+ *	ext2_alloc_branch - allocate and set up a chain of blocks.
+ *	@inode: owner
+ *	@num: depth of the chain (number of blocks to allocate)
+ *	@offsets: offsets (in the blocks) to store the pointers to next.
+ *	@branch: place to store the chain in.
+ *
+ *	This function allocates @num blocks, zeroes out all but the last one,
+ *	links them into chain and (if we are synchronous) writes them to disk.
+ *	In other words, it prepares a branch that can be spliced onto the
+ *	inode. It stores the information about that chain in the branch[], in
+ *	the same format as ext2_get_branch() would do. We are calling it after
+ *	we had read the existing part of chain and partial points to the last
+ *	triple of that (one with zero ->key). Upon the exit we have the same
+ *	picture as after the successful ext2_get_block(), except that in one
+ *	place chain is disconnected - *branch->p is still zero (we did not
+ *	set the last link), but branch->key contains the number that should
+ *	be placed into *branch->p to fill that gap.
+ *
+ *	If allocation fails we free all blocks we've allocated (and forget
+ *	their buffer_heads) and return the error value from the failed
+ *	ext2_alloc_block() (normally -ENOSPC). Otherwise we set the chain
+ *	as described above and return 0.
+ */ + +static int ext2_alloc_branch(struct inode *inode, + int num, + unsigned long goal, + int *offsets, + Indirect *branch) +{ + int blocksize = inode->i_sb->s_blocksize; + int n = 0; + int err; + int i; + int parent = ext2_alloc_block(inode, goal, &err); + + branch[0].key = cpu_to_le32(parent); + if (parent) for (n = 1; n < num; n++) { + struct buffer_head *bh; + /* Allocate the next block */ + int nr = ext2_alloc_block(inode, parent, &err); + if (!nr) + break; + branch[n].key = cpu_to_le32(nr); + /* + * Get buffer_head for parent block, zero it out and set + * the pointer to new one, then send parent to disk. + */ + bh = getblk(inode->i_dev, parent, blocksize); + lock_buffer(bh); + memset(bh->b_data, 0, blocksize); + branch[n].bh = bh; + branch[n].p = (u32*) bh->b_data + offsets[n]; + *branch[n].p = branch[n].key; + mark_buffer_uptodate(bh, 1); + unlock_buffer(bh); + mark_buffer_dirty_inode(bh, inode); + if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) { + ll_rw_block (WRITE, 1, &bh); + wait_on_buffer (bh); + } + parent = nr; + } + if (n == num) + return 0; + + /* Allocation failed, free what we already allocated */ + for (i = 1; i < n; i++) + bforget(branch[i].bh); + for (i = 0; i < n; i++) + ext2_free_blocks(inode, le32_to_cpu(branch[i].key), 1); + return err; +} + +/** + * ext2_splice_branch - splice the allocated branch onto inode. + * @inode: owner + * @block: (logical) number of block we are adding + * @chain: chain of indirect blocks (with a missing link - see + * ext2_alloc_branch) + * @where: location of missing link + * @num: number of blocks we are adding + * + * This function verifies that chain (up to the missing link) had not + * changed, fills the missing link and does all housekeeping needed in + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. Otherwise (== chain had been changed) + * we free the new blocks (forgetting their buffer_heads, indeed) and + * return -EAGAIN. + */ + +static inline int ext2_splice_branch(struct inode *inode, + long block, + Indirect chain[4], + Indirect *where, + int num) +{ + int i; + + /* Verify that place we are splicing to is still there and vacant */ + + /* Writer: pointers, ->i_next_alloc* */ + if (!verify_chain(chain, where-1) || *where->p) + /* Writer: end */ + goto changed; + + /* That's it */ + + *where->p = where->key; + inode->u.ext2_i.i_next_alloc_block = block; + inode->u.ext2_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key); + + /* Writer: end */ + + /* We are done with atomic stuff, now do the rest of housekeeping */ + + inode->i_ctime = CURRENT_TIME; + + /* had we spliced it onto indirect block? */ + if (where->bh) { + mark_buffer_dirty_inode(where->bh, inode); + if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) { + ll_rw_block (WRITE, 1, &where->bh); + wait_on_buffer(where->bh); + } + } + + if (IS_SYNC(inode) || inode->u.ext2_i.i_osync) + ext2_sync_inode (inode); + else + mark_inode_dirty(inode); + return 0; + +changed: + for (i = 1; i < num; i++) + bforget(where[i].bh); + for (i = 0; i < num; i++) + ext2_free_blocks(inode, le32_to_cpu(where[i].key), 1); + return -EAGAIN; +} + +/* + * Allocation strategy is simple: if we have to allocate something, we will + * have to go the whole way to leaf. 
So let's do it before attaching anything + * to tree, set linkage between the newborn blocks, write them if sync is + * required, recheck the path, free and repeat if check fails, otherwise + * set the last missing link (that will protect us from any truncate-generated + * removals - all blocks on the path are immune now) and possibly force the + * write on the parent block. + * That has a nice additional property: no special recovery from the failed + * allocations is needed - we simply release blocks and do not touch anything + * reachable from inode. + */ + +static int ext2_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create) +{ + int err = -EIO; + int offsets[4]; + Indirect chain[4]; + Indirect *partial; + unsigned long goal; + int left; + int depth = ext2_block_to_path(inode, iblock, offsets); + + if (depth == 0) + goto out; + + lock_kernel(); +reread: + partial = ext2_get_branch(inode, depth, offsets, chain, &err); + + /* Simplest case - block found, no allocation needed */ + if (!partial) { +got_it: + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = le32_to_cpu(chain[depth-1].key); + bh_result->b_state |= (1UL << BH_Mapped); + /* Clean up and exit */ + partial = chain+depth-1; /* the whole chain */ + goto cleanup; + } + + /* Next simple case - plain lookup or failed read of indirect block */ + if (!create || err == -EIO) { +cleanup: + while (partial > chain) { + brelse(partial->bh); + partial--; + } + unlock_kernel(); +out: + return err; + } + + /* + * Indirect block might be removed by truncate while we were + * reading it. Handling of that case (forget what we've got and + * reread) is taken out of the main path. + */ + if (err == -EAGAIN) + goto changed; + + if (ext2_find_goal(inode, iblock, chain, partial, &goal) < 0) + goto changed; + + left = (chain + depth) - partial; + err = ext2_alloc_branch(inode, left, goal, + offsets+(partial-chain), partial); + if (err) + goto cleanup; + + if (ext2_splice_branch(inode, iblock, chain, partial, left) < 0) + goto changed; + + bh_result->b_state |= (1UL << BH_New); + goto got_it; + +changed: + while (partial > chain) { + brelse(partial->bh); + partial--; + } + goto reread; +} + +static int ext2_writepage(struct page *page) +{ + return block_write_full_page(page,ext2_get_block); +} +static int ext2_readpage(struct file *file, struct page *page) +{ + return block_read_full_page(page,ext2_get_block); +} +static int ext2_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +{ + return block_prepare_write(page,from,to,ext2_get_block); +} +static int ext2_bmap(struct address_space *mapping, long block) +{ + return generic_block_bmap(mapping,block,ext2_get_block); +} +static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize) +{ + return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block); +} +struct address_space_operations ext2_aops = { + readpage: ext2_readpage, + writepage: ext2_writepage, + sync_page: block_sync_page, + prepare_write: ext2_prepare_write, + commit_write: generic_commit_write, + bmap: ext2_bmap, + direct_IO: ext2_direct_IO, +}; + +/* + * Probably it should be a library function... search for first non-zero word + * or memcmp with zero_page, whatever is better for particular architecture. + * Linus? + */ +static inline int all_zeroes(u32 *p, u32 *q) +{ + while (p < q) + if (*p++) + return 0; + return 1; +} + +/** + * ext2_find_shared - find the indirect blocks for partial truncation. 
+ *	@inode:	  inode in question
+ *	@depth:	  depth of the affected branch
+ *	@offsets: offsets of pointers in that branch (see ext2_block_to_path)
+ *	@chain:	  place to store the pointers to partial indirect blocks
+ *	@top:	  place to store the (detached) top of branch
+ *
+ *	This is a helper function used by ext2_truncate().
+ *
+ *	When we do truncate() we may have to clean the ends of several indirect
+ *	blocks but leave the blocks themselves alive. Block is partially
+ *	truncated if some data below the new i_size is referred from it (and
+ *	it is on the path to the first completely truncated data block, indeed).
+ *	We have to free the top of that path along with everything to the right
+ *	of the path. Since no allocation past the truncation point is possible
+ *	until ext2_truncate() finishes, we may safely do the latter, but top
+ *	of branch may require special attention - pageout below the truncation
+ *	point might try to populate it.
+ *
+ *	We atomically detach the top of branch from the tree, store the block
+ *	number of its root in *@top, pointers to buffer_heads of partially
+ *	truncated blocks - in @chain[].bh and pointers to their last elements
+ *	that should not be removed - in @chain[].p. Return value is the pointer
+ *	to last filled element of @chain.
+ *
+ *	The work left to caller to do the actual freeing of subtrees:
+ *		a) free the subtree starting from *@top
+ *		b) free the subtrees whose roots are stored in
+ *			(@chain[i].p+1 .. end of @chain[i].bh->b_data)
+ *		c) free the subtrees growing from the inode past the @chain[0].p
+ *			(no partially truncated stuff there).
+ */
+
+static Indirect *ext2_find_shared(struct inode *inode,
+				int depth,
+				int offsets[4],
+				Indirect chain[4],
+				u32 *top)
+{
+	Indirect *partial, *p;
+	int k, err;
+
+	*top = 0;
+	for (k = depth; k > 1 && !offsets[k-1]; k--)
+		;
+	partial = ext2_get_branch(inode, k, offsets, chain, &err);
+	/* Writer: pointers */
+	if (!partial)
+		partial = chain + k-1;
+	/*
+	 * If the branch acquired continuation since we've looked at it -
+	 * fine, it should all survive and (new) top doesn't belong to us.
+	 */
+	if (!partial->key && *partial->p)
+		/* Writer: end */
+		goto no_top;
+	for (p=partial; p>chain && all_zeroes((u32*)p->bh->b_data,p->p); p--)
+		;
+	/*
+	 * OK, we've found the last block that must survive. The rest of our
+	 * branch should be detached before unlocking. However, if that rest
+	 * of branch is all ours and does not grow immediately from the inode
+	 * it's easier to cheat and just decrement partial->p.
+	 */
+	if (p == chain + k - 1 && p > chain) {
+		p->p--;
+	} else {
+		*top = *p->p;
+		*p->p = 0;
+	}
+	/* Writer: end */
+
+	while(partial > p)
+	{
+		brelse(partial->bh);
+		partial--;
+	}
+no_top:
+	return partial;
+}
+
+/**
+ *	ext2_free_data - free a list of data blocks
+ *	@inode:	inode we are dealing with
+ *	@p:	array of block numbers
+ *	@q:	points immediately past the end of array
+ *
+ *	We are freeing all blocks referred from that array (numbers are
+ *	stored as little-endian 32-bit) and updating @inode->i_blocks
+ *	appropriately.
+ */ +static inline void ext2_free_data(struct inode *inode, u32 *p, u32 *q) +{ + unsigned long block_to_free = 0, count = 0; + unsigned long nr; + + for ( ; p < q ; p++) { + nr = le32_to_cpu(*p); + if (nr) { + *p = 0; + /* accumulate blocks to free if they're contiguous */ + if (count == 0) + goto free_this; + else if (block_to_free == nr - count) + count++; + else { + mark_inode_dirty(inode); + ext2_free_blocks (inode, block_to_free, count); + free_this: + block_to_free = nr; + count = 1; + } + } + } + if (count > 0) { + mark_inode_dirty(inode); + ext2_free_blocks (inode, block_to_free, count); + } +} + +/** + * ext2_free_branches - free an array of branches + * @inode: inode we are dealing with + * @p: array of block numbers + * @q: pointer immediately past the end of array + * @depth: depth of the branches to free + * + * We are freeing all blocks refered from these branches (numbers are + * stored as little-endian 32-bit) and updating @inode->i_blocks + * appropriately. + */ +static void ext2_free_branches(struct inode *inode, u32 *p, u32 *q, int depth) +{ + struct buffer_head * bh; + unsigned long nr; + + if (depth--) { + int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); + for ( ; p < q ; p++) { + nr = le32_to_cpu(*p); + if (!nr) + continue; + *p = 0; + bh = bread (inode->i_dev, nr, inode->i_sb->s_blocksize); + /* + * A read failure? Report error and clear slot + * (should be rare). + */ + if (!bh) { + ext2_error(inode->i_sb, "ext2_free_branches", + "Read failure, inode=%ld, block=%ld", + inode->i_ino, nr); + continue; + } + ext2_free_branches(inode, + (u32*)bh->b_data, + (u32*)bh->b_data + addr_per_block, + depth); + bforget(bh); + ext2_free_blocks(inode, nr, 1); + mark_inode_dirty(inode); + } + } else + ext2_free_data(inode, p, q); +} + +void ext2_truncate (struct inode * inode) +{ + u32 *i_data = inode->u.ext2_i.i_data; + int addr_per_block = EXT2_ADDR_PER_BLOCK(inode->i_sb); + int offsets[4]; + Indirect chain[4]; + Indirect *partial; + int nr = 0; + int n; + long iblock; + unsigned blocksize; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE_FILE(inode)) + return; + + ext2_discard_prealloc(inode); + + blocksize = inode->i_sb->s_blocksize; + iblock = (inode->i_size + blocksize-1) + >> EXT2_BLOCK_SIZE_BITS(inode->i_sb); + + block_truncate_page(inode->i_mapping, inode->i_size, ext2_get_block); + + n = ext2_block_to_path(inode, iblock, offsets); + if (n == 0) + return; + + if (n == 1) { + ext2_free_data(inode, i_data+offsets[0], + i_data + EXT2_NDIR_BLOCKS); + goto do_indirects; + } + + partial = ext2_find_shared(inode, n, offsets, chain, &nr); + /* Kill the top of shared branch (already detached) */ + if (nr) { + if (partial == chain) + mark_inode_dirty(inode); + else + mark_buffer_dirty_inode(partial->bh, inode); + ext2_free_branches(inode, &nr, &nr+1, (chain+n-1) - partial); + } + /* Clear the ends of indirect blocks on the shared branch */ + while (partial > chain) { + ext2_free_branches(inode, + partial->p + 1, + (u32*)partial->bh->b_data + addr_per_block, + (chain+n-1) - partial); + mark_buffer_dirty_inode(partial->bh, inode); + if (IS_SYNC(inode)) { + ll_rw_block (WRITE, 1, &partial->bh); + wait_on_buffer (partial->bh); + } + brelse (partial->bh); + partial--; + } +do_indirects: + /* Kill the remaining (whole) subtrees */ + switch (offsets[0]) { + default: + nr = i_data[EXT2_IND_BLOCK]; + if (nr) { + i_data[EXT2_IND_BLOCK] = 0; + mark_inode_dirty(inode); + ext2_free_branches(inode, 
&nr, &nr+1, 1); + } + case EXT2_IND_BLOCK: + nr = i_data[EXT2_DIND_BLOCK]; + if (nr) { + i_data[EXT2_DIND_BLOCK] = 0; + mark_inode_dirty(inode); + ext2_free_branches(inode, &nr, &nr+1, 2); + } + case EXT2_DIND_BLOCK: + nr = i_data[EXT2_TIND_BLOCK]; + if (nr) { + i_data[EXT2_TIND_BLOCK] = 0; + mark_inode_dirty(inode); + ext2_free_branches(inode, &nr, &nr+1, 3); + } + case EXT2_TIND_BLOCK: + ; + } + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + if (IS_SYNC(inode)) + ext2_sync_inode (inode); + else + mark_inode_dirty(inode); +} + +void ext2_read_inode (struct inode * inode) +{ + struct buffer_head * bh; + struct ext2_inode * raw_inode; + unsigned long block_group; + unsigned long group_desc; + unsigned long desc; + unsigned long block; + unsigned long offset; + struct ext2_group_desc * gdp; + + if ((inode->i_ino != EXT2_ROOT_INO && inode->i_ino != EXT2_ACL_IDX_INO && + inode->i_ino != EXT2_ACL_DATA_INO && + inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) || + inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) { + ext2_error (inode->i_sb, "ext2_read_inode", + "bad inode number: %lu", inode->i_ino); + goto bad_inode; + } + block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); + if (block_group >= inode->i_sb->u.ext2_sb.s_groups_count) { + ext2_error (inode->i_sb, "ext2_read_inode", + "group >= groups count"); + goto bad_inode; + } + group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(inode->i_sb); + desc = block_group & (EXT2_DESC_PER_BLOCK(inode->i_sb) - 1); + bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc]; + if (!bh) { + ext2_error (inode->i_sb, "ext2_read_inode", + "Descriptor not loaded"); + goto bad_inode; + } + + gdp = (struct ext2_group_desc *) bh->b_data; + /* + * Figure out the offset within the block group inode table + */ + offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) * + EXT2_INODE_SIZE(inode->i_sb); + block = le32_to_cpu(gdp[desc].bg_inode_table) + + (offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb)); + if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize))) { + ext2_error (inode->i_sb, "ext2_read_inode", + "unable to read inode block - " + "inode=%lu, block=%lu", inode->i_ino, block); + goto bad_inode; + } + offset &= (EXT2_BLOCK_SIZE(inode->i_sb) - 1); + raw_inode = (struct ext2_inode *) (bh->b_data + offset); + + inode->i_mode = le16_to_cpu(raw_inode->i_mode); + inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + if(!(test_opt (inode->i_sb, NO_UID32))) { + inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + } + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); + inode->i_size = le32_to_cpu(raw_inode->i_size); + inode->i_atime = le32_to_cpu(raw_inode->i_atime); + inode->i_ctime = le32_to_cpu(raw_inode->i_ctime); + inode->i_mtime = le32_to_cpu(raw_inode->i_mtime); + inode->u.ext2_i.i_dtime = le32_to_cpu(raw_inode->i_dtime); + /* We now have enough fields to check if the inode was active or not. 
+ * This is needed because nfsd might try to access dead inodes + * the test is that same one that e2fsck uses + * NeilBrown 1999oct15 + */ + if (inode->i_nlink == 0 && (inode->i_mode == 0 || inode->u.ext2_i.i_dtime)) { + /* this inode is deleted */ + brelse (bh); + goto bad_inode; + } + inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size (for stat), not the fs block size */ + inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); + inode->i_version = ++event; + inode->u.ext2_i.i_flags = le32_to_cpu(raw_inode->i_flags); + inode->u.ext2_i.i_faddr = le32_to_cpu(raw_inode->i_faddr); + inode->u.ext2_i.i_frag_no = raw_inode->i_frag; + inode->u.ext2_i.i_frag_size = raw_inode->i_fsize; + inode->u.ext2_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl); + if (S_ISREG(inode->i_mode)) + inode->i_size |= ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; + else + inode->u.ext2_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); + inode->i_generation = le32_to_cpu(raw_inode->i_generation); + inode->u.ext2_i.i_prealloc_count = 0; + inode->u.ext2_i.i_block_group = block_group; + + /* + * NOTE! The in-memory inode i_data array is in little-endian order + * even on big-endian machines: we do NOT byteswap the block numbers! + */ + for (block = 0; block < EXT2_N_BLOCKS; block++) + inode->u.ext2_i.i_data[block] = raw_inode->i_block[block]; + + if (inode->i_ino == EXT2_ACL_IDX_INO || + inode->i_ino == EXT2_ACL_DATA_INO) + /* Nothing to do */ ; + else if (S_ISREG(inode->i_mode)) { + inode->i_op = &ext2_file_inode_operations; + inode->i_fop = &ext2_file_operations; + inode->i_mapping->a_ops = &ext2_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &ext2_dir_inode_operations; + inode->i_fop = &ext2_dir_operations; + inode->i_mapping->a_ops = &ext2_aops; + } else if (S_ISLNK(inode->i_mode)) { + if (!inode->i_blocks) + inode->i_op = &ext2_fast_symlink_inode_operations; + else { + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &ext2_aops; + } + } else + init_special_inode(inode, inode->i_mode, + le32_to_cpu(raw_inode->i_block[0])); + brelse (bh); + inode->i_attr_flags = 0; + if (inode->u.ext2_i.i_flags & EXT2_SYNC_FL) { + inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; + inode->i_flags |= S_SYNC; + } + if (inode->u.ext2_i.i_flags & EXT2_APPEND_FL) { + inode->i_attr_flags |= ATTR_FLAG_APPEND; + inode->i_flags |= S_APPEND; + } + if (inode->u.ext2_i.i_flags & EXT2_IMMUTABLE_FILE_FL) { + inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE_FILE; + inode->i_flags |= S_IMMUTABLE_FILE; + } + if (inode->u.ext2_i.i_flags & EXT2_IMMUTABLE_LINK_FL) { + inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE_LINK; + inode->i_flags |= S_IMMUTABLE_LINK; + } + if (inode->u.ext2_i.i_flags & EXT2_NOATIME_FL) { + inode->i_attr_flags |= ATTR_FLAG_NOATIME; + inode->i_flags |= S_NOATIME; + } + return; + +bad_inode: + make_bad_inode(inode); + return; +} + +static int ext2_update_inode(struct inode * inode, int do_sync) +{ + struct buffer_head * bh; + struct ext2_inode * raw_inode; + unsigned long block_group; + unsigned long group_desc; + unsigned long desc; + unsigned long block; + unsigned long offset; + int err = 0; + struct ext2_group_desc * gdp; + + if ((inode->i_ino != EXT2_ROOT_INO && + inode->i_ino < EXT2_FIRST_INO(inode->i_sb)) || + inode->i_ino > le32_to_cpu(inode->i_sb->u.ext2_sb.s_es->s_inodes_count)) { + ext2_error (inode->i_sb, "ext2_write_inode", + "bad inode number: %lu", inode->i_ino); + return -EIO; + } + block_group = (inode->i_ino - 1) / EXT2_INODES_PER_GROUP(inode->i_sb); + if (block_group >= 
inode->i_sb->u.ext2_sb.s_groups_count) { + ext2_error (inode->i_sb, "ext2_write_inode", + "group >= groups count"); + return -EIO; + } + group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(inode->i_sb); + desc = block_group & (EXT2_DESC_PER_BLOCK(inode->i_sb) - 1); + bh = inode->i_sb->u.ext2_sb.s_group_desc[group_desc]; + if (!bh) { + ext2_error (inode->i_sb, "ext2_write_inode", + "Descriptor not loaded"); + return -EIO; + } + gdp = (struct ext2_group_desc *) bh->b_data; + /* + * Figure out the offset within the block group inode table + */ + offset = ((inode->i_ino - 1) % EXT2_INODES_PER_GROUP(inode->i_sb)) * + EXT2_INODE_SIZE(inode->i_sb); + block = le32_to_cpu(gdp[desc].bg_inode_table) + + (offset >> EXT2_BLOCK_SIZE_BITS(inode->i_sb)); + if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize))) { + ext2_error (inode->i_sb, "ext2_write_inode", + "unable to read inode block - " + "inode=%lu, block=%lu", inode->i_ino, block); + return -EIO; + } + offset &= EXT2_BLOCK_SIZE(inode->i_sb) - 1; + raw_inode = (struct ext2_inode *) (bh->b_data + offset); + + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + if(!(test_opt(inode->i_sb, NO_UID32))) { + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); +/* + * Fix up interoperability with old kernels. Otherwise, old inodes get + * re-used with the upper 16 bits of the uid/gid intact + */ + if(!inode->u.ext2_i.i_dtime) { + raw_inode->i_uid_high = cpu_to_le16(high_16_bits(inode->i_uid)); + raw_inode->i_gid_high = cpu_to_le16(high_16_bits(inode->i_gid)); + } else { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + } else { + raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(inode->i_uid)); + raw_inode->i_gid_low = cpu_to_le16(fs_high2lowgid(inode->i_gid)); + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + raw_inode->i_size = cpu_to_le32(inode->i_size); + raw_inode->i_atime = cpu_to_le32(inode->i_atime); + raw_inode->i_ctime = cpu_to_le32(inode->i_ctime); + raw_inode->i_mtime = cpu_to_le32(inode->i_mtime); + raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); + raw_inode->i_dtime = cpu_to_le32(inode->u.ext2_i.i_dtime); + raw_inode->i_flags = cpu_to_le32(inode->u.ext2_i.i_flags); + raw_inode->i_faddr = cpu_to_le32(inode->u.ext2_i.i_faddr); + raw_inode->i_frag = inode->u.ext2_i.i_frag_no; + raw_inode->i_fsize = inode->u.ext2_i.i_frag_size; + raw_inode->i_file_acl = cpu_to_le32(inode->u.ext2_i.i_file_acl); + if (S_ISDIR(inode->i_mode)) + raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext2_i.i_dir_acl); + else { + raw_inode->i_size_high = cpu_to_le32(inode->i_size >> 32); + if (inode->i_size > 0x7fffffffULL) { + struct super_block *sb = inode->i_sb; + if (!EXT2_HAS_RO_COMPAT_FEATURE(sb, + EXT2_FEATURE_RO_COMPAT_LARGE_FILE) || + EXT2_SB(sb)->s_es->s_rev_level == + cpu_to_le32(EXT2_GOOD_OLD_REV)) { + /* If this is the first large file + * created, add a flag to the superblock. 
+					 */
+					lock_kernel();
+					ext2_update_dynamic_rev(sb);
+					EXT2_SET_RO_COMPAT_FEATURE(sb,
+						EXT2_FEATURE_RO_COMPAT_LARGE_FILE);
+					unlock_kernel();
+					ext2_write_super(sb);
+				}
+			}
+		}
+
+	raw_inode->i_generation = cpu_to_le32(inode->i_generation);
+	if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
+		raw_inode->i_block[0] = cpu_to_le32(kdev_t_to_nr(inode->i_rdev));
+	else for (block = 0; block < EXT2_N_BLOCKS; block++)
+		raw_inode->i_block[block] = inode->u.ext2_i.i_data[block];
+	mark_buffer_dirty(bh);
+	if (do_sync) {
+		ll_rw_block (WRITE, 1, &bh);
+		wait_on_buffer (bh);
+		if (buffer_req(bh) && !buffer_uptodate(bh)) {
+			printk ("IO error syncing ext2 inode ["
+				"%s:%08lx]\n",
+				bdevname(inode->i_dev), inode->i_ino);
+			err = -EIO;
+		}
+	}
+	brelse (bh);
+	return err;
+}
+
+void ext2_write_inode (struct inode * inode, int wait)
+{
+	lock_kernel();
+	ext2_update_inode (inode, wait);
+	unlock_kernel();
+}
+
+int ext2_sync_inode (struct inode *inode)
+{
+	return ext2_update_inode (inode, 1);
+}
diff -urN linux-2.4.16-reiserfspatches-immutable/fs/ext2/ioctl.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ext2/ioctl.c~
--- linux-2.4.16-reiserfspatches-immutable/fs/ext2/ioctl.c~	Thu Jan  1 01:00:00 1970
+++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ext2/ioctl.c~	Mon Dec 10 14:28:03 2001
@@ -0,0 +1,98 @@
+/*
+ * linux/fs/ext2/ioctl.c
+ *
+ * Copyright (C) 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ */
+
+#include <linux/fs.h>
+#include <linux/ext2_fs.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+
+
+int ext2_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
+		unsigned long arg)
+{
+	unsigned int flags;
+
+	ext2_debug ("cmd = %u, arg = %lu\n", cmd, arg);
+
+	switch (cmd) {
+	case EXT2_IOC_GETFLAGS:
+		flags = inode->u.ext2_i.i_flags & EXT2_FL_USER_VISIBLE;
+		return put_user(flags, (int *) arg);
+	case EXT2_IOC_SETFLAGS: {
+		unsigned int oldflags;
+
+		if (IS_RDONLY(inode))
+			return -EROFS;
+
+		if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+			return -EPERM;
+
+		if (get_user(flags, (int *) arg))
+			return -EFAULT;
+
+		oldflags = inode->u.ext2_i.i_flags;
+
+		/*
+		 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
+		 * the relevant capability.
+		 *
+		 * This test looks nicer. Thanks to Pauline Middelink
+		 */
+		if ((flags ^ oldflags) & (EXT2_APPEND_FL | EXT2_IMMUTABLE_FILE_FL | EXT2_IMMUTABLE_LINK_FL)) {
+			if (!capable(CAP_LINUX_IMMUTABLE))
+				return -EPERM;
+		}
+
+		flags = flags & EXT2_FL_USER_MODIFIABLE;
+		flags |= oldflags & ~EXT2_FL_USER_MODIFIABLE;
+		inode->u.ext2_i.i_flags = flags;
+
+		if (flags & EXT2_SYNC_FL)
+			inode->i_flags |= S_SYNC;
+		else
+			inode->i_flags &= ~S_SYNC;
+		if (flags & EXT2_APPEND_FL)
+			inode->i_flags |= S_APPEND;
+		else
+			inode->i_flags &= ~S_APPEND;
+
+		if (flags & EXT2_IMMUTABLE_FILE_FL)
+			inode->i_flags |= S_IMMUTABLE_FILE;
+		else
+			inode->i_flags &= ~S_IMMUTABLE_FILE;
+
+		if (flags & EXT2_IMMUTABLE_LINK_FL)
+			inode->i_flags |= S_IMMUTABLE_LINK;
+		else
+			inode->i_flags &= ~S_IMMUTABLE_LINK;
+
+		if (flags & EXT2_NOATIME_FL)
+			inode->i_flags |= S_NOATIME;
+		else
+			inode->i_flags &= ~S_NOATIME;
+		inode->i_ctime = CURRENT_TIME;
+		mark_inode_dirty(inode);
+		return 0;
+	}
+	case EXT2_IOC_GETVERSION:
+		return put_user(inode->i_generation, (int *) arg);
+	case EXT2_IOC_SETVERSION:
+		if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
+			return -EPERM;
+		if (IS_RDONLY(inode))
+			return -EROFS;
+		if (get_user(inode->i_generation, (int *) arg))
+			return -EFAULT;
+		inode->i_ctime = CURRENT_TIME;
+		mark_inode_dirty(inode);
+		return 0;
+	default:
+		return -ENOTTY;
+	}
+}
diff -urN linux-2.4.16-reiserfspatches-immutable/fs/ext3/ialloc.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ext3/ialloc.c~
--- linux-2.4.16-reiserfspatches-immutable/fs/ext3/ialloc.c~	Thu Jan  1 01:00:00 1970
+++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ext3/ialloc.c~	Mon Dec 10 14:28:03 2001
@@ -0,0 +1,664 @@
+/*
+ *  linux/fs/ext3/ialloc.c
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ *  BSD ufs-inspired inode and directory allocation by
+ *  Stephen Tweedie (sct@redhat.com), 1993
+ *  Big-endian to little-endian byte-swapping/bitmaps by
+ *        David S. Miller (davem@caip.rutgers.edu), 1995
+ */
+
+#include <linux/config.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/ext3_fs.h>
+#include <linux/ext3_jbd.h>
+#include <linux/sched.h>
+#include <linux/stat.h>
+#include <linux/locks.h>
+#include <linux/quotaops.h>
+
+#include <asm/bitops.h>
+#include <asm/byteorder.h>
+
+/*
+ * ialloc.c contains the inodes allocation and deallocation routines
+ */
+
+/*
+ * The free inodes are managed by bitmaps.  A file system contains several
+ * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
+ * block for inodes, N blocks for the inode table and data blocks.
+ *
+ * The file system contains group descriptors which are located after the
+ * super block.  Each descriptor contains the number of the bitmap block and
+ * the free blocks count in the block.  The descriptors are loaded in memory
+ * when a file system is mounted (see ext3_read_super).
+ */
+
+
+/*
+ * Read the inode allocation bitmap for a given block_group, reading
+ * into the specified slot in the superblock's bitmap cache.
+ *
+ * Return >=0 on success or a -ve error code.
+ */ +static int read_inode_bitmap (struct super_block * sb, + unsigned long block_group, + unsigned int bitmap_nr) +{ + struct ext3_group_desc * gdp; + struct buffer_head * bh = NULL; + int retval = 0; + + gdp = ext3_get_group_desc (sb, block_group, NULL); + if (!gdp) { + retval = -EIO; + goto error_out; + } + bh = bread (sb->s_dev, + le32_to_cpu(gdp->bg_inode_bitmap), sb->s_blocksize); + if (!bh) { + ext3_error (sb, "read_inode_bitmap", + "Cannot read inode bitmap - " + "block_group = %lu, inode_bitmap = %lu", + block_group, (unsigned long) gdp->bg_inode_bitmap); + retval = -EIO; + } + /* + * On IO error, just leave a zero in the superblock's block pointer for + * this group. The IO will be retried next time. + */ +error_out: + sb->u.ext3_sb.s_inode_bitmap_number[bitmap_nr] = block_group; + sb->u.ext3_sb.s_inode_bitmap[bitmap_nr] = bh; + return retval; +} + +/* + * load_inode_bitmap loads the inode bitmap for a blocks group + * + * It maintains a cache for the last bitmaps loaded. This cache is managed + * with a LRU algorithm. + * + * Notes: + * 1/ There is one cache per mounted file system. + * 2/ If the file system contains less than EXT3_MAX_GROUP_LOADED groups, + * this function reads the bitmap without maintaining a LRU cache. + * + * Return the slot used to store the bitmap, or a -ve error code. + */ +static int load_inode_bitmap (struct super_block * sb, + unsigned int block_group) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + unsigned long inode_bitmap_number; + struct buffer_head * inode_bitmap; + int i, j, retval = 0; + + if (block_group >= sbi->s_groups_count) + ext3_panic (sb, "load_inode_bitmap", + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sbi->s_groups_count); + if (sbi->s_loaded_inode_bitmaps > 0 && + sbi->s_inode_bitmap_number[0] == block_group && + sbi->s_inode_bitmap[0] != NULL) + return 0; + if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED) { + if (sbi->s_inode_bitmap[block_group]) { + if (sbi->s_inode_bitmap_number[block_group] != + block_group) + ext3_panic(sb, "load_inode_bitmap", + "block_group != inode_bitmap_number"); + return block_group; + } + retval = read_inode_bitmap(sb, block_group, block_group); + if (retval < 0) + return retval; + return block_group; + } + + for (i = 0; i < sbi->s_loaded_inode_bitmaps && + sbi->s_inode_bitmap_number[i] != block_group; i++) + /* do nothing */; + if (i < sbi->s_loaded_inode_bitmaps && + sbi->s_inode_bitmap_number[i] == block_group) { + inode_bitmap_number = sbi->s_inode_bitmap_number[i]; + inode_bitmap = sbi->s_inode_bitmap[i]; + for (j = i; j > 0; j--) { + sbi->s_inode_bitmap_number[j] = + sbi->s_inode_bitmap_number[j - 1]; + sbi->s_inode_bitmap[j] = sbi->s_inode_bitmap[j - 1]; + } + sbi->s_inode_bitmap_number[0] = inode_bitmap_number; + sbi->s_inode_bitmap[0] = inode_bitmap; + + /* + * There's still one special case here --- if inode_bitmap == 0 + * then our last attempt to read the bitmap failed and we have + * just ended up caching that failure. Try again to read it. 
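The cache policy that load_inode_bitmap() above implements (most-recently-used bitmap kept in slot 0, eviction from the tail) can be modelled in user space. A hedged sketch with invented names — bitmap_cache, cache_lookup, load_fn standing in for read_inode_bitmap() — not kernel code:

#include <stdlib.h>

#define MAX_LOADED 8			/* stands in for EXT3_MAX_GROUP_LOADED */

struct bitmap_cache {
	int loaded;			/* slots in use */
	unsigned int group[MAX_LOADED];	/* which block group per slot */
	void *bitmap[MAX_LOADED];	/* cached bitmap data per slot */
};

/* Return the cached bitmap for `group', loading on demand and promoting
 * the entry to the front; the oldest entry falls off the end. */
void *cache_lookup(struct bitmap_cache *c, unsigned int group,
		   void *(*load_fn)(unsigned int))
{
	int i, j;
	void *bm;

	for (i = 0; i < c->loaded && c->group[i] != group; i++)
		;
	if (i < c->loaded) {			/* hit */
		bm = c->bitmap[i];
	} else {				/* miss: make room up front */
		if (c->loaded < MAX_LOADED)
			i = c->loaded++;
		else
			free(c->bitmap[--i]);	/* evict the LRU tail */
		bm = load_fn(group);
	}
	for (j = i; j > 0; j--) {		/* slide others back */
		c->group[j] = c->group[j - 1];
		c->bitmap[j] = c->bitmap[j - 1];
	}
	c->group[0] = group;			/* MRU lives in slot 0 */
	c->bitmap[0] = bm;
	return bm;
}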
+ */ + if (!inode_bitmap) + retval = read_inode_bitmap (sb, block_group, 0); + } else { + if (sbi->s_loaded_inode_bitmaps < EXT3_MAX_GROUP_LOADED) + sbi->s_loaded_inode_bitmaps++; + else + brelse(sbi->s_inode_bitmap[EXT3_MAX_GROUP_LOADED - 1]); + for (j = sbi->s_loaded_inode_bitmaps - 1; j > 0; j--) { + sbi->s_inode_bitmap_number[j] = + sbi->s_inode_bitmap_number[j - 1]; + sbi->s_inode_bitmap[j] = sbi->s_inode_bitmap[j - 1]; + } + retval = read_inode_bitmap (sb, block_group, 0); + } + return retval; +} + +/* + * NOTE! When we get the inode, we're the only people + * that have access to it, and as such there are no + * race conditions we have to worry about. The inode + * is not on the hash-lists, and it cannot be reached + * through the filesystem because the directory entry + * has been deleted earlier. + * + * HOWEVER: we must make sure that we get no aliases, + * which means that we have to call "clear_inode()" + * _before_ we mark the inode not in use in the inode + * bitmaps. Otherwise a newly created file might use + * the same inode number (not actually the same pointer + * though), and then we'd have two inodes sharing the + * same inode number and space on the harddisk. + */ +void ext3_free_inode (handle_t *handle, struct inode * inode) +{ + struct super_block * sb = inode->i_sb; + int is_directory; + unsigned long ino; + struct buffer_head * bh; + struct buffer_head * bh2; + unsigned long block_group; + unsigned long bit; + int bitmap_nr; + struct ext3_group_desc * gdp; + struct ext3_super_block * es; + int fatal = 0, err; + + if (!inode->i_dev) { + printk ("ext3_free_inode: inode has no device\n"); + return; + } + if (atomic_read(&inode->i_count) > 1) { + printk ("ext3_free_inode: inode has count=%d\n", + atomic_read(&inode->i_count)); + return; + } + if (inode->i_nlink) { + printk ("ext3_free_inode: inode has nlink=%d\n", + inode->i_nlink); + return; + } + if (!sb) { + printk("ext3_free_inode: inode on nonexistent device\n"); + return; + } + + ino = inode->i_ino; + ext3_debug ("freeing inode %lu\n", ino); + + /* + * Note: we must free any quota before locking the superblock, + * as writing the quota to disk may need the lock as well. + */ + DQUOT_INIT(inode); + DQUOT_FREE_INODE(inode); + DQUOT_DROP(inode); + + is_directory = S_ISDIR(inode->i_mode); + + /* Do this BEFORE marking the inode not in use or returning an error */ + clear_inode (inode); + + lock_super (sb); + es = sb->u.ext3_sb.s_es; + if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { + ext3_error (sb, "ext3_free_inode", + "reserved or nonexistent inode %lu", ino); + goto error_return; + } + block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); + bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb); + bitmap_nr = load_inode_bitmap (sb, block_group); + if (bitmap_nr < 0) + goto error_return; + + bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; + + BUFFER_TRACE(bh, "get_write_access"); + fatal = ext3_journal_get_write_access(handle, bh); + if (fatal) + goto error_return; + + /* Ok, now we can actually update the inode bitmaps.. 
*/ + if (!ext3_clear_bit (bit, bh->b_data)) + ext3_error (sb, "ext3_free_inode", + "bit already cleared for inode %lu", ino); + else { + gdp = ext3_get_group_desc (sb, block_group, &bh2); + + BUFFER_TRACE(bh2, "get_write_access"); + fatal = ext3_journal_get_write_access(handle, bh2); + if (fatal) goto error_return; + + BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get write access"); + fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); + if (fatal) goto error_return; + + if (gdp) { + gdp->bg_free_inodes_count = cpu_to_le16( + le16_to_cpu(gdp->bg_free_inodes_count) + 1); + if (is_directory) + gdp->bg_used_dirs_count = cpu_to_le16( + le16_to_cpu(gdp->bg_used_dirs_count) - 1); + } + BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh2); + if (!fatal) fatal = err; + es->s_free_inodes_count = + cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1); + BUFFER_TRACE(sb->u.ext3_sb.s_sbh, + "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); + if (!fatal) fatal = err; + } + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh); + if (!fatal) + fatal = err; + sb->s_dirt = 1; +error_return: + ext3_std_error(sb, fatal); + unlock_super(sb); +} + +/* + * There are two policies for allocating an inode. If the new inode is + * a directory, then a forward search is made for a block group with both + * free space and a low directory-to-inode ratio; if that fails, then of + * the groups with above-average free space, that group with the fewest + * directories already is chosen. + * + * For other inodes, search forward from the parent directory's block + * group to find a free inode. + */ +struct inode * ext3_new_inode (handle_t *handle, + const struct inode * dir, int mode) +{ + struct super_block * sb; + struct buffer_head * bh; + struct buffer_head * bh2; + int i, j, avefreei; + struct inode * inode; + int bitmap_nr; + struct ext3_group_desc * gdp; + struct ext3_group_desc * tmp; + struct ext3_super_block * es; + int err = 0; + + /* Cannot create files in a deleted directory */ + if (!dir || !dir->i_nlink) + return ERR_PTR(-EPERM); + + sb = dir->i_sb; + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + init_rwsem(&inode->u.ext3_i.truncate_sem); + + lock_super (sb); + es = sb->u.ext3_sb.s_es; +repeat: + gdp = NULL; + i = 0; + + if (S_ISDIR(mode)) { + avefreei = le32_to_cpu(es->s_free_inodes_count) / + sb->u.ext3_sb.s_groups_count; + if (!gdp) { + for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) { + struct buffer_head *temp_buffer; + tmp = ext3_get_group_desc (sb, j, &temp_buffer); + if (tmp && + le16_to_cpu(tmp->bg_free_inodes_count) && + le16_to_cpu(tmp->bg_free_inodes_count) >= + avefreei) { + if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) > + le16_to_cpu(gdp->bg_free_blocks_count))) { + i = j; + gdp = tmp; + bh2 = temp_buffer; + } + } + } + } + } else { + /* + * Try to place the inode in its parent directory + */ + i = dir->u.ext3_i.i_block_group; + tmp = ext3_get_group_desc (sb, i, &bh2); + if (tmp && le16_to_cpu(tmp->bg_free_inodes_count)) + gdp = tmp; + else + { + /* + * Use a quadratic hash to find a group with a + * free inode + */ + for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) { + i += j; + if (i >= sb->u.ext3_sb.s_groups_count) + i -= sb->u.ext3_sb.s_groups_count; + tmp = ext3_get_group_desc (sb, i, &bh2); + if (tmp && + le16_to_cpu(tmp->bg_free_inodes_count)) { + gdp = tmp; + break; + } + } + } + if (!gdp) { + /* 
+ * That failed: try linear search for a free inode + */ + i = dir->u.ext3_i.i_block_group + 1; + for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) { + if (++i >= sb->u.ext3_sb.s_groups_count) + i = 0; + tmp = ext3_get_group_desc (sb, i, &bh2); + if (tmp && + le16_to_cpu(tmp->bg_free_inodes_count)) { + gdp = tmp; + break; + } + } + } + } + + err = -ENOSPC; + if (!gdp) + goto fail; + + err = -EIO; + bitmap_nr = load_inode_bitmap (sb, i); + if (bitmap_nr < 0) + goto fail; + + bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr]; + + if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data, + EXT3_INODES_PER_GROUP(sb))) < + EXT3_INODES_PER_GROUP(sb)) { + BUFFER_TRACE(bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, bh); + if (err) goto fail; + + if (ext3_set_bit (j, bh->b_data)) { + ext3_error (sb, "ext3_new_inode", + "bit already set for inode %d", j); + goto repeat; + } + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh); + if (err) goto fail; + } else { + if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) { + ext3_error (sb, "ext3_new_inode", + "Free inodes count corrupted in group %d", + i); + /* Is it really ENOSPC? */ + err = -ENOSPC; + if (sb->s_flags & MS_RDONLY) + goto fail; + + BUFFER_TRACE(bh2, "get_write_access"); + err = ext3_journal_get_write_access(handle, bh2); + if (err) goto fail; + gdp->bg_free_inodes_count = 0; + BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh2); + if (err) goto fail; + } + goto repeat; + } + j += i * EXT3_INODES_PER_GROUP(sb) + 1; + if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) { + ext3_error (sb, "ext3_new_inode", + "reserved inode or inode > inodes count - " + "block_group = %d,inode=%d", i, j); + err = -EIO; + goto fail; + } + + BUFFER_TRACE(bh2, "get_write_access"); + err = ext3_journal_get_write_access(handle, bh2); + if (err) goto fail; + gdp->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); + if (S_ISDIR(mode)) + gdp->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); + BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh2); + if (err) goto fail; + + BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access"); + err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh); + if (err) goto fail; + es->s_free_inodes_count = + cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1); + BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh); + sb->s_dirt = 1; + if (err) goto fail; + + inode->i_uid = current->fsuid; + if (test_opt (sb, GRPID)) + inode->i_gid = dir->i_gid; + else if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current->fsgid; + inode->i_mode = mode; + + inode->i_ino = j; + /* This is the optimal IO size (for stat), not the fs block size */ + inode->i_blksize = PAGE_SIZE; + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & ~EXT3_INDEX_FL; + if (S_ISLNK(mode)) + inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FILE_FL|EXT3_IMMUTABLE_LINK_FL|EXT3_APPEND_FL); +#ifdef EXT3_FRAGMENTS + inode->u.ext3_i.i_faddr = 0; + inode->u.ext3_i.i_frag_no = 0; + inode->u.ext3_i.i_frag_size = 0; +#endif + inode->u.ext3_i.i_file_acl = 0; + inode->u.ext3_i.i_dir_acl = 0; + 
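Stepping back from the hunk above: for non-directory inodes, ext3_new_inode() probes the parent directory's group first, then groups at quadratically increasing offsets, and finally falls back to a linear sweep. A small user-space sketch of the resulting probe order (hypothetical helper, mirroring the loops above):

#include <stdio.h>

/* Print the order in which block groups are probed for a new inode
 * whose parent directory lives in group `parent'. */
static void probe_order(int parent, int ngroups)
{
	int i = parent, j;

	printf("%d", i);			/* parent's group first */
	for (j = 1; j < ngroups; j <<= 1) {	/* quadratic hash */
		i += j;
		if (i >= ngroups)
			i -= ngroups;
		printf(" %d", i);
	}
	i = parent + 1;				/* then the linear sweep */
	for (j = 2; j < ngroups; j++) {
		if (++i >= ngroups)
			i = 0;
		printf(" %d", i);
	}
	printf("\n");
}

int main(void)
{
	probe_order(5, 16);	/* 5, then 6 8 12 4 (wrapping), then linear */
	return 0;
}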
inode->u.ext3_i.i_dtime = 0; + INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); +#ifdef EXT3_PREALLOCATE + inode->u.ext3_i.i_prealloc_count = 0; +#endif + inode->u.ext3_i.i_block_group = i; + + if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) + inode->i_flags |= S_SYNC; + if (IS_SYNC(inode)) + handle->h_sync = 1; + insert_inode_hash(inode); + inode->i_generation = event++; + + inode->u.ext3_i.i_state = EXT3_STATE_NEW; + err = ext3_mark_inode_dirty(handle, inode); + if (err) goto fail; + + unlock_super (sb); + if(DQUOT_ALLOC_INODE(inode)) { + DQUOT_DROP(inode); + inode->i_flags |= S_NOQUOTA; + inode->i_nlink = 0; + iput(inode); + return ERR_PTR(-EDQUOT); + } + ext3_debug ("allocating inode %lu\n", inode->i_ino); + return inode; + +fail: + unlock_super(sb); + iput(inode); + ext3_std_error(sb, err); + return ERR_PTR(err); +} + +/* Verify that we are loading a valid orphan from disk */ +struct inode *ext3_orphan_get (struct super_block * sb, ino_t ino) +{ + ino_t max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count); + unsigned long block_group; + int bit; + int bitmap_nr; + struct buffer_head *bh; + struct inode *inode = NULL; + + /* Error cases - e2fsck has already cleaned up for us */ + if (ino > max_ino) { + ext3_warning(sb, __FUNCTION__, + "bad orphan ino %ld! e2fsck was run?\n", ino); + return NULL; + } + + block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); + bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb); + if ((bitmap_nr = load_inode_bitmap(sb, block_group)) < 0 || + !(bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr])) { + ext3_warning(sb, __FUNCTION__, + "inode bitmap error for orphan %ld\n", ino); + return NULL; + } + + /* Having the inode bit set should be a 100% indicator that this + * is a valid orphan (no e2fsck run on fs). Orphans also include + * inodes that were being truncated, so we can't check i_nlink==0. + */ + if (!ext3_test_bit(bit, bh->b_data) || !(inode = iget(sb, ino)) || + is_bad_inode(inode) || NEXT_ORPHAN(inode) > max_ino) { + ext3_warning(sb, __FUNCTION__, + "bad orphan inode %ld! 
e2fsck was run?\n", ino); + printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%ld) = %d\n", + bit, bh->b_blocknr, ext3_test_bit(bit, bh->b_data)); + printk(KERN_NOTICE "inode=%p\n", inode); + if (inode) { + printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", + is_bad_inode(inode)); + printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%d\n", + NEXT_ORPHAN(inode)); + printk(KERN_NOTICE "max_ino=%ld\n", max_ino); + } + /* Avoid freeing blocks if we got a bad deleted inode */ + if (inode && inode->i_nlink == 0) + inode->i_blocks = 0; + iput(inode); + return NULL; + } + + return inode; +} + +unsigned long ext3_count_free_inodes (struct super_block * sb) +{ +#ifdef EXT3FS_DEBUG + struct ext3_super_block * es; + unsigned long desc_count, bitmap_count, x; + int bitmap_nr; + struct ext3_group_desc * gdp; + int i; + + lock_super (sb); + es = sb->u.ext3_sb.s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { + gdp = ext3_get_group_desc (sb, i, NULL); + if (!gdp) + continue; + desc_count += le16_to_cpu(gdp->bg_free_inodes_count); + bitmap_nr = load_inode_bitmap (sb, i); + if (bitmap_nr < 0) + continue; + + x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr], + EXT3_INODES_PER_GROUP(sb) / 8); + printk ("group %d: stored = %d, counted = %lu\n", + i, le16_to_cpu(gdp->bg_free_inodes_count), x); + bitmap_count += x; + } + printk("ext3_count_free_inodes: stored = %lu, computed = %lu, %lu\n", + le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); + unlock_super (sb); + return desc_count; +#else + return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_inodes_count); +#endif +} + +#ifdef CONFIG_EXT3_CHECK +/* Called at mount-time, super-block is locked */ +void ext3_check_inodes_bitmap (struct super_block * sb) +{ + struct ext3_super_block * es; + unsigned long desc_count, bitmap_count, x; + int bitmap_nr; + struct ext3_group_desc * gdp; + int i; + + es = sb->u.ext3_sb.s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) { + gdp = ext3_get_group_desc (sb, i, NULL); + if (!gdp) + continue; + desc_count += le16_to_cpu(gdp->bg_free_inodes_count); + bitmap_nr = load_inode_bitmap (sb, i); + if (bitmap_nr < 0) + continue; + + x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr], + EXT3_INODES_PER_GROUP(sb) / 8); + if (le16_to_cpu(gdp->bg_free_inodes_count) != x) + ext3_error (sb, "ext3_check_inodes_bitmap", + "Wrong free inodes count in group %d, " + "stored = %d, counted = %lu", i, + le16_to_cpu(gdp->bg_free_inodes_count), x); + bitmap_count += x; + } + if (le32_to_cpu(es->s_free_inodes_count) != bitmap_count) + ext3_error (sb, "ext3_check_inodes_bitmap", + "Wrong free inodes count in super block, " + "stored = %lu, counted = %lu", + (unsigned long)le32_to_cpu(es->s_free_inodes_count), + bitmap_count); +} +#endif diff -urN linux-2.4.16-reiserfspatches-immutable/fs/ext3/inode.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ext3/inode.c~ --- linux-2.4.16-reiserfspatches-immutable/fs/ext3/inode.c~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ext3/inode.c~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,2676 @@ +/* + * linux/fs/ext3/inode.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Goal-directed block allocation by Stephen Tweedie + * (sct@redhat.com), 
1993, 1998
+ * Big-endian to little-endian byte-swapping/bitmaps by
+ * David S. Miller (davem@caip.rutgers.edu), 1995
+ * 64-bit file support on 64-bit platforms by Jakub Jelinek
+ * (jj@sunsite.ms.mff.cuni.cz)
+ *
+ * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000
+ */
+
+#include <linux/fs.h>
+#include <linux/sched.h>
+#include <linux/ext3_jbd.h>
+#include <linux/jbd.h>
+#include <linux/locks.h>
+#include <linux/smp_lock.h>
+#include <linux/highuid.h>
+#include <linux/quotaops.h>
+#include <linux/module.h>
+
+
+/*
+ * SEARCH_FROM_ZERO forces each block allocation to search from the start
+ * of the filesystem. This is to force rapid reallocation of recently-freed
+ * blocks. The file fragmentation is horrendous.
+ */
+#undef SEARCH_FROM_ZERO
+
+/* The ext3 forget function must perform a revoke if we are freeing data
+ * which has been journaled. Metadata (eg. indirect blocks) must be
+ * revoked in all cases.
+ *
+ * "bh" may be NULL: a metadata block may have been freed from memory
+ * but there may still be a record of it in the journal, and that record
+ * still needs to be revoked.
+ */
+
+static int ext3_forget(handle_t *handle, int is_metadata,
+               struct inode *inode, struct buffer_head *bh,
+               int blocknr)
+{
+    int err;
+
+    BUFFER_TRACE(bh, "enter");
+
+    jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
+          "data mode %lx\n",
+          bh, is_metadata, inode->i_mode,
+          test_opt(inode->i_sb, DATA_FLAGS));
+
+    /* Never use the revoke function if we are doing full data
+     * journaling: there is no need to, and a V1 superblock won't
+     * support it. Otherwise, only skip the revoke on un-journaled
+     * data blocks. */
+
+    if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ||
+        (!is_metadata && !ext3_should_journal_data(inode))) {
+        if (bh) {
+            BUFFER_TRACE(bh, "call journal_forget");
+            ext3_journal_forget(handle, bh);
+        }
+        return 0;
+    }
+
+    /*
+     * data!=journal && (is_metadata || should_journal_data(inode))
+     */
+    BUFFER_TRACE(bh, "call ext3_journal_revoke");
+    err = ext3_journal_revoke(handle, blocknr, bh);
+    if (err)
+        ext3_abort(inode->i_sb, __FUNCTION__,
+               "error %d when attempting revoke", err);
+    BUFFER_TRACE(bh, "exit");
+    return err;
+}
+
+/*
+ * Truncate transactions can be complex and absolutely huge. So we need to
+ * be able to restart the transaction at a convenient checkpoint to make
+ * sure we don't overflow the journal.
+ *
+ * start_transaction gets us a new handle for a truncate transaction,
+ * and extend_transaction tries to extend the existing one a bit. If
+ * extend fails, we need to propagate the failure up and restart the
+ * transaction in the top-level truncate loop. --sct
+ */
+
+static handle_t *start_transaction(struct inode *inode)
+{
+    long needed;
+    handle_t *result;
+
+    needed = inode->i_blocks;
+    if (needed > EXT3_MAX_TRANS_DATA)
+        needed = EXT3_MAX_TRANS_DATA;
+
+    result = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS + needed);
+    if (!IS_ERR(result))
+        return result;
+
+    ext3_std_error(inode->i_sb, PTR_ERR(result));
+    return result;
+}
+
+/*
+ * Try to extend this transaction for the purposes of truncation.
+ *
+ * Returns 0 if we managed to create more room. If we can't create more
+ * room, and the transaction must be restarted we return 1.
+ */
+static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
+{
+    long needed;
+
+    if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS)
+        return 0;
+    needed = inode->i_blocks;
+    if (needed > EXT3_MAX_TRANS_DATA)
+        needed = EXT3_MAX_TRANS_DATA;
+    if (!ext3_journal_extend(handle, EXT3_RESERVE_TRANS_BLOCKS + needed))
+        return 0;
+    return 1;
+}
+
+/*
+ * Restart the transaction associated with *handle.
This does a commit, + * so before we call here everything must be consistently dirtied against + * this transaction. + */ +static int ext3_journal_test_restart(handle_t *handle, struct inode *inode) +{ + long needed = inode->i_blocks; + if (needed > EXT3_MAX_TRANS_DATA) + needed = EXT3_MAX_TRANS_DATA; + jbd_debug(2, "restarting handle %p\n", handle); + return ext3_journal_restart(handle, EXT3_DATA_TRANS_BLOCKS + needed); +} + +/* + * Called at each iput() + */ +void ext3_put_inode (struct inode * inode) +{ + ext3_discard_prealloc (inode); +} + +/* + * Called at the last iput() if i_nlink is zero. + */ +void ext3_delete_inode (struct inode * inode) +{ + handle_t *handle; + + if (is_bad_inode(inode) || + inode->i_ino == EXT3_ACL_IDX_INO || + inode->i_ino == EXT3_ACL_DATA_INO) + goto no_delete; + + lock_kernel(); + handle = start_transaction(inode); + if (IS_ERR(handle)) { + /* If we're going to skip the normal cleanup, we still + * need to make sure that the in-core orphan linked list + * is properly cleaned up. */ + ext3_orphan_del(NULL, inode); + + ext3_std_error(inode->i_sb, PTR_ERR(handle)); + unlock_kernel(); + goto no_delete; + } + + if (IS_SYNC(inode)) + handle->h_sync = 1; + inode->i_size = 0; + if (inode->i_blocks) + ext3_truncate(inode); + /* + * Kill off the orphan record which ext3_truncate created. + * AKPM: I think this can be inside the above `if'. + * Note that ext3_orphan_del() has to be able to cope with the + * deletion of a non-existent orphan - this is because we don't + * know if ext3_truncate() actually created an orphan record. + * (Well, we could do this if we need to, but heck - it works) + */ + ext3_orphan_del(handle, inode); + inode->u.ext3_i.i_dtime = CURRENT_TIME; + + /* + * One subtle ordering requirement: if anything has gone wrong + * (transaction abort, IO errors, whatever), then we can still + * do these next steps (the fs will already have been marked as + * having errors), but we can't free the inode if the mark_dirty + * fails. + */ + if (ext3_mark_inode_dirty(handle, inode)) + /* If that failed, just do the required in-core inode clear. */ + clear_inode(inode); + else + ext3_free_inode(handle, inode); + ext3_journal_stop(handle, inode); + unlock_kernel(); + return; +no_delete: + clear_inode(inode); /* We must guarantee clearing of inode... 
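A note on the transaction helpers introduced above: during truncate, callers first try to stretch the running handle and only restart it at a safe point when the journal cannot hand out more credits. The idiom, as this patch's ext3_clear_blocks() uses it later in this file, looks like the following fragment (a sketch, with `handle', `inode' and `bh' as in the truncate path); everything modified so far must be dirtied against the transaction before the restart, because a restart commits it:

/* Extend-or-restart: a restart commits the current transaction, so all
 * touched metadata must be consistently dirtied against it first. */
if (try_to_extend_transaction(handle, inode)) {
	if (bh)
		ext3_journal_dirty_metadata(handle, bh);
	ext3_mark_inode_dirty(handle, inode);
	ext3_journal_test_restart(handle, inode);   /* commit + new handle */
	ext3_journal_get_write_access(handle, bh);  /* re-acquire access */
}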
*/ +} + +void ext3_discard_prealloc (struct inode * inode) +{ +#ifdef EXT3_PREALLOCATE + lock_kernel(); + /* Writer: ->i_prealloc* */ + if (inode->u.ext3_i.i_prealloc_count) { + unsigned short total = inode->u.ext3_i.i_prealloc_count; + unsigned long block = inode->u.ext3_i.i_prealloc_block; + inode->u.ext3_i.i_prealloc_count = 0; + inode->u.ext3_i.i_prealloc_block = 0; + /* Writer: end */ + ext3_free_blocks (inode, block, total); + } + unlock_kernel(); +#endif +} + +static int ext3_alloc_block (handle_t *handle, + struct inode * inode, unsigned long goal, int *err) +{ +#ifdef EXT3FS_DEBUG + static unsigned long alloc_hits = 0, alloc_attempts = 0; +#endif + unsigned long result; + +#ifdef EXT3_PREALLOCATE + /* Writer: ->i_prealloc* */ + if (inode->u.ext3_i.i_prealloc_count && + (goal == inode->u.ext3_i.i_prealloc_block || + goal + 1 == inode->u.ext3_i.i_prealloc_block)) + { + result = inode->u.ext3_i.i_prealloc_block++; + inode->u.ext3_i.i_prealloc_count--; + /* Writer: end */ + ext3_debug ("preallocation hit (%lu/%lu).\n", + ++alloc_hits, ++alloc_attempts); + } else { + ext3_discard_prealloc (inode); + ext3_debug ("preallocation miss (%lu/%lu).\n", + alloc_hits, ++alloc_attempts); + if (S_ISREG(inode->i_mode)) + result = ext3_new_block (inode, goal, + &inode->u.ext3_i.i_prealloc_count, + &inode->u.ext3_i.i_prealloc_block, err); + else + result = ext3_new_block (inode, goal, 0, 0, err); + /* + * AKPM: this is somewhat sticky. I'm not surprised it was + * disabled in 2.2's ext3. Need to integrate b_committed_data + * guarding with preallocation, if indeed preallocation is + * effective. + */ + } +#else + result = ext3_new_block (handle, inode, goal, 0, 0, err); +#endif + return result; +} + + +typedef struct { + u32 *p; + u32 key; + struct buffer_head *bh; +} Indirect; + +static inline void add_chain(Indirect *p, struct buffer_head *bh, u32 *v) +{ + p->key = *(p->p = v); + p->bh = bh; +} + +static inline int verify_chain(Indirect *from, Indirect *to) +{ + while (from <= to && from->key == *from->p) + from++; + return (from > to); +} + +/** + * ext3_block_to_path - parse the block number into array of offsets + * @inode: inode in question (we are only interested in its superblock) + * @i_block: block number to be parsed + * @offsets: array to store the offsets in + * + * To store the locations of file's data ext3 uses a data structure common + * for UNIX filesystems - tree of pointers anchored in the inode, with + * data blocks at leaves and indirect blocks in intermediate nodes. + * This function translates the block number into path in that tree - + * return value is the path length and @offsets[n] is the offset of + * pointer to (n+1)th node in the nth one. If @block is out of range + * (negative or too large) warning is printed and zero returned. + * + * Note: function doesn't find node addresses, so no IO is needed. All + * we need to know is the capacity of indirect blocks (taken from the + * inode->i_sb). + */ + +/* + * Portability note: the last comparison (check that we fit into triple + * indirect block) is spelled differently, because otherwise on an + * architecture with 32-bit longs and 8Kb pages we might get into trouble + * if our filesystem had 8Kb blocks. We might use long long, but that would + * kill us on x86. Oh, well, at least the sign propagation does not matter - + * i_block would have to be negative in the very beginning, so we would not + * get there at all. 
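To make the block-to-path translation described above concrete before the function itself (next hunk lines), here is a user-space transcription of the same arithmetic for a 1 KiB block size (so ptrs = 256, ptrs_bits = 8), with a couple of worked cases. All names are local to this sketch; EXT3_IND_BLOCK/DIND/TIND are 12, 13 and 14 in the real headers:

#include <assert.h>

#define NDIR      12	/* EXT3_NDIR_BLOCKS: direct pointers in the inode */
#define PTRS      256	/* pointers per 1 KiB indirect block */
#define PTRS_BITS 8

static int block_to_path(long b, int offsets[4])
{
	int n = 0;
	const long dbl = 1L << (PTRS_BITS * 2);	/* blocks under DIND */

	if (b < NDIR) {
		offsets[n++] = b;
	} else if ((b -= NDIR) < PTRS) {
		offsets[n++] = 12;		/* EXT3_IND_BLOCK */
		offsets[n++] = b;
	} else if ((b -= PTRS) < dbl) {
		offsets[n++] = 13;		/* EXT3_DIND_BLOCK */
		offsets[n++] = b >> PTRS_BITS;
		offsets[n++] = b & (PTRS - 1);
	} else {
		b -= dbl;
		offsets[n++] = 14;		/* EXT3_TIND_BLOCK */
		offsets[n++] = b >> (PTRS_BITS * 2);
		offsets[n++] = (b >> PTRS_BITS) & (PTRS - 1);
		offsets[n++] = b & (PTRS - 1);
	}
	return n;
}

int main(void)
{
	int o[4];

	assert(block_to_path(5, o) == 1 && o[0] == 5);	/* direct */
	assert(block_to_path(300, o) == 3 && o[1] == 0 && o[2] == 32);
	assert(block_to_path(70000, o) == 4 && o[2] == 16 && o[3] == 100);
	return 0;
}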
+ */ + +static int ext3_block_to_path(struct inode *inode, long i_block, int offsets[4]) +{ + int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb); + int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb); + const long direct_blocks = EXT3_NDIR_BLOCKS, + indirect_blocks = ptrs, + double_blocks = (1 << (ptrs_bits * 2)); + int n = 0; + + if (i_block < 0) { + ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0"); + } else if (i_block < direct_blocks) { + offsets[n++] = i_block; + } else if ( (i_block -= direct_blocks) < indirect_blocks) { + offsets[n++] = EXT3_IND_BLOCK; + offsets[n++] = i_block; + } else if ((i_block -= indirect_blocks) < double_blocks) { + offsets[n++] = EXT3_DIND_BLOCK; + offsets[n++] = i_block >> ptrs_bits; + offsets[n++] = i_block & (ptrs - 1); + } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { + offsets[n++] = EXT3_TIND_BLOCK; + offsets[n++] = i_block >> (ptrs_bits * 2); + offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); + offsets[n++] = i_block & (ptrs - 1); + } else { + ext3_warning (inode->i_sb, "ext3_block_to_path", "block > big"); + } + return n; +} + +/** + * ext3_get_branch - read the chain of indirect blocks leading to data + * @inode: inode in question + * @depth: depth of the chain (1 - direct pointer, etc.) + * @offsets: offsets of pointers in inode/indirect blocks + * @chain: place to store the result + * @err: here we store the error value + * + * Function fills the array of triples and returns %NULL + * if everything went OK or the pointer to the last filled triple + * (incomplete one) otherwise. Upon the return chain[i].key contains + * the number of (i+1)-th block in the chain (as it is stored in memory, + * i.e. little-endian 32-bit), chain[i].p contains the address of that + * number (it points into struct inode for i==0 and into the bh->b_data + * for i>0) and chain[i].bh points to the buffer_head of i-th indirect + * block for i>0 and NULL for i==0. In other words, it holds the block + * numbers of the chain, addresses they were taken from (and where we can + * verify that chain did not change) and buffer_heads hosting these + * numbers. + * + * Function stops when it stumbles upon zero pointer (absent block) + * (pointer to last triple returned, *@err == 0) + * or when it gets an IO error reading an indirect block + * (ditto, *@err == -EIO) + * or when it notices that chain had been changed while it was reading + * (ditto, *@err == -EAGAIN) + * or when it reads all @depth-1 indirect blocks successfully and finds + * the whole chain, all way to the data (returns %NULL, *err == 0). + */ +static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets, + Indirect chain[4], int *err) +{ + kdev_t dev = inode->i_dev; + int blocksize = inode->i_sb->s_blocksize; + Indirect *p = chain; + struct buffer_head *bh; + + *err = 0; + /* i_data is not going away, no lock needed */ + add_chain (chain, NULL, inode->u.ext3_i.i_data + *offsets); + if (!p->key) + goto no_block; + while (--depth) { + bh = bread(dev, le32_to_cpu(p->key), blocksize); + if (!bh) + goto failure; + /* Reader: pointers */ + if (!verify_chain(chain, p)) + goto changed; + add_chain(++p, bh, (u32*)bh->b_data + *++offsets); + /* Reader: end */ + if (!p->key) + goto no_block; + } + return NULL; + +changed: + *err = -EAGAIN; + goto no_block; +failure: + *err = -EIO; +no_block: + return p; +} + +/** + * ext3_find_near - find a place for allocation with sufficient locality + * @inode: owner + * @ind: descriptor of indirect block. 
+ *
+ * This function returns the preferred place for block allocation.
+ * It is used when heuristic for sequential allocation fails.
+ * Rules are:
+ *   + if there is a block to the left of our position - allocate near it.
+ *   + if pointer will live in indirect block - allocate near that block.
+ *   + if pointer will live in inode - allocate in the same
+ *     cylinder group.
+ * Caller must make sure that @ind is valid and will stay that way.
+ */
+
+static inline unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
+{
+    u32 *start = ind->bh ? (u32*) ind->bh->b_data : inode->u.ext3_i.i_data;
+    u32 *p;
+
+    /* Try to find previous block */
+    for (p = ind->p - 1; p >= start; p--)
+        if (*p)
+            return le32_to_cpu(*p);
+
+    /* No such thing, so let's try location of indirect block */
+    if (ind->bh)
+        return ind->bh->b_blocknr;
+
+    /*
+     * It is going to be referred from inode itself? OK, just put it into
+     * the same cylinder group then.
+     */
+    return (inode->u.ext3_i.i_block_group *
+        EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
+           le32_to_cpu(inode->i_sb->u.ext3_sb.s_es->s_first_data_block);
+}
+
+/**
+ * ext3_find_goal - find a preferred place for allocation.
+ * @inode: owner
+ * @block: block we want
+ * @chain: chain of indirect blocks
+ * @partial: pointer to the last triple within a chain
+ * @goal: place to store the result.
+ *
+ * Normally this function finds the preferred place for block allocation,
+ * stores it in *@goal and returns zero. If the branch had been changed
+ * under us we return -EAGAIN.
+ */
+
+static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
+              Indirect *partial, unsigned long *goal)
+{
+    /* Writer: ->i_next_alloc* */
+    if (block == inode->u.ext3_i.i_next_alloc_block + 1) {
+        inode->u.ext3_i.i_next_alloc_block++;
+        inode->u.ext3_i.i_next_alloc_goal++;
+    }
+#ifdef SEARCH_FROM_ZERO
+    inode->u.ext3_i.i_next_alloc_block = 0;
+    inode->u.ext3_i.i_next_alloc_goal = 0;
+#endif
+    /* Writer: end */
+    /* Reader: pointers, ->i_next_alloc* */
+    if (verify_chain(chain, partial)) {
+        /*
+         * try the heuristic for sequential allocation,
+         * failing that at least try to get decent locality.
+         */
+        if (block == inode->u.ext3_i.i_next_alloc_block)
+            *goal = inode->u.ext3_i.i_next_alloc_goal;
+        if (!*goal)
+            *goal = ext3_find_near(inode, partial);
+#ifdef SEARCH_FROM_ZERO
+        *goal = 0;
+#endif
+        return 0;
+    }
+    /* Reader: end */
+    return -EAGAIN;
+}
+
+/**
+ * ext3_alloc_branch - allocate and set up a chain of blocks.
+ * @inode: owner
+ * @num: depth of the chain (number of blocks to allocate)
+ * @offsets: offsets (in the blocks) to store the pointers to next.
+ * @branch: place to store the chain in.
+ *
+ * This function allocates @num blocks, zeroes out all but the last one,
+ * links them into chain and (if we are synchronous) writes them to disk.
+ * In other words, it prepares a branch that can be spliced onto the
+ * inode. It stores the information about that chain in the branch[], in
+ * the same format as ext3_get_branch() would do. We are calling it after
+ * we had read the existing part of chain and partial points to the last
+ * triple of that (one with zero ->key). Upon the exit we have the same
+ * picture as after the successful ext3_get_block(), except that in one
+ * place chain is disconnected - *branch->p is still zero (we did not
+ * set the last link), but branch->key contains the number that should
+ * be placed into *branch->p to fill that gap.
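The backward scan in ext3_find_near() above reduces to a few lines; here is a user-space model (hypothetical types and names, same three rules: nearest allocated block to our left, else the indirect block itself, else the inode's cylinder group):

/* `p' points at the slot being filled inside the block of pointers that
 * starts at `start'; return a goal block number for the allocator. */
static unsigned long find_near(unsigned int *start, unsigned int *p,
			       unsigned long ind_blocknr,
			       unsigned long group_start)
{
	unsigned int *q;

	for (q = p - 1; q >= start; q--)	/* a block to our left? */
		if (*q)
			return *q;
	if (ind_blocknr)			/* the indirect block itself */
		return ind_blocknr;
	return group_start;			/* inode's cylinder group */
}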
+ * + * If allocation fails we free all blocks we've allocated (and forget + * their buffer_heads) and return the error value the from failed + * ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain + * as described above and return 0. + */ + +static int ext3_alloc_branch(handle_t *handle, struct inode *inode, + int num, + unsigned long goal, + int *offsets, + Indirect *branch) +{ + int blocksize = inode->i_sb->s_blocksize; + int n = 0, keys = 0; + int err = 0; + int i; + int parent = ext3_alloc_block(handle, inode, goal, &err); + + branch[0].key = cpu_to_le32(parent); + if (parent) { + for (n = 1; n < num; n++) { + struct buffer_head *bh; + /* Allocate the next block */ + int nr = ext3_alloc_block(handle, inode, parent, &err); + if (!nr) + break; + branch[n].key = cpu_to_le32(nr); + keys = n+1; + + /* + * Get buffer_head for parent block, zero it out + * and set the pointer to new one, then send + * parent to disk. + */ + bh = getblk(inode->i_dev, parent, blocksize); + branch[n].bh = bh; + lock_buffer(bh); + BUFFER_TRACE(bh, "call get_create_access"); + err = ext3_journal_get_create_access(handle, bh); + if (err) { + unlock_buffer(bh); + brelse(bh); + break; + } + + memset(bh->b_data, 0, blocksize); + branch[n].p = (u32*) bh->b_data + offsets[n]; + *branch[n].p = branch[n].key; + BUFFER_TRACE(bh, "marking uptodate"); + mark_buffer_uptodate(bh, 1); + unlock_buffer(bh); + + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh); + if (err) + break; + + parent = nr; + } + if (IS_SYNC(inode)) + handle->h_sync = 1; + } + if (n == num) + return 0; + + /* Allocation failed, free what we already allocated */ + for (i = 1; i < keys; i++) { + BUFFER_TRACE(branch[i].bh, "call journal_forget"); + ext3_journal_forget(handle, branch[i].bh); + } + for (i = 0; i < keys; i++) + ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1); + return err; +} + +/** + * ext3_splice_branch - splice the allocated branch onto inode. + * @inode: owner + * @block: (logical) number of block we are adding + * @chain: chain of indirect blocks (with a missing link - see + * ext3_alloc_branch) + * @where: location of missing link + * @num: number of blocks we are adding + * + * This function verifies that chain (up to the missing link) had not + * changed, fills the missing link and does all housekeeping needed in + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. Otherwise (== chain had been changed) + * we free the new blocks (forgetting their buffer_heads, indeed) and + * return -EAGAIN. + */ + +static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block, + Indirect chain[4], Indirect *where, int num) +{ + int i; + int err = 0; + + /* + * If we're splicing into a [td]indirect block (as opposed to the + * inode) then we need to get write access to the [td]indirect block + * before the splice. 
+ */ + if (where->bh) { + BUFFER_TRACE(where->bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, where->bh); + if (err) + goto err_out; + } + /* Verify that place we are splicing to is still there and vacant */ + + /* Writer: pointers, ->i_next_alloc* */ + if (!verify_chain(chain, where-1) || *where->p) + /* Writer: end */ + goto changed; + + /* That's it */ + + *where->p = where->key; + inode->u.ext3_i.i_next_alloc_block = block; + inode->u.ext3_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key); +#ifdef SEARCH_FROM_ZERO + inode->u.ext3_i.i_next_alloc_block = 0; + inode->u.ext3_i.i_next_alloc_goal = 0; +#endif + /* Writer: end */ + + /* We are done with atomic stuff, now do the rest of housekeeping */ + + inode->i_ctime = CURRENT_TIME; + ext3_mark_inode_dirty(handle, inode); + + /* had we spliced it onto indirect block? */ + if (where->bh) { + /* + * akpm: If we spliced it onto an indirect block, we haven't + * altered the inode. Note however that if it is being spliced + * onto an indirect block at the very end of the file (the + * file is growing) then we *will* alter the inode to reflect + * the new i_size. But that is not done here - it is done in + * generic_commit_write->__mark_inode_dirty->ext3_dirty_inode. + */ + jbd_debug(5, "splicing indirect only\n"); + BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, where->bh); + if (err) + goto err_out; + } else { + /* + * OK, we spliced it into the inode itself on a direct block. + * Inode was dirtied above. + */ + jbd_debug(5, "splicing direct\n"); + } + return err; + +changed: + /* + * AKPM: if where[i].bh isn't part of the current updating + * transaction then we explode nastily. Test this code path. + */ + jbd_debug(1, "the chain changed: try again\n"); + err = -EAGAIN; + +err_out: + for (i = 1; i < num; i++) { + BUFFER_TRACE(where[i].bh, "call journal_forget"); + ext3_journal_forget(handle, where[i].bh); + } + /* For the normal collision cleanup case, we free up the blocks. + * On genuine filesystem errors we don't even think about doing + * that. */ + if (err == -EAGAIN) + for (i = 0; i < num; i++) + ext3_free_blocks(handle, inode, + le32_to_cpu(where[i].key), 1); + return err; +} + +/* + * Allocation strategy is simple: if we have to allocate something, we will + * have to go the whole way to leaf. So let's do it before attaching anything + * to tree, set linkage between the newborn blocks, write them if sync is + * required, recheck the path, free and repeat if check fails, otherwise + * set the last missing link (that will protect us from any truncate-generated + * removals - all blocks on the path are immune now) and possibly force the + * write on the parent block. + * That has a nice additional property: no special recovery from the failed + * allocations is needed - we simply release blocks and do not touch anything + * reachable from inode. + * + * akpm: `handle' can be NULL if create == 0. 
+ */ + +static int ext3_get_block_handle(handle_t *handle, struct inode *inode, + long iblock, + struct buffer_head *bh_result, int create) +{ + int err = -EIO; + int offsets[4]; + Indirect chain[4]; + Indirect *partial; + unsigned long goal; + int left; + int depth = ext3_block_to_path(inode, iblock, offsets); + loff_t new_size; + + J_ASSERT(handle != NULL || create == 0); + + if (depth == 0) + goto out; + + lock_kernel(); +reread: + partial = ext3_get_branch(inode, depth, offsets, chain, &err); + + /* Simplest case - block found, no allocation needed */ + if (!partial) { + bh_result->b_state &= ~(1UL << BH_New); +got_it: + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = le32_to_cpu(chain[depth-1].key); + bh_result->b_state |= (1UL << BH_Mapped); + /* Clean up and exit */ + partial = chain+depth-1; /* the whole chain */ + goto cleanup; + } + + /* Next simple case - plain lookup or failed read of indirect block */ + if (!create || err == -EIO) { +cleanup: + while (partial > chain) { + BUFFER_TRACE(partial->bh, "call brelse"); + brelse(partial->bh); + partial--; + } + BUFFER_TRACE(bh_result, "returned"); + unlock_kernel(); +out: + return err; + } + + /* + * Indirect block might be removed by truncate while we were + * reading it. Handling of that case (forget what we've got and + * reread) is taken out of the main path. + */ + if (err == -EAGAIN) + goto changed; + + if (ext3_find_goal(inode, iblock, chain, partial, &goal) < 0) + goto changed; + + left = (chain + depth) - partial; + + /* + * Block out ext3_truncate while we alter the tree + */ + down_read(&inode->u.ext3_i.truncate_sem); + err = ext3_alloc_branch(handle, inode, left, goal, + offsets+(partial-chain), partial); + + /* The ext3_splice_branch call will free and forget any buffers + * on the new chain if there is a failure, but that risks using + * up transaction credits, especially for bitmaps where the + * credits cannot be returned. Can we handle this somehow? We + * may need to return -EAGAIN upwards in the worst case. --sct */ + if (!err) + err = ext3_splice_branch(handle, inode, iblock, chain, + partial, left); + up_read(&inode->u.ext3_i.truncate_sem); + if (err == -EAGAIN) + goto changed; + if (err) + goto cleanup; + + new_size = inode->i_size; + /* + * This is not racy against ext3_truncate's modification of i_disksize + * because VM/VFS ensures that the file cannot be extended while + * truncate is in progress. It is racy between multiple parallel + * instances of get_block, but we have the BKL. 
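The -EAGAIN handling above is an optimistic-concurrency loop: read the indirect chain without locks, and if verify_chain() notices that a pointer changed underneath us, drop what was read and start over. Schematically (a sketch with invented read_chain/release helpers, not the kernel code):

/* Optimistic read of a pointer chain, as in ext3_get_block_handle(). */
for (;;) {
	partial = read_chain(chain, &err);	/* fills chain[], may set err */
	if (err != -EAGAIN)
		break;				/* stable result (or hard error) */
	release(chain, partial);		/* brelse() what we read... */
	/* ...and retry: a concurrent truncate moved the tree under us. */
}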
+ */ + if (new_size > inode->u.ext3_i.i_disksize) + inode->u.ext3_i.i_disksize = new_size; + + bh_result->b_state |= (1UL << BH_New); + goto got_it; + +changed: + while (partial > chain) { + jbd_debug(1, "buffer chain changed, retrying\n"); + BUFFER_TRACE(partial->bh, "brelsing"); + brelse(partial->bh); + partial--; + } + goto reread; +} + +static int ext3_get_block(struct inode *inode, long iblock, + struct buffer_head *bh_result, int create) +{ + handle_t *handle = 0; + int ret; + + if (create) { + handle = ext3_journal_current_handle(); + J_ASSERT(handle != 0); + } + ret = ext3_get_block_handle(handle, inode, iblock, bh_result, create); + return ret; +} + +/* + * `handle' can be NULL if create is zero + */ +struct buffer_head *ext3_getblk(handle_t *handle, struct inode * inode, + long block, int create, int * errp) +{ + struct buffer_head dummy; + int fatal = 0, err; + + J_ASSERT(handle != NULL || create == 0); + + dummy.b_state = 0; + dummy.b_blocknr = -1000; + buffer_trace_init(&dummy.b_history); + *errp = ext3_get_block_handle(handle, inode, block, &dummy, create); + if (!*errp && buffer_mapped(&dummy)) { + struct buffer_head *bh; + bh = getblk(dummy.b_dev, dummy.b_blocknr, + inode->i_sb->s_blocksize); + if (buffer_new(&dummy)) { + J_ASSERT(create != 0); + J_ASSERT(handle != 0); + + /* Now that we do not always journal data, we + should keep in mind whether this should + always journal the new buffer as metadata. + For now, regular file writes use + ext3_get_block instead, so it's not a + problem. */ + lock_kernel(); + lock_buffer(bh); + BUFFER_TRACE(bh, "call get_create_access"); + fatal = ext3_journal_get_create_access(handle, bh); + if (!fatal) { + memset(bh->b_data, 0, + inode->i_sb->s_blocksize); + mark_buffer_uptodate(bh, 1); + } + unlock_buffer(bh); + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh); + if (!fatal) fatal = err; + unlock_kernel(); + } else { + BUFFER_TRACE(bh, "not a new buffer"); + } + if (fatal) { + *errp = fatal; + brelse(bh); + bh = NULL; + } + return bh; + } + return NULL; +} + +struct buffer_head *ext3_bread(handle_t *handle, struct inode * inode, + int block, int create, int *err) +{ + struct buffer_head * bh; + int prev_blocks; + + prev_blocks = inode->i_blocks; + + bh = ext3_getblk (handle, inode, block, create, err); + if (!bh) + return bh; +#ifdef EXT3_PREALLOCATE + /* + * If the inode has grown, and this is a directory, then use a few + * more of the preallocated blocks to keep directory fragmentation + * down. The preallocated blocks are guaranteed to be contiguous. 
+ */ + if (create && + S_ISDIR(inode->i_mode) && + inode->i_blocks > prev_blocks && + EXT3_HAS_COMPAT_FEATURE(inode->i_sb, + EXT3_FEATURE_COMPAT_DIR_PREALLOC)) { + int i; + struct buffer_head *tmp_bh; + + for (i = 1; + inode->u.ext3_i.i_prealloc_count && + i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks; + i++) { + /* + * ext3_getblk will zero out the contents of the + * directory for us + */ + tmp_bh = ext3_getblk(handle, inode, + block+i, create, err); + if (!tmp_bh) { + brelse (bh); + return 0; + } + brelse (tmp_bh); + } + } +#endif + if (buffer_uptodate(bh)) + return bh; + ll_rw_block (READ, 1, &bh); + wait_on_buffer (bh); + if (buffer_uptodate(bh)) + return bh; + brelse (bh); + *err = -EIO; + return NULL; +} + +static int walk_page_buffers( handle_t *handle, + struct buffer_head *head, + unsigned from, + unsigned to, + int *partial, + int (*fn)( handle_t *handle, + struct buffer_head *bh)) +{ + struct buffer_head *bh; + unsigned block_start, block_end; + unsigned blocksize = head->b_size; + int err, ret = 0; + + for ( bh = head, block_start = 0; + ret == 0 && (bh != head || !block_start); + block_start = block_end, bh = bh->b_this_page) + { + block_end = block_start + blocksize; + if (block_end <= from || block_start >= to) { + if (partial && !buffer_uptodate(bh)) + *partial = 1; + continue; + } + err = (*fn)(handle, bh); + if (!ret) + ret = err; + } + return ret; +} + +/* + * To preserve ordering, it is essential that the hole instantiation and + * the data write be encapsulated in a single transaction. We cannot + * close off a transaction and start a new one between the ext3_get_block() + * and the commit_write(). So doing the journal_start at the start of + * prepare_write() is the right place. + * + * Also, this function can nest inside ext3_writepage() -> + * block_write_full_page(). In that case, we *know* that ext3_writepage() + * has generated enough buffer credits to do the whole page. So we won't + * block on the journal in that case, which is good, because the caller may + * be PF_MEMALLOC. + * + * By accident, ext3 can be reentered when a transaction is open via + * quota file writes. If we were to commit the transaction while thus + * reentered, there can be a deadlock - we would be holding a quota + * lock, and the commit would never complete if another thread had a + * transaction open and was blocking on the quota lock - a ranking + * violation. + * + * So what we do is to rely on the fact that journal_stop/journal_start + * will _not_ run commit under these circumstances because handle->h_ref + * is elevated. We'll still have enough credits for the tiny quotafile + * write. 
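walk_page_buffers() above is the workhorse of the commit paths that follow: it applies a callback to every buffer of the page that overlaps [from, to). A typical use, sketched here as a fragment (and essentially what do_journal_get_write_access() below packages up):

/* Example callback: take journal write access on one buffer. */
static int get_access_fn(handle_t *handle, struct buffer_head *bh)
{
	return ext3_journal_get_write_access(handle, bh);
}

/* ...then, for the byte range being written within `page': */
err = walk_page_buffers(handle, page->buffers, from, to,
			NULL, get_access_fn);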
+ */ + +static int do_journal_get_write_access(handle_t *handle, + struct buffer_head *bh) +{ + return ext3_journal_get_write_access(handle, bh); +} + +static int ext3_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + handle_t *handle = ext3_journal_current_handle(); + int ret, needed_blocks = ext3_writepage_trans_blocks(inode); + + lock_kernel(); + handle = ext3_journal_start(inode, needed_blocks); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + ret = block_prepare_write(page, from, to, ext3_get_block); + if (ret != 0) + goto prepare_write_failed; + + if (ext3_should_journal_data(inode)) + ret = walk_page_buffers(handle, page->buffers, + from, to, NULL, do_journal_get_write_access); +prepare_write_failed: + if (ret) + ext3_journal_stop(handle, inode); +out: + unlock_kernel(); + return ret; +} + +static int journal_dirty_sync_data(handle_t *handle, struct buffer_head *bh) +{ + return ext3_journal_dirty_data(handle, bh, 0); +} + +/* + * For ext3_writepage(). We also brelse() the buffer to account for + * the bget() which ext3_writepage() performs. + */ +static int journal_dirty_async_data(handle_t *handle, struct buffer_head *bh) +{ + int ret = ext3_journal_dirty_data(handle, bh, 1); + __brelse(bh); + return ret; +} + +/* For commit_write() in data=journal mode */ +static int commit_write_fn(handle_t *handle, struct buffer_head *bh) +{ + set_bit(BH_Uptodate, &bh->b_state); + return ext3_journal_dirty_metadata(handle, bh); +} + +/* + * We need to pick up the new inode size which generic_commit_write gave us + * `file' can be NULL - eg, when called from block_symlink(). + * + * ext3 inode->i_dirty_buffers policy: If we're journalling data we + * definitely don't want them to appear on the inode at all - instead + * we need to manage them at the JBD layer and we need to intercept + * the relevant sync operations and translate them into journal operations. + * + * If we're not journalling data then we can just leave the buffers + * on ->i_dirty_buffers. If someone writes them out for us then thanks. + * Otherwise we'll do it in commit, if we're using ordered data. + */ + +static int ext3_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + handle_t *handle = ext3_journal_current_handle(); + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + + lock_kernel(); + if (ext3_should_journal_data(inode)) { + /* + * Here we duplicate the generic_commit_write() functionality + */ + int partial = 0; + loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + ret = walk_page_buffers(handle, page->buffers, + from, to, &partial, commit_write_fn); + if (!partial) + SetPageUptodate(page); + kunmap(page); + if (pos > inode->i_size) + inode->i_size = pos; + set_bit(EXT3_STATE_JDATA, &inode->u.ext3_i.i_state); + } else { + if (ext3_should_order_data(inode)) { + ret = walk_page_buffers(handle, page->buffers, + from, to, NULL, journal_dirty_sync_data); + } + /* Be careful here if generic_commit_write becomes a + * required invocation after block_prepare_write. */ + if (ret == 0) + ret = generic_commit_write(file, page, from, to); + } + if (inode->i_size > inode->u.ext3_i.i_disksize) { + inode->u.ext3_i.i_disksize = inode->i_size; + ret2 = ext3_mark_inode_dirty(handle, inode); + if (!ret) + ret = ret2; + } + ret2 = ext3_journal_stop(handle, inode); + unlock_kernel(); + if (!ret) + ret = ret2; + return ret; +} + +/* + * bmap() is special. 
It gets used by applications such as lilo and by
+ * the swapper to find the on-disk block of a specific piece of data.
+ *
+ * Naturally, this is dangerous if the block concerned is still in the
+ * journal. If somebody makes a swapfile on an ext3 data-journaling
+ * filesystem and enables swap, then they may get a nasty shock when the
+ * data getting swapped to that swapfile suddenly gets overwritten by
+ * the original zero's written out previously to the journal and
+ * awaiting writeback in the kernel's buffer cache.
+ *
+ * So, if we see any bmap calls here on a modified, data-journaled file,
+ * take extra steps to flush any blocks which might be in the cache.
+ */
+static int ext3_bmap(struct address_space *mapping, long block)
+{
+    struct inode *inode = mapping->host;
+    journal_t *journal;
+    int err;
+
+    if (test_and_clear_bit(EXT3_STATE_JDATA, &inode->u.ext3_i.i_state)) {
+        /*
+         * This is a REALLY heavyweight approach, but the use of
+         * bmap on dirty files is expected to be extremely rare:
+         * only if we run lilo or swapon on a freshly made file
+         * do we expect this to happen.
+         *
+         * (bmap requires CAP_SYS_RAWIO so this does not
+         * represent an unprivileged user DOS attack --- we'd be
+         * in trouble if mortal users could trigger this path at
+         * will.)
+         *
+         * NB. EXT3_STATE_JDATA is not set on files other than
+         * regular files. If somebody wants to bmap a directory
+         * or symlink and gets confused because the buffer
+         * hasn't yet been flushed to disk, they deserve
+         * everything they get.
+         */
+
+        journal = EXT3_JOURNAL(inode);
+        journal_lock_updates(journal);
+        err = journal_flush(journal);
+        journal_unlock_updates(journal);
+
+        if (err)
+            return 0;
+    }
+
+    return generic_block_bmap(mapping,block,ext3_get_block);
+}
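From user space, the path above is reached through the FIBMAP ioctl, which needs CAP_SYS_RAWIO as the comment notes. A minimal caller (error handling pared down):

#include <stdio.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FIBMAP */

int main(int argc, char **argv)
{
	int fd = open(argv[1], O_RDONLY);
	int block = 0;		/* logical block 0 of the file */

	if (fd < 0 || ioctl(fd, FIBMAP, &block) < 0) {
		perror("FIBMAP");
		return 1;
	}
	printf("block 0 lives in fs block %d\n", block);
	return 0;
}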
+
+static int bget_one(handle_t *handle, struct buffer_head *bh)
+{
+    atomic_inc(&bh->b_count);
+    return 0;
+}
+
+/*
+ * Note that we always start a transaction even if we're not journalling
+ * data. This is to preserve ordering: any hole instantiation within
+ * __block_write_full_page -> ext3_get_block() should be journalled
+ * along with the data so we don't crash and then get metadata which
+ * refers to old data.
+ *
+ * In all journalling modes block_write_full_page() will start the I/O.
+ *
+ * Problem:
+ *
+ * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
+ *     ext3_writepage()
+ *
+ * Similar for:
+ *
+ * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
+ *
+ * Same applies to ext3_get_block(). We will deadlock on various things like
+ * lock_journal and i_truncate_sem.
+ *
+ * Setting PF_MEMALLOC here doesn't work - too many internal memory
+ * allocations fail.
+ *
+ * 16May01: If we're reentered then journal_current_handle() will be
+ *     non-zero. We simply *return*.
+ *
+ * 1 July 2001: @@@ FIXME:
+ *     In journalled data mode, a data buffer may be metadata against the
+ *     current transaction. But the same file is part of a shared mapping
+ *     and someone does a writepage() on it.
+ *
+ *     We will move the buffer onto the async_data list, but *after* it has
+ *     been dirtied. So there's a small window where we have dirty data on
+ *     BJ_Metadata.
+ *
+ *     Note that this only applies to the last partial page in the file. The
+ *     bit which block_write_full_page() uses prepare/commit for. (That's
+ *     broken code anyway: it's wrong for msync()).
+ *
+ *     It's a rare case: affects the final partial page, for journalled data
+ *     where the file is subject to both write() and writepage() in the same
+ *     transaction. To fix it we'll need a custom block_write_full_page().
+ * We'll probably need that anyway for journalling writepage() output.
+ *
+ * We don't honour synchronous mounts for writepage(). That would be
+ * disastrous. Any write() or metadata operation will sync the fs for
+ * us.
+ */
+static int ext3_writepage(struct page *page)
+{
+    struct inode *inode = page->mapping->host;
+    struct buffer_head *page_buffers;
+    handle_t *handle = NULL;
+    int ret = 0, err;
+    int needed;
+    int order_data;
+
+    J_ASSERT(PageLocked(page));
+
+    /*
+     * We give up here if we're reentered, because it might be
+     * for a different filesystem. One *could* look for a
+     * nested transaction opportunity.
+     */
+    lock_kernel();
+    if (ext3_journal_current_handle())
+        goto out_fail;
+
+    needed = ext3_writepage_trans_blocks(inode);
+    if (current->flags & PF_MEMALLOC)
+        handle = ext3_journal_try_start(inode, needed);
+    else
+        handle = ext3_journal_start(inode, needed);
+
+    if (IS_ERR(handle)) {
+        ret = PTR_ERR(handle);
+        goto out_fail;
+    }
+
+    order_data = ext3_should_order_data(inode) ||
+            ext3_should_journal_data(inode);
+
+    unlock_kernel();
+
+    page_buffers = NULL;    /* Purely to prevent compiler warning */
+
+    /* bget() all the buffers */
+    if (order_data) {
+        if (!page->buffers)
+            create_empty_buffers(page,
+                inode->i_dev, inode->i_sb->s_blocksize);
+        page_buffers = page->buffers;
+        walk_page_buffers(handle, page_buffers, 0,
+                PAGE_CACHE_SIZE, NULL, bget_one);
+    }
+
+    ret = block_write_full_page(page, ext3_get_block);
+
+    /*
+     * The page can become unlocked at any point now, and
+     * truncate can then come in and change things. So we
+     * can't touch *page from now on. But *page_buffers is
+     * safe due to elevated refcount.
+     */
+
+    handle = ext3_journal_current_handle();
+    lock_kernel();
+
+    /* And attach them to the current transaction */
+    if (order_data) {
+        err = walk_page_buffers(handle, page_buffers,
+            0, PAGE_CACHE_SIZE, NULL, journal_dirty_async_data);
+        if (!ret)
+            ret = err;
+    }
+
+    err = ext3_journal_stop(handle, inode);
+    if (!ret)
+        ret = err;
+    unlock_kernel();
+    return ret;
+
+out_fail:
+
+    unlock_kernel();
+    SetPageDirty(page);
+    UnlockPage(page);
+    return ret;
+}
+
+static int ext3_readpage(struct file *file, struct page *page)
+{
+    return block_read_full_page(page,ext3_get_block);
+}
+
+
+static int ext3_flushpage(struct page *page, unsigned long offset)
+{
+    journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+    return journal_flushpage(journal, page, offset);
+}
+
+static int ext3_releasepage(struct page *page, int wait)
+{
+    journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+    return journal_try_to_free_buffers(journal, page, wait);
+}
+
+
+struct address_space_operations ext3_aops = {
+    readpage:       ext3_readpage,      /* BKL not held. Don't need */
+    writepage:      ext3_writepage,     /* BKL not held. We take it */
+    sync_page:      block_sync_page,
+    prepare_write:  ext3_prepare_write, /* BKL not held. We take it */
+    commit_write:   ext3_commit_write,  /* BKL not held. We take it */
+    bmap:           ext3_bmap,          /* BKL held */
+    flushpage:      ext3_flushpage,     /* BKL not held. Don't need */
+    releasepage:    ext3_releasepage,   /* BKL not held. Don't need */
+};
+
+/*
+ * ext3_block_truncate_page() zeroes out a mapping from file offset `from'
+ * up to the end of the block which corresponds to `from'.
+ * This is required during truncate. We need to physically zero the tail end
+ * of that block so it doesn't yield old data if the file is later grown.
+ */ +static int ext3_block_truncate_page(handle_t *handle, + struct address_space *mapping, loff_t from) +{ + unsigned long index = from >> PAGE_CACHE_SHIFT; + unsigned offset = from & (PAGE_CACHE_SIZE-1); + unsigned blocksize, iblock, length, pos; + struct inode *inode = mapping->host; + struct page *page; + struct buffer_head *bh; + int err; + + blocksize = inode->i_sb->s_blocksize; + length = offset & (blocksize - 1); + + /* Block boundary? Nothing to do */ + if (!length) + return 0; + + length = blocksize - length; + iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); + + page = grab_cache_page(mapping, index); + err = -ENOMEM; + if (!page) + goto out; + + if (!page->buffers) + create_empty_buffers(page, inode->i_dev, blocksize); + + /* Find the buffer that contains "offset" */ + bh = page->buffers; + pos = blocksize; + while (offset >= pos) { + bh = bh->b_this_page; + iblock++; + pos += blocksize; + } + + err = 0; + if (!buffer_mapped(bh)) { + /* Hole? Nothing to do */ + if (buffer_uptodate(bh)) + goto unlock; + ext3_get_block(inode, iblock, bh, 0); + /* Still unmapped? Nothing to do */ + if (!buffer_mapped(bh)) + goto unlock; + } + + /* Ok, it's mapped. Make sure it's up-to-date */ + if (Page_Uptodate(page)) + set_bit(BH_Uptodate, &bh->b_state); + + if (!buffer_uptodate(bh)) { + err = -EIO; + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + /* Uhhuh. Read error. Complain and punt. */ + if (!buffer_uptodate(bh)) + goto unlock; + } + + if (ext3_should_journal_data(inode)) { + BUFFER_TRACE(bh, "get write access"); + err = ext3_journal_get_write_access(handle, bh); + if (err) + goto unlock; + } + + memset(kmap(page) + offset, 0, length); + flush_dcache_page(page); + kunmap(page); + + BUFFER_TRACE(bh, "zeroed end of block"); + + err = 0; + if (ext3_should_journal_data(inode)) { + err = ext3_journal_dirty_metadata(handle, bh); + } else { + if (ext3_should_order_data(inode)) + err = ext3_journal_dirty_data(handle, bh, 0); + __mark_buffer_dirty(bh); + } + +unlock: + UnlockPage(page); + page_cache_release(page); +out: + return err; +} + +/* + * Probably it should be a library function... search for first non-zero word + * or memcmp with zero_page, whatever is better for particular architecture. + * Linus? + */ +static inline int all_zeroes(u32 *p, u32 *q) +{ + while (p < q) + if (*p++) + return 0; + return 1; +} + +/** + * ext3_find_shared - find the indirect blocks for partial truncation. + * @inode: inode in question + * @depth: depth of the affected branch + * @offsets: offsets of pointers in that branch (see ext3_block_to_path) + * @chain: place to store the pointers to partial indirect blocks + * @top: place to the (detached) top of branch + * + * This is a helper function used by ext3_truncate(). + * + * When we do truncate() we may have to clean the ends of several + * indirect blocks but leave the blocks themselves alive. Block is + * partially truncated if some data below the new i_size is refered + * from it (and it is on the path to the first completely truncated + * data block, indeed). We have to free the top of that path along + * with everything to the right of the path. Since no allocation + * past the truncation point is possible until ext3_truncate() + * finishes, we may safely do the latter, but top of branch may + * require special attention - pageout below the truncation point + * might try to populate it. 
+ * + * We atomically detach the top of branch from the tree, store the + * block number of its root in *@top, pointers to buffer_heads of + * partially truncated blocks - in @chain[].bh and pointers to + * their last elements that should not be removed - in + * @chain[].p. Return value is the pointer to last filled element + * of @chain. + * + * The work left to caller to do the actual freeing of subtrees: + * a) free the subtree starting from *@top + * b) free the subtrees whose roots are stored in + * (@chain[i].p+1 .. end of @chain[i].bh->b_data) + * c) free the subtrees growing from the inode past the @chain[0]. + * (no partially truncated stuff there). */ + +static Indirect *ext3_find_shared(struct inode *inode, + int depth, + int offsets[4], + Indirect chain[4], + u32 *top) +{ + Indirect *partial, *p; + int k, err; + + *top = 0; + /* Make k index the deepest non-null offest + 1 */ + for (k = depth; k > 1 && !offsets[k-1]; k--) + ; + partial = ext3_get_branch(inode, k, offsets, chain, &err); + /* Writer: pointers */ + if (!partial) + partial = chain + k-1; + /* + * If the branch acquired continuation since we've looked at it - + * fine, it should all survive and (new) top doesn't belong to us. + */ + if (!partial->key && *partial->p) + /* Writer: end */ + goto no_top; + for (p=partial; p>chain && all_zeroes((u32*)p->bh->b_data,p->p); p--) + ; + /* + * OK, we've found the last block that must survive. The rest of our + * branch should be detached before unlocking. However, if that rest + * of branch is all ours and does not grow immediately from the inode + * it's easier to cheat and just decrement partial->p. + */ + if (p == chain + k - 1 && p > chain) { + p->p--; + } else { + *top = *p->p; + /* Nope, don't do this in ext3. Must leave the tree intact */ +#if 0 + *p->p = 0; +#endif + } + /* Writer: end */ + + while(partial > p) + { + brelse(partial->bh); + partial--; + } +no_top: + return partial; +} + +/* + * Zero a number of block pointers in either an inode or an indirect block. + * If we restart the transaction we must again get write access to the + * indirect block for further modification. + * + * We release `count' blocks on disk, but (last - first) may be greater + * than `count' because there can be holes in there. + */ +static void +ext3_clear_blocks(handle_t *handle, struct inode *inode, struct buffer_head *bh, + unsigned long block_to_free, unsigned long count, + u32 *first, u32 *last) +{ + u32 *p; + kdev_t dev = inode->i_sb->s_dev; + unsigned long blocksize = inode->i_sb->s_blocksize; + + if (try_to_extend_transaction(handle, inode)) { + if (bh) { + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, bh); + } + ext3_mark_inode_dirty(handle, inode); + ext3_journal_test_restart(handle, inode); + BUFFER_TRACE(bh, "get_write_access"); + ext3_journal_get_write_access(handle, bh); + } + + /* + * Any buffers which are on the journal will be in memory. We find + * them on the hash table so journal_revoke() will run journal_forget() + * on them. We've already detached each block from the file, so + * bforget() in journal_forget() should be safe. + * + * AKPM: turn on bforget in journal_forget()!!! 
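+ *
+ * For illustration: if the pointer slots first..last hold the block
+ * numbers {100, 0, 101, 102}, ext3_free_data() calls us with
+ * block_to_free = 100 and count = 3.  The hole occupies a slot in
+ * the array but no block on disk, which is why (last - first) may
+ * exceed `count'.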
+ */ + for (p = first; p < last; p++) { + u32 nr = le32_to_cpu(*p); + if (nr) { + struct buffer_head *bh; + + *p = 0; + bh = get_hash_table(dev, nr, blocksize); + ext3_forget(handle, 0, inode, bh, nr); + } + } + + ext3_free_blocks(handle, inode, block_to_free, count); +} + +/** + * ext3_free_data - free a list of data blocks + * @handle: handle for this transaction + * @inode: inode we are dealing with + * @this_bh: indirect buffer_head which contains *@first and *@last + * @first: array of block numbers + * @last: points immediately past the end of array + * + * We are freeing all blocks refered from that array (numbers are stored as + * little-endian 32-bit) and updating @inode->i_blocks appropriately. + * + * We accumulate contiguous runs of blocks to free. Conveniently, if these + * blocks are contiguous then releasing them at one time will only affect one + * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't + * actually use a lot of journal space. + * + * @this_bh will be %NULL if @first and @last point into the inode's direct + * block pointers. + */ +static void ext3_free_data(handle_t *handle, struct inode *inode, + struct buffer_head *this_bh, u32 *first, u32 *last) +{ + unsigned long block_to_free = 0; /* Starting block # of a run */ + unsigned long count = 0; /* Number of blocks in the run */ + u32 *block_to_free_p = NULL; /* Pointer into inode/ind + corresponding to + block_to_free */ + unsigned long nr; /* Current block # */ + u32 *p; /* Pointer into inode/ind + for current block */ + int err; + + if (this_bh) { /* For indirect block */ + BUFFER_TRACE(this_bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, this_bh); + /* Important: if we can't update the indirect pointers + * to the blocks, we can't free them. */ + if (err) + return; + } + + for (p = first; p < last; p++) { + nr = le32_to_cpu(*p); + if (nr) { + /* accumulate blocks to free if they're contiguous */ + if (count == 0) { + block_to_free = nr; + block_to_free_p = p; + count = 1; + } else if (nr == block_to_free + count) { + count++; + } else { + ext3_clear_blocks(handle, inode, this_bh, + block_to_free, + count, block_to_free_p, p); + block_to_free = nr; + block_to_free_p = p; + count = 1; + } + } + } + + if (count > 0) + ext3_clear_blocks(handle, inode, this_bh, block_to_free, + count, block_to_free_p, p); + + if (this_bh) { + BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, this_bh); + } +} + +/** + * ext3_free_branches - free an array of branches + * @handle: JBD handle for this transaction + * @inode: inode we are dealing with + * @parent_bh: the buffer_head which contains *@first and *@last + * @first: array of block numbers + * @last: pointer immediately past the end of array + * @depth: depth of the branches to free + * + * We are freeing all blocks refered from these branches (numbers are + * stored as little-endian 32-bit) and updating @inode->i_blocks + * appropriately. 
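+ *
+ * For illustration: @depth is 1 for a singly-indirect branch, 2 for
+ * doubly-indirect and 3 for triply-indirect.  Each recursion level
+ * reads one indirect block and recurses on its addr_per_block
+ * pointers; once @depth reaches zero, @first..@last are data block
+ * numbers and go straight to ext3_free_data().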
+ */ +static void ext3_free_branches(handle_t *handle, struct inode *inode, + struct buffer_head *parent_bh, + u32 *first, u32 *last, int depth) +{ + unsigned long nr; + u32 *p; + + if (is_handle_aborted(handle)) + return; + + if (depth--) { + struct buffer_head *bh; + int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); + p = last; + while (--p >= first) { + nr = le32_to_cpu(*p); + if (!nr) + continue; /* A hole */ + + /* Go read the buffer for the next level down */ + bh = bread(inode->i_dev, nr, inode->i_sb->s_blocksize); + + /* + * A read failure? Report error and clear slot + * (should be rare). + */ + if (!bh) { + ext3_error(inode->i_sb, "ext3_free_branches", + "Read failure, inode=%ld, block=%ld", + inode->i_ino, nr); + continue; + } + + /* This zaps the entire block. Bottom up. */ + BUFFER_TRACE(bh, "free child branches"); + ext3_free_branches(handle, inode, bh, (u32*)bh->b_data, + (u32*)bh->b_data + addr_per_block, + depth); + + /* + * We've probably journalled the indirect block several + * times during the truncate. But it's no longer + * needed and we now drop it from the transaction via + * journal_revoke(). + * + * That's easy if it's exclusively part of this + * transaction. But if it's part of the committing + * transaction then journal_forget() will simply + * brelse() it. That means that if the underlying + * block is reallocated in ext3_get_block(), + * unmap_underlying_metadata() will find this block + * and will try to get rid of it. damn, damn. + * + * If this block has already been committed to the + * journal, a revoke record will be written. And + * revoke records must be emitted *before* clearing + * this block's bit in the bitmaps. + */ + ext3_forget(handle, 1, inode, bh, bh->b_blocknr); + + /* + * Everything below this pointer has been + * released. Now let this top-of-subtree go. + * + * We want the freeing of this indirect block to be + * atomic in the journal with the updating of the + * bitmap block which owns it. So make some room in + * the journal. + * + * We zero the parent pointer *after* freeing its + * pointee in the bitmaps, so if extend_transaction() + * for some reason fails to put the bitmap changes and + * the release into the same transaction, recovery + * will merely complain about releasing a free block, + * rather than leaking blocks. + */ + if (is_handle_aborted(handle)) + return; + if (try_to_extend_transaction(handle, inode)) { + ext3_mark_inode_dirty(handle, inode); + ext3_journal_test_restart(handle, inode); + } + + ext3_free_blocks(handle, inode, nr, 1); + + if (parent_bh) { + /* + * The block which we have just freed is + * pointed to by an indirect block: journal it + */ + BUFFER_TRACE(parent_bh, "get_write_access"); + if (!ext3_journal_get_write_access(handle, + parent_bh)){ + *p = 0; + BUFFER_TRACE(parent_bh, + "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, + parent_bh); + } + } + } + } else { + /* We have reached the bottom of the tree. */ + BUFFER_TRACE(parent_bh, "free data blocks"); + ext3_free_data(handle, inode, parent_bh, first, last); + } +} + +/* + * ext3_truncate() + * + * We block out ext3_get_block() block instantiations across the entire + * transaction, and VFS/VM ensures that ext3_truncate() cannot run + * simultaneously on behalf of the same inode. + * + * As we work through the truncate and commit bits of it to the journal there + * is one core, guiding principle: the file's tree must always be consistent on + * disk. We must be able to restart the truncate after a crash. 
+ * + * The file's tree may be transiently inconsistent in memory (although it + * probably isn't), but whenever we close off and commit a journal transaction, + * the contents of (the filesystem + the journal) must be consistent and + * restartable. It's pretty simple, really: bottom up, right to left (although + * left-to-right works OK too). + * + * Note that at recovery time, journal replay occurs *before* the restart of + * truncate against the orphan inode list. + * + * The committed inode has the new, desired i_size (which is the same as + * i_disksize in this case). After a crash, ext3_orphan_cleanup() will see + * that this inode's truncate did not complete and it will again call + * ext3_truncate() to have another go. So there will be instantiated blocks + * to the right of the truncation point in a crashed ext3 filesystem. But + * that's fine - as long as they are linked from the inode, the post-crash + * ext3_truncate() run will find them and release them. + */ + +void ext3_truncate(struct inode * inode) +{ + handle_t *handle; + u32 *i_data = inode->u.ext3_i.i_data; + int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); + int offsets[4]; + Indirect chain[4]; + Indirect *partial; + int nr = 0; + int n; + long last_block; + unsigned blocksize; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE_FILE(inode)) + return; + + ext3_discard_prealloc(inode); + + handle = start_transaction(inode); + if (IS_ERR(handle)) + return; /* AKPM: return what? */ + + blocksize = inode->i_sb->s_blocksize; + last_block = (inode->i_size + blocksize-1) + >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); + + ext3_block_truncate_page(handle, inode->i_mapping, inode->i_size); + + + n = ext3_block_to_path(inode, last_block, offsets); + if (n == 0) + goto out_stop; /* error */ + + /* + * OK. This truncate is going to happen. We add the inode to the + * orphan list, so that if this truncate spans multiple transactions, + * and we crash, we will resume the truncate when the filesystem + * recovers. It also marks the inode dirty, to catch the new size. + * + * Implication: the file must always be in a sane, consistent + * truncatable state while each transaction commits. + */ + if (ext3_orphan_add(handle, inode)) + goto out_stop; + + /* + * The orphan list entry will now protect us from any crash which + * occurs before the truncate completes, so it is now safe to propagate + * the new, shorter inode size (held for now in i_size) into the + * on-disk inode. We do this via i_disksize, which is the value which + * ext3 *really* writes onto the disk inode. + */ + inode->u.ext3_i.i_disksize = inode->i_size; + + /* + * From here we block out all ext3_get_block() callers who want to + * modify the block allocation tree. + */ + down_write(&inode->u.ext3_i.truncate_sem); + + if (n == 1) { /* direct blocks */ + ext3_free_data(handle, inode, NULL, i_data+offsets[0], + i_data + EXT3_NDIR_BLOCKS); + goto do_indirects; + } + + partial = ext3_find_shared(inode, n, offsets, chain, &nr); + /* Kill the top of shared branch (not detached) */ + if (nr) { + if (partial == chain) { + /* Shared branch grows from the inode */ + ext3_free_branches(handle, inode, NULL, + &nr, &nr+1, (chain+n-1) - partial); + *partial->p = 0; + /* + * We mark the inode dirty prior to restart, + * and prior to stop. No need for it here. 
+ */ + } else { + /* Shared branch grows from an indirect block */ + BUFFER_TRACE(partial->bh, "get_write_access"); + ext3_free_branches(handle, inode, partial->bh, + partial->p, + partial->p+1, (chain+n-1) - partial); + } + } + /* Clear the ends of indirect blocks on the shared branch */ + while (partial > chain) { + ext3_free_branches(handle, inode, partial->bh, partial->p + 1, + (u32*)partial->bh->b_data + addr_per_block, + (chain+n-1) - partial); + BUFFER_TRACE(partial->bh, "call brelse"); + brelse (partial->bh); + partial--; + } +do_indirects: + /* Kill the remaining (whole) subtrees */ + switch (offsets[0]) { + default: + nr = i_data[EXT3_IND_BLOCK]; + if (nr) { + ext3_free_branches(handle, inode, NULL, + &nr, &nr+1, 1); + i_data[EXT3_IND_BLOCK] = 0; + } + case EXT3_IND_BLOCK: + nr = i_data[EXT3_DIND_BLOCK]; + if (nr) { + ext3_free_branches(handle, inode, NULL, + &nr, &nr+1, 2); + i_data[EXT3_DIND_BLOCK] = 0; + } + case EXT3_DIND_BLOCK: + nr = i_data[EXT3_TIND_BLOCK]; + if (nr) { + ext3_free_branches(handle, inode, NULL, + &nr, &nr+1, 3); + i_data[EXT3_TIND_BLOCK] = 0; + } + case EXT3_TIND_BLOCK: + ; + } + up_write(&inode->u.ext3_i.truncate_sem); + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + ext3_mark_inode_dirty(handle, inode); + + /* In a multi-transaction truncate, we only make the final + * transaction synchronous */ + if (IS_SYNC(inode)) + handle->h_sync = 1; +out_stop: + /* + * If this was a simple ftruncate(), and the file will remain alive + * then we need to clear up the orphan record which we created above. + * However, if this was a real unlink then we were called by + * ext3_delete_inode(), and we allow that function to clean up the + * orphan info for us. + */ + if (inode->i_nlink) + ext3_orphan_del(handle, inode); + + ext3_journal_stop(handle, inode); +} + +/* + * ext3_get_inode_loc returns with an extra refcount against the + * inode's underlying buffer_head on success. 
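+ *
+ * A minimal illustration of the pairing a caller must observe:
+ *
+ *	struct ext3_iloc iloc;
+ *
+ *	if (ext3_get_inode_loc(inode, &iloc) == 0) {
+ *		... read or modify iloc.raw_inode ...
+ *		brelse(iloc.bh);	(drop the extra reference)
+ *	}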
+ */ + +int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc) +{ + struct buffer_head *bh = 0; + unsigned long block; + unsigned long block_group; + unsigned long group_desc; + unsigned long desc; + unsigned long offset; + struct ext3_group_desc * gdp; + + if ((inode->i_ino != EXT3_ROOT_INO && + inode->i_ino != EXT3_ACL_IDX_INO && + inode->i_ino != EXT3_ACL_DATA_INO && + inode->i_ino != EXT3_JOURNAL_INO && + inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) || + inode->i_ino > le32_to_cpu( + inode->i_sb->u.ext3_sb.s_es->s_inodes_count)) { + ext3_error (inode->i_sb, "ext3_get_inode_loc", + "bad inode number: %lu", inode->i_ino); + goto bad_inode; + } + block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb); + if (block_group >= inode->i_sb->u.ext3_sb.s_groups_count) { + ext3_error (inode->i_sb, "ext3_get_inode_loc", + "group >= groups count"); + goto bad_inode; + } + group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb); + desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1); + bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc]; + if (!bh) { + ext3_error (inode->i_sb, "ext3_get_inode_loc", + "Descriptor not loaded"); + goto bad_inode; + } + + gdp = (struct ext3_group_desc *) bh->b_data; + /* + * Figure out the offset within the block group inode table + */ + offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) * + EXT3_INODE_SIZE(inode->i_sb); + block = le32_to_cpu(gdp[desc].bg_inode_table) + + (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb)); + if (!(bh = bread (inode->i_dev, block, inode->i_sb->s_blocksize))) { + ext3_error (inode->i_sb, "ext3_get_inode_loc", + "unable to read inode block - " + "inode=%lu, block=%lu", inode->i_ino, block); + goto bad_inode; + } + offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1); + + iloc->bh = bh; + iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset); + iloc->block_group = block_group; + + return 0; + + bad_inode: + return -EIO; +} + +void ext3_read_inode(struct inode * inode) +{ + struct ext3_iloc iloc; + struct ext3_inode *raw_inode; + struct buffer_head *bh; + int block; + + if(ext3_get_inode_loc(inode, &iloc)) + goto bad_inode; + bh = iloc.bh; + raw_inode = iloc.raw_inode; + init_rwsem(&inode->u.ext3_i.truncate_sem); + inode->i_mode = le16_to_cpu(raw_inode->i_mode); + inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + if(!(test_opt (inode->i_sb, NO_UID32))) { + inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + } + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); + inode->i_size = le32_to_cpu(raw_inode->i_size); + inode->i_atime = le32_to_cpu(raw_inode->i_atime); + inode->i_ctime = le32_to_cpu(raw_inode->i_ctime); + inode->i_mtime = le32_to_cpu(raw_inode->i_mtime); + inode->u.ext3_i.i_dtime = le32_to_cpu(raw_inode->i_dtime); + /* We now have enough fields to check if the inode was active or not. + * This is needed because nfsd might try to access dead inodes + * the test is that same one that e2fsck uses + * NeilBrown 1999oct15 + */ + if (inode->i_nlink == 0) { + if (inode->i_mode == 0 || + !(inode->i_sb->u.ext3_sb.s_mount_state & EXT3_ORPHAN_FS)) { + /* this inode is deleted */ + brelse (bh); + goto bad_inode; + } + /* The only unlinked inodes we let through here have + * valid i_mode and are being read by the orphan + * recovery code: that's fine, we're about to complete + * the process of deleting those. 
*/ + } + inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size + * (for stat), not the fs block + * size */ + inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); + inode->i_version = ++event; + inode->u.ext3_i.i_flags = le32_to_cpu(raw_inode->i_flags); +#ifdef EXT3_FRAGMENTS + inode->u.ext3_i.i_faddr = le32_to_cpu(raw_inode->i_faddr); + inode->u.ext3_i.i_frag_no = raw_inode->i_frag; + inode->u.ext3_i.i_frag_size = raw_inode->i_fsize; +#endif + inode->u.ext3_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl); + if (!S_ISREG(inode->i_mode)) { + inode->u.ext3_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); + } else { + inode->i_size |= + ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; + } + inode->u.ext3_i.i_disksize = inode->i_size; + inode->i_generation = le32_to_cpu(raw_inode->i_generation); +#ifdef EXT3_PREALLOCATE + inode->u.ext3_i.i_prealloc_count = 0; +#endif + inode->u.ext3_i.i_block_group = iloc.block_group; + + /* + * NOTE! The in-memory inode i_data array is in little-endian order + * even on big-endian machines: we do NOT byteswap the block numbers! + */ + for (block = 0; block < EXT3_N_BLOCKS; block++) + inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; + INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); + + brelse (iloc.bh); + + if (inode->i_ino == EXT3_ACL_IDX_INO || + inode->i_ino == EXT3_ACL_DATA_INO) + /* Nothing to do */ ; + else if (S_ISREG(inode->i_mode)) { + inode->i_op = &ext3_file_inode_operations; + inode->i_fop = &ext3_file_operations; + inode->i_mapping->a_ops = &ext3_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &ext3_dir_inode_operations; + inode->i_fop = &ext3_dir_operations; + } else if (S_ISLNK(inode->i_mode)) { + if (!inode->i_blocks) + inode->i_op = &ext3_fast_symlink_inode_operations; + else { + inode->i_op = &page_symlink_inode_operations; + inode->i_mapping->a_ops = &ext3_aops; + } + } else + init_special_inode(inode, inode->i_mode, + le32_to_cpu(iloc.raw_inode->i_block[0])); + /* inode->i_attr_flags = 0; unused */ + if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) { + /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */ + inode->i_flags |= S_SYNC; + } + if (inode->u.ext3_i.i_flags & EXT3_APPEND_FL) { + /* inode->i_attr_flags |= ATTR_FLAG_APPEND; unused */ + inode->i_flags |= S_APPEND; + } + if (inode->u.ext3_i.i_flags & EXT3_IMMUTABLE_FILE_FL) { + /* inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE_FILE; unused */ + inode->i_flags |= S_IMMUTABLE_FILE; + } + if (inode->u.ext3_i.i_flags & EXT3_IMMUTABLE_LINK_FL) { + /* inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE_FILE; unused */ + inode->i_flags |= S_IMMUTABLE_LINK; + } + if (inode->u.ext3_i.i_flags & EXT3_NOATIME_FL) { + /* inode->i_attr_flags |= ATTR_FLAG_NOATIME; unused */ + inode->i_flags |= S_NOATIME; + } + return; + +bad_inode: + make_bad_inode(inode); + return; +} + +/* + * Post the struct inode info into an on-disk inode location in the + * buffer-cache. This gobbles the caller's reference to the + * buffer_head in the inode location struct. 
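+ *
+ * The usual route into here is the pattern this file uses throughout
+ * (sketched; see ext3_mark_inode_dirty() below):
+ *
+ *	handle = ext3_journal_start(inode, 1);
+ *	err = ext3_reserve_inode_write(handle, inode, &iloc);
+ *	if (!err)
+ *		err = ext3_mark_iloc_dirty(handle, inode, &iloc);
+ *	ext3_journal_stop(handle, inode);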
+ */ + +static int ext3_do_update_inode(handle_t *handle, + struct inode *inode, + struct ext3_iloc *iloc) +{ + struct ext3_inode *raw_inode = iloc->raw_inode; + struct buffer_head *bh = iloc->bh; + int err = 0, rc, block; + + if (handle) { + BUFFER_TRACE(bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, bh); + if (err) + goto out_brelse; + } + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + if(!(test_opt(inode->i_sb, NO_UID32))) { + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); +/* + * Fix up interoperability with old kernels. Otherwise, old inodes get + * re-used with the upper 16 bits of the uid/gid intact + */ + if(!inode->u.ext3_i.i_dtime) { + raw_inode->i_uid_high = + cpu_to_le16(high_16_bits(inode->i_uid)); + raw_inode->i_gid_high = + cpu_to_le16(high_16_bits(inode->i_gid)); + } else { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + } else { + raw_inode->i_uid_low = + cpu_to_le16(fs_high2lowuid(inode->i_uid)); + raw_inode->i_gid_low = + cpu_to_le16(fs_high2lowgid(inode->i_gid)); + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + raw_inode->i_size = cpu_to_le32(inode->u.ext3_i.i_disksize); + raw_inode->i_atime = cpu_to_le32(inode->i_atime); + raw_inode->i_ctime = cpu_to_le32(inode->i_ctime); + raw_inode->i_mtime = cpu_to_le32(inode->i_mtime); + raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); + raw_inode->i_dtime = cpu_to_le32(inode->u.ext3_i.i_dtime); + raw_inode->i_flags = cpu_to_le32(inode->u.ext3_i.i_flags); +#ifdef EXT3_FRAGMENTS + raw_inode->i_faddr = cpu_to_le32(inode->u.ext3_i.i_faddr); + raw_inode->i_frag = inode->u.ext3_i.i_frag_no; + raw_inode->i_fsize = inode->u.ext3_i.i_frag_size; +#else + /* If we are not tracking these fields in the in-memory inode, + * then preserve them on disk, but still initialise them to zero + * for new inodes. */ + if (inode->u.ext3_i.i_state & EXT3_STATE_NEW) { + raw_inode->i_faddr = 0; + raw_inode->i_frag = 0; + raw_inode->i_fsize = 0; + } +#endif + raw_inode->i_file_acl = cpu_to_le32(inode->u.ext3_i.i_file_acl); + if (!S_ISREG(inode->i_mode)) { + raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext3_i.i_dir_acl); + } else { + raw_inode->i_size_high = + cpu_to_le32(inode->u.ext3_i.i_disksize >> 32); + if (inode->u.ext3_i.i_disksize > 0x7fffffffULL) { + struct super_block *sb = inode->i_sb; + if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, + EXT3_FEATURE_RO_COMPAT_LARGE_FILE) || + EXT3_SB(sb)->s_es->s_rev_level == + cpu_to_le32(EXT3_GOOD_OLD_REV)) { + /* If this is the first large file + * created, add a flag to the superblock. 
+ */ + err = ext3_journal_get_write_access(handle, + sb->u.ext3_sb.s_sbh); + if (err) + goto out_brelse; + ext3_update_dynamic_rev(sb); + EXT3_SET_RO_COMPAT_FEATURE(sb, + EXT3_FEATURE_RO_COMPAT_LARGE_FILE); + sb->s_dirt = 1; + handle->h_sync = 1; + err = ext3_journal_dirty_metadata(handle, + sb->u.ext3_sb.s_sbh); + } + } + } + raw_inode->i_generation = cpu_to_le32(inode->i_generation); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + raw_inode->i_block[0] = + cpu_to_le32(kdev_t_to_nr(inode->i_rdev)); + else for (block = 0; block < EXT3_N_BLOCKS; block++) + raw_inode->i_block[block] = inode->u.ext3_i.i_data[block]; + + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + rc = ext3_journal_dirty_metadata(handle, bh); + if (!err) + err = rc; + inode->u.ext3_i.i_state &= ~EXT3_STATE_NEW; + +out_brelse: + brelse (bh); + ext3_std_error(inode->i_sb, err); + return err; +} + +/* + * ext3_write_inode() + * + * We are called from a few places: + * + * - Within generic_file_write() for O_SYNC files. + * Here, there will be no transaction running. We wait for any running + * transaction to commit. + * + * - Within sys_sync(), kupdate and such. + * We wait on commit, if told to. + * + * - Within prune_icache() (PF_MEMALLOC == true) + * Here we simply return. We can't afford to block kswapd on the + * journal commit. + * + * In all cases it is actually safe for us to return without doing anything, + * because the inode has been copied into a raw inode buffer in + * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for + * knfsd. + * + * Note that we are absolutely dependent upon all inode dirtiers doing the + * right thing: they *must* call mark_inode_dirty() after dirtying info in + * which we are interested. + * + * It would be a bug for them to not do this. The code: + * + * mark_inode_dirty(inode) + * stuff(); + * inode->i_size = expr; + * + * is in error because a kswapd-driven write_inode() could occur while + * `stuff()' is running, and the new i_size will be lost. Plus the inode + * will no longer be on the superblock's dirty inode list. + */ +void ext3_write_inode(struct inode *inode, int wait) +{ + if (current->flags & PF_MEMALLOC) + return; + + if (ext3_journal_current_handle()) { + jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n"); + return; + } + + if (!wait) + return; + + ext3_force_commit(inode->i_sb); +} + +/* + * ext3_setattr() + * + * Called from notify_change. + * + * We want to trap VFS attempts to truncate the file as soon as + * possible. In particular, we want to make sure that when the VFS + * shrinks i_size, we put the inode on the orphan list and modify + * i_disksize immediately, so that during the subsequent flushing of + * dirty pages and freeing of disk blocks, we can guarantee that any + * commit will leave the blocks being flushed in an unused state on + * disk. (On recovery, the inode will get truncated and the blocks will + * be freed, so we have a strong guarantee that no future commit will + * leave these blocks visible to the user.) + * + * This is only needed for regular files. rmdir() has its own path, and + * we can never truncate a directory except on final unlink (at which + * point i_nlink is zero so recovery is easy.) + * + * Called with the BKL. 
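+ *
+ * The resulting order of events for a shrinking truncate (a sketch
+ * of the code below):
+ *
+ *	1: ext3_journal_start()
+ *	2: ext3_orphan_add()	(protects us across a crash)
+ *	3: i_disksize = attr->ia_size; ext3_mark_inode_dirty()
+ *	4: ext3_journal_stop()
+ *	5: inode_setattr()	(flushes pages, frees the blocks)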
+ */ + +int ext3_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int error, rc; + + error = inode_change_ok(inode, attr); + if (error) + return error; + + if (attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { + handle_t *handle; + + handle = ext3_journal_start(inode, 3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto err_out; + } + + error = ext3_orphan_add(handle, inode); + inode->u.ext3_i.i_disksize = attr->ia_size; + rc = ext3_mark_inode_dirty(handle, inode); + if (!error) + error = rc; + ext3_journal_stop(handle, inode); + } + + inode_setattr(inode, attr); + + /* If inode_setattr's call to ext3_truncate failed to get a + * transaction handle at all, we need to clean up the in-core + * orphan list manually. */ + if (inode->i_nlink) + ext3_orphan_del(NULL, inode); + +err_out: + ext3_std_error(inode->i_sb, error); + return 0; +} + + +/* + * akpm: how many blocks doth make a writepage()? + * + * With N blocks per page, it may be: + * N data blocks + * 2 indirect block + * 2 dindirect + * 1 tindirect + * N+5 bitmap blocks (from the above) + * N+5 group descriptor summary blocks + * 1 inode block + * 1 superblock. + * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quote files + * + * 3 * (N + 5) + 2 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS + * + * With ordered or writeback data it's the same, less the N data blocks. + * + * If the inode's direct blocks can hold an integral number of pages then a + * page cannot straddle two indirect blocks, and we can only touch one indirect + * and dindirect block, and the "5" above becomes "3". + * + * This still overestimates under most circumstances. If we were to pass the + * start and end offsets in here as well we could do block_to_path() on each + * block and work out the exact number of indirects which are touched. Pah. + */ + +int ext3_writepage_trans_blocks(struct inode *inode) +{ + int bpp = ext3_journal_blocks_per_page(inode); + int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; + int ret; + + if (ext3_should_journal_data(inode)) + ret = 3 * (bpp + indirects) + 2; + else + ret = 2 * (bpp + indirects) + 2; + +#ifdef CONFIG_QUOTA + ret += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; +#endif + + return ret; +} + +int +ext3_mark_iloc_dirty(handle_t *handle, + struct inode *inode, + struct ext3_iloc *iloc) +{ + int err = 0; + + if (handle) { + /* the do_update_inode consumes one bh->b_count */ + atomic_inc(&iloc->bh->b_count); + err = ext3_do_update_inode(handle, inode, iloc); + /* ext3_do_update_inode() does journal_dirty_metadata */ + brelse(iloc->bh); + } else { + printk(KERN_EMERG __FUNCTION__ ": called with no handle!\n"); + } + return err; +} + +/* + * On success, We end up with an outstanding reference count against + * iloc->bh. This _must_ be cleaned up later. + */ + +int +ext3_reserve_inode_write(handle_t *handle, struct inode *inode, + struct ext3_iloc *iloc) +{ + int err = 0; + if (handle) { + err = ext3_get_inode_loc(inode, iloc); + if (!err) { + BUFFER_TRACE(iloc->bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, iloc->bh); + if (err) { + brelse(iloc->bh); + iloc->bh = NULL; + } + } + } + ext3_std_error(inode->i_sb, err); + return err; +} + +/* + * akpm: What we do here is to mark the in-core inode as clean + * with respect to inode dirtiness (it may still be data-dirty). + * This means that the in-core inode may be reaped by prune_icache + * without having to perform any I/O. 
This is a very good thing, + * because *any* task may call prune_icache - even ones which + * have a transaction open against a different journal. + * + * Is this cheating? Not really. Sure, we haven't written the + * inode out, but prune_icache isn't a user-visible syncing function. + * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) + * we start and wait on commits. + * + * Is this efficient/effective? Well, we're being nice to the system + * by cleaning up our inodes proactively so they can be reaped + * without I/O. But we are potentially leaving up to five seconds' + * worth of inodes floating about which prune_icache wants us to + * write out. One way to fix that would be to get prune_icache() + * to do a write_super() to free up some memory. It has the desired + * effect. + */ +int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) +{ + struct ext3_iloc iloc; + int err; + + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (!err) + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + return err; +} + +/* + * akpm: ext3_dirty_inode() is called from __mark_inode_dirty() + * + * We're really interested in the case where a file is being extended. + * i_size has been changed by generic_commit_write() and we thus need + * to include the updated inode in the current transaction. + * + * Also, DQUOT_ALLOC_SPACE() will always dirty the inode when blocks + * are allocated to the file. + * + * If the inode is marked synchronous, we don't honour that here - doing + * so would cause a commit on atime updates, which we don't bother doing. + * We handle synchronous inodes at the highest possible level. + */ +void ext3_dirty_inode(struct inode *inode) +{ + handle_t *current_handle = ext3_journal_current_handle(); + handle_t *handle; + + lock_kernel(); + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + goto out; + if (current_handle && + current_handle->h_transaction != handle->h_transaction) { + /* This task has a transaction open against a different fs */ + printk(KERN_EMERG __FUNCTION__": transactions do not match!\n"); + } else { + jbd_debug(5, "marking dirty. outer handle=%p\n", + current_handle); + ext3_mark_inode_dirty(handle, inode); + } + ext3_journal_stop(handle, inode); +out: + unlock_kernel(); +} + +#ifdef AKPM +/* + * Bind an inode's backing buffer_head into this transaction, to prevent + * it from being flushed to disk early. Unlike + * ext3_reserve_inode_write, this leaves behind no bh reference and + * returns no iloc structure, so the caller needs to repeat the iloc + * lookup to mark the inode dirty later. + */ +static inline int +ext3_pin_inode(handle_t *handle, struct inode *inode) +{ + struct ext3_iloc iloc; + + int err = 0; + if (handle) { + err = ext3_get_inode_loc(inode, &iloc); + if (!err) { + BUFFER_TRACE(iloc.bh, "get_write_access"); + err = journal_get_write_access(handle, iloc.bh); + if (!err) + err = ext3_journal_dirty_metadata(handle, + iloc.bh); + brelse(iloc.bh); + } + } + ext3_std_error(inode->i_sb, err); + return err; +} +#endif + +int ext3_change_inode_journal_flag(struct inode *inode, int val) +{ + journal_t *journal; + handle_t *handle; + int err; + + /* + * We have to be very careful here: changing a data block's + * journaling status dynamically is dangerous. If we write a + * data block to the journal, change the status and then delete + * that block, we risk forgetting to revoke the old log record + * from the journal and so a subsequent replay can corrupt data. 
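+ * (Concretely: suppose block B is journalled as data in transaction
+ * T, the flag is then cleared, and B is freed and reallocated as
+ * metadata.  Without a revoke record, replay of T after a crash
+ * would rewrite the stale data image of B over the new metadata.)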
+ * So, first we make sure that the journal is empty and that + * nobody is changing anything. + */ + + journal = EXT3_JOURNAL(inode); + if (is_journal_aborted(journal) || IS_RDONLY(inode)) + return -EROFS; + + journal_lock_updates(journal); + journal_flush(journal); + + /* + * OK, there are no updates running now, and all cached data is + * synced to disk. We are now in a completely consistent state + * which doesn't have anything in the journal, and we know that + * no filesystem updates are running, so it is safe to modify + * the inode's in-core data-journaling state flag now. + */ + + if (val) + inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL; + else + inode->u.ext3_i.i_flags &= ~EXT3_JOURNAL_DATA_FL; + + journal_unlock_updates(journal); + + /* Finally we can mark the inode as dirty. */ + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + err = ext3_mark_inode_dirty(handle, inode); + handle->h_sync = 1; + ext3_journal_stop(handle, inode); + ext3_std_error(inode->i_sb, err); + + return err; +} + + +/* + * ext3_aops_journal_start(). + * + * + * + * We need to take the inode semaphore *outside* the + * journal_start/journal_stop. Otherwise, a different task could do a + * wait_for_commit() while holding ->i_sem, which deadlocks. The rule + * is: transaction open/closes are considered to be a locking operation + * and they nest *inside* ->i_sem. + * ---------------------------------------------------------------------------- + * Possible problem: + * ext3_file_write() + * -> generic_file_write() + * -> __alloc_pages() + * -> page_launder() + * -> ext3_writepage() + * + * And the writepage can be on a different fs while we have a + * transaction open against this one! Bad. + * + * I tried making the task PF_MEMALLOC here, but that simply results in + * 0-order allocation failures passed back to generic_file_write(). + * Instead, we rely on the reentrancy protection in ext3_writepage(). + * ---------------------------------------------------------------------------- + * When we do the journal_start() here we don't really need to reserve + * any blocks - we won't need any until we hit ext3_prepare_write(), + * which does all the needed journal extending. However! There is a + * problem with quotas: + * + * Thread 1: + * sys_sync + * ->sync_dquots + * ->commit_dquot + * ->lock_dquot + * ->write_dquot + * ->ext3_file_write + * ->journal_start + * ->ext3_prepare_write + * ->journal_extend + * ->journal_start + * Thread 2: + * ext3_create (for example) + * ->ext3_new_inode + * ->dquot_initialize + * ->lock_dquot + * + * Deadlock. Thread 1's journal_start blocks because thread 2 has a + * transaction open. Thread 2's transaction will never close because + * thread 2 is stuck waiting for the dquot lock. + * + * So. We must ensure that thread 1 *never* needs to extend the journal + * for quota writes. We do that by reserving enough journal blocks + * here, in ext3_aops_journal_start() to ensure that the forthcoming "see if we + * need to extend" test in ext3_prepare_write() succeeds. 
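+ *
+ * A minimal sketch of such a start routine (illustrative only - the
+ * credit reservation is the point, not the exact shape):
+ *
+ *	static handle_t *ext3_aops_journal_start(struct inode *inode)
+ *	{
+ *		int needed = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS;	(quota)
+ *		return ext3_journal_start(inode, needed);
+ *	}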
+ */ + + +MODULE_LICENSE("GPL"); diff -urN linux-2.4.16-reiserfspatches-immutable/fs/ext3/ioctl.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ext3/ioctl.c~ --- linux-2.4.16-reiserfspatches-immutable/fs/ext3/ioctl.c~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ext3/ioctl.c~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,174 @@ +/* + * linux/fs/ext3/ioctl.c + * + * Copyright (C) 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + */ + +#include +#include +#include +#include +#include +#include + + +int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, + unsigned long arg) +{ + unsigned int flags; + + ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); + + switch (cmd) { + case EXT3_IOC_GETFLAGS: + flags = inode->u.ext3_i.i_flags & EXT3_FL_USER_VISIBLE; + return put_user(flags, (int *) arg); + case EXT3_IOC_SETFLAGS: { + handle_t *handle = NULL; + int err; + struct ext3_iloc iloc; + unsigned int oldflags; + unsigned int jflag; + + if (IS_RDONLY(inode)) + return -EROFS; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + + if (get_user(flags, (int *) arg)) + return -EFAULT; + + oldflags = inode->u.ext3_i.i_flags; + + /* The JOURNAL_DATA flag is modifiable only by root */ + jflag = flags & EXT3_JOURNAL_DATA_FL; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + * + * This test looks nicer. Thanks to Pauline Middelink + */ + if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FILE_FL | EXT3_IMMUTABLE_LINK_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) + return -EPERM; + } + + /* + * The JOURNAL_DATA flag can only be changed by + * the relevant capability. 
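+ *
+ * (The test below, (jflag ^ oldflags) & EXT3_JOURNAL_DATA_FL, is
+ * non-zero exactly when the bit is being toggled, so leaving the
+ * flag alone never requires CAP_SYS_RESOURCE.)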
+ */ + if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + } + + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + if (IS_SYNC(inode)) + handle->h_sync = 1; + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + goto flags_err; + + flags = flags & EXT3_FL_USER_MODIFIABLE; + flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE; + inode->u.ext3_i.i_flags = flags; + + if (flags & EXT3_SYNC_FL) + inode->i_flags |= S_SYNC; + else + inode->i_flags &= ~S_SYNC; + if (flags & EXT3_APPEND_FL) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (flags & EXT3_IMMUTABLE_FILE_FL) + inode->i_flags |= S_IMMUTABLE_FILE; + else + inode->i_flags &= ~S_IMMUTABLE_FILE; + if (flags & EXT3_IMMUTABLE_LINK_FL) + inode->i_flags |= S_IMMUTABLE_LINK; + else + inode->i_flags &= ~S_IMMUTABLE_LINK; + if (flags & EXT3_NOATIME_FL) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; + inode->i_ctime = CURRENT_TIME; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); +flags_err: + ext3_journal_stop(handle, inode); + if (err) + return err; + + if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) + err = ext3_change_inode_journal_flag(inode, jflag); + return err; + } + case EXT3_IOC_GETVERSION: + case EXT3_IOC_GETVERSION_OLD: + return put_user(inode->i_generation, (int *) arg); + case EXT3_IOC_SETVERSION: + case EXT3_IOC_SETVERSION_OLD: { + handle_t *handle; + struct ext3_iloc iloc; + __u32 generation; + int err; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + if (IS_RDONLY(inode)) + return -EROFS; + if (get_user(generation, (int *) arg)) + return -EFAULT; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + return err; + + inode->i_ctime = CURRENT_TIME; + inode->i_generation = generation; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + ext3_journal_stop(handle, inode); + return err; + } +#ifdef CONFIG_JBD_DEBUG + case EXT3_IOC_WAIT_FOR_READONLY: + /* + * This is racy - by the time we're woken up and running, + * the superblock could be released. And the module could + * have been unloaded. So sue me. + * + * Returns 1 if it slept, else zero. 
+ */ + { + struct super_block *sb = inode->i_sb; + DECLARE_WAITQUEUE(wait, current); + int ret = 0; + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait); + if (timer_pending(&sb->u.ext3_sb.turn_ro_timer)) { + schedule(); + ret = 1; + } + remove_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait); + return ret; + } +#endif + default: + return -ENOTTY; + } +} diff -urN linux-2.4.16-reiserfspatches-immutable/fs/fat/file.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/fat/file.c~ --- linux-2.4.16-reiserfspatches-immutable/fs/fat/file.c~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/fat/file.c~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,136 @@ +/* + * linux/fs/fat/file.c + * + * Written 1992,1993 by Werner Almesberger + * + * regular file handling primitives for fat-based filesystems + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define PRINTK(x) +#define Printk(x) printk x + +struct file_operations fat_file_operations = { + llseek: generic_file_llseek, + read: fat_file_read, + write: fat_file_write, + mmap: generic_file_mmap, + fsync: file_fsync, +}; + +struct inode_operations fat_file_inode_operations = { + truncate: fat_truncate, + setattr: fat_notify_change, +}; + +ssize_t fat_file_read( + struct file *filp, + char *buf, + size_t count, + loff_t *ppos) +{ + struct inode *inode = filp->f_dentry->d_inode; + return MSDOS_SB(inode->i_sb)->cvf_format + ->cvf_file_read(filp,buf,count,ppos); +} + + +int fat_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create) +{ + struct super_block *sb = inode->i_sb; + unsigned long phys; + + phys = fat_bmap(inode, iblock); + if (phys) { + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = phys; + bh_result->b_state |= (1UL << BH_Mapped); + return 0; + } + if (!create) + return 0; + if (iblock << sb->s_blocksize_bits != MSDOS_I(inode)->mmu_private) { + BUG(); + return -EIO; + } + if (!(iblock % MSDOS_SB(inode->i_sb)->cluster_size)) { + if (fat_add_cluster(inode) < 0) + return -ENOSPC; + } + MSDOS_I(inode)->mmu_private += sb->s_blocksize; + phys = fat_bmap(inode, iblock); + if (!phys) + BUG(); + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = phys; + bh_result->b_state |= (1UL << BH_Mapped); + bh_result->b_state |= (1UL << BH_New); + return 0; +} + +ssize_t fat_file_write( + struct file *filp, + const char *buf, + size_t count, + loff_t *ppos) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct super_block *sb = inode->i_sb; + return MSDOS_SB(sb)->cvf_format + ->cvf_file_write(filp,buf,count,ppos); +} + +ssize_t default_fat_file_write( + struct file *filp, + const char *buf, + size_t count, + loff_t *ppos) +{ + struct inode *inode = filp->f_dentry->d_inode; + int retval; + + retval = generic_file_write(filp, buf, count, ppos); + if (retval > 0) { + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + MSDOS_I(inode)->i_attrs |= ATTR_ARCH; + mark_inode_dirty(inode); + } + return retval; +} + +void fat_truncate(struct inode *inode) +{ + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + int cluster; + + /* Why no return value? Surely the disk could fail... */ + if (IS_RDONLY (inode)) + return /* -EPERM */; + if (IS_IMMUTABLE_FILE(inode)) + return /* -EPERM */; + cluster = 1 << sbi->cluster_bits; + /* + * This protects against truncating a file bigger than it was then + * trying to write into the hole. 
+ */ + if (MSDOS_I(inode)->mmu_private > inode->i_size) + MSDOS_I(inode)->mmu_private = inode->i_size; + + fat_free(inode, (inode->i_size + (cluster - 1)) >> sbi->cluster_bits); + MSDOS_I(inode)->i_attrs |= ATTR_ARCH; + inode->i_ctime = inode->i_mtime = CURRENT_TIME; + mark_inode_dirty(inode); +} diff -urN linux-2.4.16-reiserfspatches-immutable/fs/fat/inode.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/fat/inode.c~ --- linux-2.4.16-reiserfspatches-immutable/fs/fat/inode.c~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/fat/inode.c~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,1061 @@ +/* + * linux/fs/fat/inode.c + * + * Written 1992,1993 by Werner Almesberger + * VFAT extensions by Gordon Chaffee, merged with msdos fs by Henrik Storner + * Rewritten for the constant inumbers support by Al Viro + * + * Fixes: + * + * Max Cohan: Fixed invalid FSINFO offset when info_sector is 0 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +extern struct cvf_format default_cvf; + +/* #define FAT_PARANOIA 1 */ +#define DEBUG_LEVEL 0 +#ifdef FAT_DEBUG +# define PRINTK(x) printk x +#else +# define PRINTK(x) +#endif +#if (DEBUG_LEVEL >= 1) +# define PRINTK1(x) printk x +#else +# define PRINTK1(x) +#endif + +/* + * New FAT inode stuff. We do the following: + * a) i_ino is constant and has nothing with on-disk location. + * b) FAT manages its own cache of directory entries. + * c) *This* cache is indexed by on-disk location. + * d) inode has an associated directory entry, all right, but + * it may be unhashed. + * e) currently entries are stored within struct inode. That should + * change. + * f) we deal with races in the following way: + * 1. readdir() and lookup() do FAT-dir-cache lookup. + * 2. rename() unhashes the F-d-c entry and rehashes it in + * a new place. + * 3. unlink() and rmdir() unhash F-d-c entry. + * 4. fat_write_inode() checks whether the thing is unhashed. + * If it is we silently return. If it isn't we do bread(), + * check if the location is still valid and retry if it + * isn't. Otherwise we do changes. + * 5. Spinlock is used to protect hash/unhash/location check/lookup + * 6. fat_clear_inode() unhashes the F-d-c entry. + * 7. lookup() and readdir() do igrab() if they find a F-d-c entry + * and consider negative result as cache miss. 
+ */ + +#define FAT_HASH_BITS 8 +#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) +#define FAT_HASH_MASK (FAT_HASH_SIZE-1) +static struct list_head fat_inode_hashtable[FAT_HASH_SIZE]; +spinlock_t fat_inode_lock = SPIN_LOCK_UNLOCKED; + +void fat_hash_init(void) +{ + int i; + for(i = 0; i < FAT_HASH_SIZE; i++) { + INIT_LIST_HEAD(&fat_inode_hashtable[i]); + } +} + +static inline unsigned long fat_hash(struct super_block *sb, int i_pos) +{ + unsigned long tmp = (unsigned long)i_pos | (unsigned long) sb; + tmp = tmp + (tmp >> FAT_HASH_BITS) + (tmp >> FAT_HASH_BITS * 2); + return tmp & FAT_HASH_MASK; +} + +void fat_attach(struct inode *inode, int i_pos) +{ + spin_lock(&fat_inode_lock); + MSDOS_I(inode)->i_location = i_pos; + list_add(&MSDOS_I(inode)->i_fat_hash, + fat_inode_hashtable + fat_hash(inode->i_sb, i_pos)); + spin_unlock(&fat_inode_lock); +} + +void fat_detach(struct inode *inode) +{ + spin_lock(&fat_inode_lock); + MSDOS_I(inode)->i_location = 0; + list_del(&MSDOS_I(inode)->i_fat_hash); + INIT_LIST_HEAD(&MSDOS_I(inode)->i_fat_hash); + spin_unlock(&fat_inode_lock); +} + +struct inode *fat_iget(struct super_block *sb, int i_pos) +{ + struct list_head *p = fat_inode_hashtable + fat_hash(sb, i_pos); + struct list_head *walk; + struct msdos_inode_info *i; + struct inode *inode = NULL; + + spin_lock(&fat_inode_lock); + list_for_each(walk, p) { + i = list_entry(walk, struct msdos_inode_info, i_fat_hash); + if (i->i_fat_inode->i_sb != sb) + continue; + if (i->i_location != i_pos) + continue; + inode = igrab(i->i_fat_inode); + if (inode) + break; + } + spin_unlock(&fat_inode_lock); + return inode; +} + +static void fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de); + +struct inode *fat_build_inode(struct super_block *sb, + struct msdos_dir_entry *de, int ino, int *res) +{ + struct inode *inode; + *res = 0; + inode = fat_iget(sb, ino); + if (inode) + goto out; + inode = new_inode(sb); + *res = -ENOMEM; + if (!inode) + goto out; + *res = 0; + inode->i_ino = iunique(sb, MSDOS_ROOT_INO); + fat_fill_inode(inode, de); + fat_attach(inode, ino); + insert_inode_hash(inode); +out: + return inode; +} + +void fat_delete_inode(struct inode *inode) +{ + if (!is_bad_inode(inode)) { + lock_kernel(); + inode->i_size = 0; + fat_truncate(inode); + unlock_kernel(); + } + clear_inode(inode); +} + +void fat_clear_inode(struct inode *inode) +{ + if (is_bad_inode(inode)) + return; + lock_kernel(); + spin_lock(&fat_inode_lock); + fat_cache_inval_inode(inode); + list_del(&MSDOS_I(inode)->i_fat_hash); + spin_unlock(&fat_inode_lock); + unlock_kernel(); +} + +void fat_put_super(struct super_block *sb) +{ + if (MSDOS_SB(sb)->cvf_format->cvf_version) { + dec_cvf_format_use_count_by_version(MSDOS_SB(sb)->cvf_format->cvf_version); + MSDOS_SB(sb)->cvf_format->unmount_cvf(sb); + } + if (MSDOS_SB(sb)->fat_bits == 32) { + fat_clusters_flush(sb); + } + fat_cache_inval_dev(sb->s_dev); + set_blocksize (sb->s_dev,BLOCK_SIZE); + if (MSDOS_SB(sb)->nls_disk) { + unload_nls(MSDOS_SB(sb)->nls_disk); + MSDOS_SB(sb)->nls_disk = NULL; + MSDOS_SB(sb)->options.codepage = 0; + } + if (MSDOS_SB(sb)->nls_io) { + unload_nls(MSDOS_SB(sb)->nls_io); + MSDOS_SB(sb)->nls_io = NULL; + } + /* + * Note: the iocharset option might have been specified + * without enabling nls_io, so check for it here. 
+ */ + if (MSDOS_SB(sb)->options.iocharset) { + kfree(MSDOS_SB(sb)->options.iocharset); + MSDOS_SB(sb)->options.iocharset = NULL; + } +} + + +static int parse_options(char *options,int *fat, int *debug, + struct fat_mount_options *opts, + char *cvf_format, char *cvf_options) +{ + char *this_char,*value,save,*savep; + char *p; + int ret = 1, len; + + opts->name_check = 'n'; + opts->conversion = 'b'; + opts->fs_uid = current->uid; + opts->fs_gid = current->gid; + opts->fs_umask = current->fs->umask; + opts->quiet = opts->sys_immutable = opts->dotsOK = opts->showexec = 0; + opts->codepage = 0; + opts->nocase = 0; + opts->shortname = 0; + opts->utf8 = 0; + opts->iocharset = NULL; + *debug = *fat = 0; + + if (!options) + goto out; + save = 0; + savep = NULL; + for (this_char = strtok(options,","); this_char; + this_char = strtok(NULL,",")) { + if ((value = strchr(this_char,'=')) != NULL) { + save = *value; + savep = value; + *value++ = 0; + } + if (!strcmp(this_char,"check") && value) { + if (value[0] && !value[1] && strchr("rns",*value)) + opts->name_check = *value; + else if (!strcmp(value,"relaxed")) + opts->name_check = 'r'; + else if (!strcmp(value,"normal")) + opts->name_check = 'n'; + else if (!strcmp(value,"strict")) + opts->name_check = 's'; + else ret = 0; + } + else if (!strcmp(this_char,"conv") && value) { + if (value[0] && !value[1] && strchr("bta",*value)) + opts->conversion = *value; + else if (!strcmp(value,"binary")) + opts->conversion = 'b'; + else if (!strcmp(value,"text")) + opts->conversion = 't'; + else if (!strcmp(value,"auto")) + opts->conversion = 'a'; + else ret = 0; + } + else if (!strcmp(this_char,"dots")) { + opts->dotsOK = 1; + } + else if (!strcmp(this_char,"nocase")) { + opts->nocase = 1; + } + else if (!strcmp(this_char,"nodots")) { + opts->dotsOK = 0; + } + else if (!strcmp(this_char,"showexec")) { + opts->showexec = 1; + } + else if (!strcmp(this_char,"dotsOK") && value) { + if (!strcmp(value,"yes")) opts->dotsOK = 1; + else if (!strcmp(value,"no")) opts->dotsOK = 0; + else ret = 0; + } + else if (!strcmp(this_char,"uid")) { + if (!value || !*value) ret = 0; + else { + opts->fs_uid = simple_strtoul(value,&value,0); + if (*value) ret = 0; + } + } + else if (!strcmp(this_char,"gid")) { + if (!value || !*value) ret= 0; + else { + opts->fs_gid = simple_strtoul(value,&value,0); + if (*value) ret = 0; + } + } + else if (!strcmp(this_char,"umask")) { + if (!value || !*value) ret = 0; + else { + opts->fs_umask = simple_strtoul(value,&value,8); + if (*value) ret = 0; + } + } + else if (!strcmp(this_char,"debug")) { + if (value) ret = 0; + else *debug = 1; + } + else if (!strcmp(this_char,"fat")) { + if (!value || !*value) ret = 0; + else { + *fat = simple_strtoul(value,&value,0); + if (*value || (*fat != 12 && *fat != 16 && + *fat != 32)) + ret = 0; + } + } + else if (!strcmp(this_char,"quiet")) { + if (value) ret = 0; + else opts->quiet = 1; + } + else if (!strcmp(this_char,"blocksize")) { + printk("FAT: blocksize option is obsolete, " + "not supported now\n"); + } + else if (!strcmp(this_char,"sys_immutable")) { + if (value) ret = 0; + else opts->sys_immutable = 1; + } + else if (!strcmp(this_char,"codepage") && value) { + opts->codepage = simple_strtoul(value,&value,0); + if (*value) ret = 0; + else printk ("MSDOS FS: Using codepage %d\n", + opts->codepage); + } + else if (!strcmp(this_char,"iocharset") && value) { + p = value; + while (*value && *value != ',') + value++; + len = value - p; + if (len) { + char *buffer; + + if (opts->iocharset != NULL) { + 
kfree(opts->iocharset);
+ opts->iocharset = NULL;
+ }
+ buffer = kmalloc(len + 1, GFP_KERNEL);
+ if (buffer != NULL) {
+ opts->iocharset = buffer;
+ memcpy(buffer, p, len);
+ buffer[len] = 0;
+ printk("MSDOS FS: IO charset %s\n", buffer);
+ } else
+ ret = 0;
+ }
+ }
+ else if (!strcmp(this_char,"cvf_format")) {
+ if (!value)
+ return 0;
+ strncpy(cvf_format,value,20);
+ }
+ else if (!strcmp(this_char,"cvf_options")) {
+ if (!value)
+ return 0;
+ strncpy(cvf_options,value,100);
+ }
+
+ if (this_char != options) *(this_char-1) = ',';
+ if (value) *savep = save;
+ if (ret == 0)
+ break;
+ }
+out:
+ return ret;
+}
+
+static void fat_read_root(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int nr;
+
+ INIT_LIST_HEAD(&MSDOS_I(inode)->i_fat_hash);
+ MSDOS_I(inode)->i_location = 0;
+ MSDOS_I(inode)->i_fat_inode = inode;
+ inode->i_uid = sbi->options.fs_uid;
+ inode->i_gid = sbi->options.fs_gid;
+ inode->i_version = ++event;
+ inode->i_generation = 0;
+ inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_umask) | S_IFDIR;
+ inode->i_op = sbi->dir_ops;
+ inode->i_fop = &fat_dir_operations;
+ if (sbi->fat_bits == 32) {
+ MSDOS_I(inode)->i_start = sbi->root_cluster;
+ if ((nr = MSDOS_I(inode)->i_start) != 0) {
+ while (nr != -1) {
+ inode->i_size += 1 << sbi->cluster_bits;
+ if (!(nr = fat_access(sb, nr, -1))) {
+ printk("Directory %ld: bad FAT\n",
+ inode->i_ino);
+ break;
+ }
+ }
+ }
+ } else {
+ MSDOS_I(inode)->i_start = 0;
+ inode->i_size = sbi->dir_entries * sizeof(struct msdos_dir_entry);
+ }
+ inode->i_blksize = 1 << sbi->cluster_bits;
+ inode->i_blocks = ((inode->i_size + inode->i_blksize - 1)
+ & ~(inode->i_blksize - 1)) / 512;
+ MSDOS_I(inode)->i_logstart = 0;
+ MSDOS_I(inode)->mmu_private = inode->i_size;
+
+ MSDOS_I(inode)->i_attrs = 0;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = 0;
+ MSDOS_I(inode)->i_ctime_ms = 0;
+ inode->i_nlink = fat_subdirs(inode)+2;
+}
+
+/*
+ * a FAT file handle with fhtype 3 is
+ * 0/ i_ino - for fast, reliable lookup if still in the cache
+ * 1/ i_generation - to see if i_ino is still valid
+ * bit 0 == 0 iff directory
+ * 2/ i_location - if ino has changed, but still in cache
+ * 3/ i_logstart - to semi-verify inode found at i_location
+ * 4/ parent->i_logstart - maybe used to hunt for the file on disc
+ *
+ */
+struct dentry *fat_fh_to_dentry(struct super_block *sb, __u32 *fh,
+ int len, int fhtype, int parent)
+{
+ struct inode *inode = NULL;
+ struct list_head *lp;
+ struct dentry *result;
+
+ if (fhtype != 3)
+ return NULL;
+ if (len < 5)
+ return NULL;
+ if (parent)
+ return NULL; /* We cannot find the parent,
+ It better just *be* there */
+
+ inode = iget(sb, fh[0]);
+ if (!inode || is_bad_inode(inode) ||
+ inode->i_generation != fh[1]) {
+ if (inode) iput(inode);
+ inode = NULL;
+ }
+ if (!inode) {
+ /* try 2 - see if i_location is in F-d-c
+ * require i_logstart to be the same
+ * Will fail if you truncate and then re-write
+ */
+
+ inode = fat_iget(sb, fh[2]);
+ if (inode && MSDOS_I(inode)->i_logstart != fh[3]) {
+ iput(inode);
+ inode = NULL;
+ }
+ }
+ if (!inode) {
+ /* For now, do nothing
+ * What we could do is:
+ * follow the file starting at fh[4], and record
+ * the ".." entry, and the name of the fh[2] entry.
+ * Then follow the ".." file, finding the next step up.
+ * This way we build a path to the root of
+ * the tree. If this works, we lookup the path and so
+ * get this inode into the cache. 
+ * Finally try the fat_iget lookup again
+ * If that fails, then we are totally out of luck
+ * But all that is for another day
+ */
+ }
+ if (!inode)
+ return ERR_PTR(-ESTALE);
+
+
+ /* now to find a dentry.
+ * If possible, get a well-connected one
+ *
+ * Given the way that we found the inode, it *MUST* be
+ * well-connected, but it is easiest to just copy the
+ * code.
+ */
+ spin_lock(&dcache_lock);
+ for (lp = inode->i_dentry.next; lp != &inode->i_dentry ; lp=lp->next) {
+ result = list_entry(lp,struct dentry, d_alias);
+ if (! (result->d_flags & DCACHE_NFSD_DISCONNECTED)) {
+ dget_locked(result);
+ result->d_vfs_flags |= DCACHE_REFERENCED;
+ spin_unlock(&dcache_lock);
+ iput(inode);
+ return result;
+ }
+ }
+ spin_unlock(&dcache_lock);
+ result = d_alloc_root(inode);
+ if (result == NULL) {
+ iput(inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ result->d_flags |= DCACHE_NFSD_DISCONNECTED;
+ return result;
+
+
+}
+
+int fat_dentry_to_fh(struct dentry *de, __u32 *fh, int *lenp, int needparent)
+{
+ int len = *lenp;
+ struct inode *inode = de->d_inode;
+
+ if (len < 5)
+ return 255; /* no room */
+ *lenp = 5;
+ fh[0] = inode->i_ino;
+ fh[1] = inode->i_generation;
+ fh[2] = MSDOS_I(inode)->i_location;
+ fh[3] = MSDOS_I(inode)->i_logstart;
+ fh[4] = MSDOS_I(de->d_parent->d_inode)->i_logstart;
+ return 3;
+}
+
+static struct super_operations fat_sops = {
+ write_inode: fat_write_inode,
+ delete_inode: fat_delete_inode,
+ put_super: fat_put_super,
+ statfs: fat_statfs,
+ clear_inode: fat_clear_inode,
+
+ read_inode: make_bad_inode,
+ fh_to_dentry: fat_fh_to_dentry,
+ dentry_to_fh: fat_dentry_to_fh,
+};
+
+/*
+ * Read the super block of an MS-DOS FS.
+ *
+ * Note that this may be called from vfat_read_super
+ * with some fields already initialized.
+ */
+struct super_block *
+fat_read_super(struct super_block *sb, void *data, int silent,
+ struct inode_operations *fs_dir_inode_ops)
+{
+ struct inode *root_inode;
+ struct buffer_head *bh;
+ struct fat_boot_sector *b;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ char *p;
+ int logical_sector_size, hard_blksize, fat_clusters = 0;
+ unsigned int total_sectors, rootdir_sectors;
+ int fat32, debug, error, fat, cp;
+ struct fat_mount_options opts;
+ char buf[50];
+ int i;
+ char cvf_format[21];
+ char cvf_options[101];
+
+ cvf_format[0] = '\0';
+ cvf_options[0] = '\0';
+ sbi->cvf_format = NULL;
+ sbi->private_data = NULL;
+
+ sbi->dir_ops = fs_dir_inode_ops;
+
+ sb->s_maxbytes = MAX_NON_LFS;
+ sb->s_op = &fat_sops;
+
+ hard_blksize = get_hardsect_size(sb->s_dev);
+ if (!hard_blksize)
+ hard_blksize = 512;
+
+ opts.isvfat = sbi->options.isvfat;
+ if (!parse_options((char *) data, &fat, &debug, &opts,
+ cvf_format, cvf_options))
+ goto out_fail;
+ /* N.B. we should parse directly into the sb structure */
+ memcpy(&(sbi->options), &opts, sizeof(struct fat_mount_options));
+
+ fat_cache_init();
+
+ sb->s_blocksize = hard_blksize;
+ set_blocksize(sb->s_dev, hard_blksize);
+ bh = bread(sb->s_dev, 0, sb->s_blocksize);
+ if (bh == NULL) {
+ printk("FAT: unable to read boot sector\n");
+ goto out_fail;
+ }
+
+/*
+ * The DOS3 partition size limit is *not* 32M as many people think.
+ * Instead, it is 64K sectors (with the usual sector size being
+ * 512 bytes, leading to a 32M limit).
+ *
+ * DOS 3 partition managers got around this problem by faking a
+ * larger sector size, i.e. treating multiple physical sectors as
+ * a single logical sector. 
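+ *
+ * (For instance, with a faked 2048-byte logical sector the 16-bit
+ * sector count covers 64K * 2048 bytes = 128M instead of 32M.)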
+ * + * We can accommodate this scheme by adjusting our cluster size, + * fat_start, and data_start by an appropriate value. + * + * (by Drew Eckhardt) + */ + + + b = (struct fat_boot_sector *) bh->b_data; + logical_sector_size = + CF_LE_W(get_unaligned((unsigned short *) &b->sector_size)); + if (!logical_sector_size + || (logical_sector_size & (logical_sector_size - 1))) { + printk("FAT: bogus logical sector size %d\n", + logical_sector_size); + brelse(bh); + goto out_invalid; + } + + sbi->cluster_size = b->cluster_size; + if (!sbi->cluster_size + || (sbi->cluster_size & (sbi->cluster_size - 1))) { + printk("FAT: bogus cluster size %d\n", sbi->cluster_size); + brelse(bh); + goto out_invalid; + } + + if (logical_sector_size < hard_blksize) { + printk("FAT: logical sector size too small for device" + " (logical sector size = %d)\n", logical_sector_size); + brelse(bh); + goto out_invalid; + } + + sbi->cluster_bits = ffs(logical_sector_size * sbi->cluster_size) - 1; + sbi->fats = b->fats; + sbi->fat_start = CF_LE_W(b->reserved); + if (!b->fat_length && b->fat32_length) { + struct fat_boot_fsinfo *fsinfo; + struct buffer_head *fsinfo_bh; + int fsinfo_block, fsinfo_offset; + + /* Must be FAT32 */ + fat32 = 1; + sbi->fat_length = CF_LE_L(b->fat32_length); + sbi->root_cluster = CF_LE_L(b->root_cluster); + + sbi->fsinfo_sector = CF_LE_W(b->info_sector); + /* MC - if info_sector is 0, don't multiply by 0 */ + if (sbi->fsinfo_sector == 0) + sbi->fsinfo_sector = 1; + + fsinfo_block = + (sbi->fsinfo_sector * logical_sector_size) / hard_blksize; + fsinfo_offset = + (sbi->fsinfo_sector * logical_sector_size) % hard_blksize; + fsinfo_bh = bh; + if (fsinfo_block != 0) { + fsinfo_bh = bread(sb->s_dev, fsinfo_block, hard_blksize); + if (fsinfo_bh == NULL) { + printk("FAT: bread failed, FSINFO block" + " (blocknr = %d)\n", fsinfo_block); + brelse(bh); + goto out_invalid; + } + } + fsinfo = (struct fat_boot_fsinfo *)&fsinfo_bh->b_data[fsinfo_offset]; + if (!IS_FSINFO(fsinfo)) { + printk("FAT: Did not find valid FSINFO signature.\n" + "Found signature1 0x%x signature2 0x%x sector=%ld.\n", + CF_LE_L(fsinfo->signature1), + CF_LE_L(fsinfo->signature2), + sbi->fsinfo_sector); + } else { + sbi->free_clusters = CF_LE_L(fsinfo->free_clusters); + } + + if (fsinfo_block != 0) + brelse(fsinfo_bh); + } else { + fat32 = 0; + sbi->fat_length = CF_LE_W(b->fat_length); + sbi->root_cluster = 0; + sbi->free_clusters = -1; /* Don't know yet */ + } + + sbi->dir_per_block = logical_sector_size / sizeof(struct msdos_dir_entry); + sbi->dir_per_block_bits = ffs(sbi->dir_per_block) - 1; + + sbi->dir_start = sbi->fat_start + sbi->fats * sbi->fat_length; + sbi->dir_entries = + CF_LE_W(get_unaligned((unsigned short *)&b->dir_entries)); + rootdir_sectors = sbi->dir_entries + * sizeof(struct msdos_dir_entry) / logical_sector_size; + sbi->data_start = sbi->dir_start + rootdir_sectors; + total_sectors = CF_LE_W(get_unaligned((unsigned short *)&b->sectors)); + if (total_sectors == 0) + total_sectors = CF_LE_L(b->total_sect); + sbi->clusters = (total_sectors - sbi->data_start) / sbi->cluster_size; + + error = 0; + if (!error) { + sbi->fat_bits = fat32 ? 32 : + (fat ? fat : + (sbi->clusters > MSDOS_FAT12 ? 
16 : 12)); + fat_clusters = + sbi->fat_length * logical_sector_size * 8 / sbi->fat_bits; + error = !sbi->fats || (sbi->dir_entries & (sbi->dir_per_block - 1)) + || sbi->clusters + 2 > fat_clusters + MSDOS_MAX_EXTRA + || logical_sector_size < 512 + || PAGE_CACHE_SIZE < logical_sector_size + || !b->secs_track || !b->heads; + } + brelse(bh); + + if (error) + goto out_invalid; + + sb->s_blocksize = logical_sector_size; + sb->s_blocksize_bits = ffs(logical_sector_size) - 1; + set_blocksize(sb->s_dev, sb->s_blocksize); + sbi->cvf_format = &default_cvf; + if (!strcmp(cvf_format, "none")) + i = -1; + else + i = detect_cvf(sb,cvf_format); + if (i >= 0) + error = cvf_formats[i]->mount_cvf(sb, cvf_options); + if (error || debug) { + /* The MSDOS_CAN_BMAP is obsolete, but left just to remember */ + printk("[MS-DOS FS Rel. 12,FAT %d,check=%c,conv=%c," + "uid=%d,gid=%d,umask=%03o%s]\n", + sbi->fat_bits,opts.name_check, + opts.conversion,opts.fs_uid,opts.fs_gid,opts.fs_umask, + MSDOS_CAN_BMAP(sbi) ? ",bmap" : ""); + printk("[me=0x%x,cs=%d,#f=%d,fs=%d,fl=%ld,ds=%ld,de=%d,data=%ld," + "se=%u,ts=%u,ls=%d,rc=%ld,fc=%u]\n", + b->media, sbi->cluster_size, sbi->fats, + sbi->fat_start, sbi->fat_length, sbi->dir_start, + sbi->dir_entries, sbi->data_start, + CF_LE_W(get_unaligned((unsigned short *)&b->sectors)), + CF_LE_L(b->total_sect), logical_sector_size, + sbi->root_cluster, sbi->free_clusters); + printk ("hard sector size = %d\n", hard_blksize); + } + if (i < 0) + if (sbi->clusters + 2 > fat_clusters) + sbi->clusters = fat_clusters - 2; + if (error) + goto out_invalid; + + sb->s_magic = MSDOS_SUPER_MAGIC; + /* set up enough so that it can read an inode */ + init_MUTEX(&sbi->fat_lock); + sbi->prev_free = 0; + + cp = opts.codepage ? opts.codepage : 437; + sprintf(buf, "cp%d", cp); + sbi->nls_disk = load_nls(buf); + if (! sbi->nls_disk) { + /* Fail only if explicit charset specified */ + if (opts.codepage != 0) + goto out_fail; + sbi->options.codepage = 0; /* already 0?? */ + sbi->nls_disk = load_nls_default(); + } + + sbi->nls_io = NULL; + if (sbi->options.isvfat && !opts.utf8) { + p = opts.iocharset ? opts.iocharset : CONFIG_NLS_DEFAULT; + sbi->nls_io = load_nls(p); + if (! sbi->nls_io) + /* Fail only if explicit charset specified */ + if (opts.iocharset) + goto out_unload_nls; + } + if (! 
sbi->nls_io) + sbi->nls_io = load_nls_default(); + + root_inode = new_inode(sb); + if (!root_inode) + goto out_unload_nls; + root_inode->i_ino = MSDOS_ROOT_INO; + fat_read_root(root_inode); + insert_inode_hash(root_inode); + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto out_no_root; + if(i >= 0) { + sbi->cvf_format = cvf_formats[i]; + ++cvf_format_use_count[i]; + } + return sb; + +out_no_root: + printk("FAT: get root inode failed\n"); + iput(root_inode); + unload_nls(sbi->nls_io); +out_unload_nls: + unload_nls(sbi->nls_disk); + goto out_fail; +out_invalid: + if (!silent) { + printk("VFS: Can't find a valid FAT filesystem on dev %s.\n", + kdevname(sb->s_dev)); + } +out_fail: + if (opts.iocharset) { + printk("FAT: freeing iocharset=%s\n", opts.iocharset); + kfree(opts.iocharset); + } + if(sbi->private_data) + kfree(sbi->private_data); + sbi->private_data = NULL; + + return NULL; +} + +int fat_statfs(struct super_block *sb,struct statfs *buf) +{ + int free,nr; + + if (MSDOS_SB(sb)->cvf_format && + MSDOS_SB(sb)->cvf_format->cvf_statfs) + return MSDOS_SB(sb)->cvf_format->cvf_statfs(sb,buf, + sizeof(struct statfs)); + + lock_fat(sb); + if (MSDOS_SB(sb)->free_clusters != -1) + free = MSDOS_SB(sb)->free_clusters; + else { + free = 0; + for (nr = 2; nr < MSDOS_SB(sb)->clusters+2; nr++) + if (!fat_access(sb,nr,-1)) free++; + MSDOS_SB(sb)->free_clusters = free; + } + unlock_fat(sb); + buf->f_type = sb->s_magic; + buf->f_bsize = 1 << MSDOS_SB(sb)->cluster_bits; + buf->f_blocks = MSDOS_SB(sb)->clusters; + buf->f_bfree = free; + buf->f_bavail = free; + buf->f_namelen = MSDOS_SB(sb)->options.isvfat ? 260 : 12; + return 0; +} + +static int is_exec(char *extension) +{ + char *exe_extensions = "EXECOMBAT", *walk; + + for (walk = exe_extensions; *walk; walk += 3) + if (!strncmp(extension, walk, 3)) + return 1; + return 0; +} + +static int fat_writepage(struct page *page) +{ + return block_write_full_page(page,fat_get_block); +} +static int fat_readpage(struct file *file, struct page *page) +{ + return block_read_full_page(page,fat_get_block); +} +static int fat_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +{ + return cont_prepare_write(page,from,to,fat_get_block, + &MSDOS_I(page->mapping->host)->mmu_private); +} +static int _fat_bmap(struct address_space *mapping, long block) +{ + return generic_block_bmap(mapping,block,fat_get_block); +} +static struct address_space_operations fat_aops = { + readpage: fat_readpage, + writepage: fat_writepage, + sync_page: block_sync_page, + prepare_write: fat_prepare_write, + commit_write: generic_commit_write, + bmap: _fat_bmap +}; + +/* doesn't deal with root inode */ +static void fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) +{ + struct super_block *sb = inode->i_sb; + struct msdos_sb_info *sbi = MSDOS_SB(sb); + int nr; + + INIT_LIST_HEAD(&MSDOS_I(inode)->i_fat_hash); + MSDOS_I(inode)->i_location = 0; + MSDOS_I(inode)->i_fat_inode = inode; + inode->i_uid = sbi->options.fs_uid; + inode->i_gid = sbi->options.fs_gid; + inode->i_version = ++event; + inode->i_generation = CURRENT_TIME; + + if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) { + inode->i_generation &= ~1; + inode->i_mode = MSDOS_MKMODE(de->attr,S_IRWXUGO & + ~sbi->options.fs_umask) | S_IFDIR; + inode->i_op = sbi->dir_ops; + inode->i_fop = &fat_dir_operations; + + MSDOS_I(inode)->i_start = CF_LE_W(de->start); + if (sbi->fat_bits == 32) { + MSDOS_I(inode)->i_start |= + (CF_LE_W(de->starthi) << 16); + } + MSDOS_I(inode)->i_logstart = 
MSDOS_I(inode)->i_start; + inode->i_nlink = fat_subdirs(inode); + /* includes .., compensating for "self" */ +#ifdef DEBUG + if (!inode->i_nlink) { + printk("directory %d: i_nlink == 0\n",inode->i_ino); + inode->i_nlink = 1; + } +#endif + if ((nr = MSDOS_I(inode)->i_start) != 0) + while (nr != -1) { + inode->i_size += 1 << sbi->cluster_bits; + if (!(nr = fat_access(sb, nr, -1))) { + printk("Directory %ld: bad FAT\n", + inode->i_ino); + break; + } + } + MSDOS_I(inode)->mmu_private = inode->i_size; + } else { /* not a directory */ + inode->i_generation |= 1; + inode->i_mode = MSDOS_MKMODE(de->attr, + ((sbi->options.showexec && + !is_exec(de->ext)) + ? S_IRUGO|S_IWUGO : S_IRWXUGO) + & ~sbi->options.fs_umask) | S_IFREG; + MSDOS_I(inode)->i_start = CF_LE_W(de->start); + if (sbi->fat_bits == 32) { + MSDOS_I(inode)->i_start |= + (CF_LE_W(de->starthi) << 16); + } + MSDOS_I(inode)->i_logstart = MSDOS_I(inode)->i_start; + inode->i_size = CF_LE_L(de->size); + inode->i_op = &fat_file_inode_operations; + inode->i_fop = &fat_file_operations; + inode->i_mapping->a_ops = &fat_aops; + MSDOS_I(inode)->mmu_private = inode->i_size; + } + if(de->attr & ATTR_SYS) + if (sbi->options.sys_immutable) + inode->i_flags |= S_IMMUTABLE_FILE; + MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; + /* this is as close to the truth as we can get ... */ + inode->i_blksize = 1 << sbi->cluster_bits; + inode->i_blocks = ((inode->i_size + inode->i_blksize - 1) + & ~(inode->i_blksize - 1)) / 512; + inode->i_mtime = inode->i_atime = + date_dos2unix(CF_LE_W(de->time),CF_LE_W(de->date)); + inode->i_ctime = + MSDOS_SB(sb)->options.isvfat + ? date_dos2unix(CF_LE_W(de->ctime),CF_LE_W(de->cdate)) + : inode->i_mtime; + MSDOS_I(inode)->i_ctime_ms = de->ctime_ms; +} + +void fat_write_inode(struct inode *inode, int wait) +{ + struct super_block *sb = inode->i_sb; + struct buffer_head *bh; + struct msdos_dir_entry *raw_entry; + unsigned int i_pos; + +retry: + i_pos = MSDOS_I(inode)->i_location; + if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) { + return; + } + lock_kernel(); + if (!(bh = fat_bread(sb, i_pos >> MSDOS_SB(sb)->dir_per_block_bits))) { + printk("dev = %s, ino = %d\n", kdevname(inode->i_dev), i_pos); + fat_fs_panic(sb, "msdos_write_inode: unable to read i-node block"); + unlock_kernel(); + return; + } + spin_lock(&fat_inode_lock); + if (i_pos != MSDOS_I(inode)->i_location) { + spin_unlock(&fat_inode_lock); + fat_brelse(sb, bh); + unlock_kernel(); + goto retry; + } + + raw_entry = &((struct msdos_dir_entry *) (bh->b_data)) + [i_pos & (MSDOS_SB(sb)->dir_per_block - 1)]; + if (S_ISDIR(inode->i_mode)) { + raw_entry->attr = ATTR_DIR; + raw_entry->size = 0; + } + else { + raw_entry->attr = ATTR_NONE; + raw_entry->size = CT_LE_L(inode->i_size); + } + raw_entry->attr |= MSDOS_MKATTR(inode->i_mode) | + MSDOS_I(inode)->i_attrs; + raw_entry->start = CT_LE_W(MSDOS_I(inode)->i_logstart); + raw_entry->starthi = CT_LE_W(MSDOS_I(inode)->i_logstart >> 16); + fat_date_unix2dos(inode->i_mtime,&raw_entry->time,&raw_entry->date); + raw_entry->time = CT_LE_W(raw_entry->time); + raw_entry->date = CT_LE_W(raw_entry->date); + if (MSDOS_SB(sb)->options.isvfat) { + fat_date_unix2dos(inode->i_ctime,&raw_entry->ctime,&raw_entry->cdate); + raw_entry->ctime_ms = MSDOS_I(inode)->i_ctime_ms; + raw_entry->ctime = CT_LE_W(raw_entry->ctime); + raw_entry->cdate = CT_LE_W(raw_entry->cdate); + } + spin_unlock(&fat_inode_lock); + fat_mark_buffer_dirty(sb, bh); + fat_brelse(sb, bh); + unlock_kernel(); +} + + +int fat_notify_change(struct dentry * dentry, struct iattr * 
attr) +{ + struct super_block *sb = dentry->d_sb; + struct inode *inode = dentry->d_inode; + int error; + + /* FAT cannot truncate to a longer file */ + if (attr->ia_valid & ATTR_SIZE) { + if (attr->ia_size > inode->i_size) + return -EPERM; + } + + error = inode_change_ok(inode, attr); + if (error) + return MSDOS_SB(sb)->options.quiet ? 0 : error; + + if (((attr->ia_valid & ATTR_UID) && + (attr->ia_uid != MSDOS_SB(sb)->options.fs_uid)) || + ((attr->ia_valid & ATTR_GID) && + (attr->ia_gid != MSDOS_SB(sb)->options.fs_gid)) || + ((attr->ia_valid & ATTR_MODE) && + (attr->ia_mode & ~MSDOS_VALID_MODE))) + error = -EPERM; + + if (error) + return MSDOS_SB(sb)->options.quiet ? 0 : error; + + error = inode_setattr(inode, attr); + if (error) + return error; + + if (S_ISDIR(inode->i_mode)) + inode->i_mode |= S_IXUGO; + + inode->i_mode = ((inode->i_mode & S_IFMT) | ((((inode->i_mode & S_IRWXU + & ~MSDOS_SB(sb)->options.fs_umask) | S_IRUSR) >> 6)*S_IXUGO)) & + ~MSDOS_SB(sb)->options.fs_umask; + return 0; +} +MODULE_LICENSE("GPL"); diff -urN linux-2.4.16-reiserfspatches-immutable/fs/hpfs/file.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/hpfs/file.c~ --- linux-2.4.16-reiserfspatches-immutable/fs/hpfs/file.c~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/hpfs/file.c~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,136 @@ +/* + * linux/fs/hpfs/file.c + * + * Mikulas Patocka (mikulas@artax.karlin.mff.cuni.cz), 1998-1999 + * + * file VFS functions + */ + +#include +#include +#include +#include "hpfs_fn.h" + +#define BLOCKS(size) (((size) + 511) >> 9) + +/* HUH? */ +int hpfs_open(struct inode *i, struct file *f) +{ + lock_kernel(); + hpfs_lock_inode(i); + hpfs_unlock_inode(i); /* make sure nobody is deleting the file */ + unlock_kernel(); + if (!i->i_nlink) return -ENOENT; + return 0; +} + +int hpfs_file_release(struct inode *inode, struct file *file) +{ + lock_kernel(); + hpfs_write_if_changed(inode); + unlock_kernel(); + return 0; +} + +int hpfs_file_fsync(struct file *file, struct dentry *dentry, int datasync) +{ + /*return file_fsync(file, dentry);*/ + return 0; /* Don't fsync :-) */ +} + +/* + * generic_file_read often calls bmap with non-existing sector, + * so we must ignore such errors. 
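+ * (E.g. reading the last page of a file: block_read_full_page()
+ * asks for blocks past the allocated tail, hpfs_bmap() returns 0,
+ * and the unmapped buffers are simply zero-filled.)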
+ */ + +secno hpfs_bmap(struct inode *inode, unsigned file_secno) +{ + unsigned n, disk_secno; + struct fnode *fnode; + struct buffer_head *bh; + if (BLOCKS(inode->u.hpfs_i.mmu_private) <= file_secno) return 0; + n = file_secno - inode->i_hpfs_file_sec; + if (n < inode->i_hpfs_n_secs) return inode->i_hpfs_disk_sec + n; + if (!(fnode = hpfs_map_fnode(inode->i_sb, inode->i_ino, &bh))) return 0; + disk_secno = hpfs_bplus_lookup(inode->i_sb, inode, &fnode->btree, file_secno, bh); + if (disk_secno == -1) return 0; + if (hpfs_chk_sectors(inode->i_sb, disk_secno, 1, "bmap")) return 0; + return disk_secno; +} + +void hpfs_truncate(struct inode *i) +{ + if (IS_IMMUTABLE_FILE(i)) return /*-EPERM*/; + i->i_hpfs_n_secs = 0; + i->i_blocks = 1 + ((i->i_size + 511) >> 9); + i->u.hpfs_i.mmu_private = i->i_size; + hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9)); + hpfs_write_inode(i); +} + +int hpfs_get_block(struct inode *inode, long iblock, struct buffer_head *bh_result, int create) +{ + secno s; + s = hpfs_bmap(inode, iblock); + if (s) { + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = s; + bh_result->b_state |= (1UL << BH_Mapped); + return 0; + } + if (!create) return 0; + if (iblock<<9 != inode->u.hpfs_i.mmu_private) { + BUG(); + return -EIO; + } + if ((s = hpfs_add_sector_to_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1)) == -1) { + hpfs_truncate_btree(inode->i_sb, inode->i_ino, 1, inode->i_blocks - 1); + return -ENOSPC; + } + inode->i_blocks++; + inode->u.hpfs_i.mmu_private += 512; + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = s; + bh_result->b_state |= (1UL << BH_Mapped) | (1UL << BH_New); + return 0; +} + +static int hpfs_writepage(struct page *page) +{ + return block_write_full_page(page,hpfs_get_block); +} +static int hpfs_readpage(struct file *file, struct page *page) +{ + return block_read_full_page(page,hpfs_get_block); +} +static int hpfs_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +{ + return cont_prepare_write(page,from,to,hpfs_get_block, + &page->mapping->host->u.hpfs_i.mmu_private); +} +static int _hpfs_bmap(struct address_space *mapping, long block) +{ + return generic_block_bmap(mapping,block,hpfs_get_block); +} +struct address_space_operations hpfs_aops = { + readpage: hpfs_readpage, + writepage: hpfs_writepage, + sync_page: block_sync_page, + prepare_write: hpfs_prepare_write, + commit_write: generic_commit_write, + bmap: _hpfs_bmap +}; + +ssize_t hpfs_file_write(struct file *file, const char *buf, size_t count, loff_t *ppos) +{ + ssize_t retval; + + retval = generic_file_write(file, buf, count, ppos); + if (retval > 0) { + struct inode *inode = file->f_dentry->d_inode; + inode->i_mtime = CURRENT_TIME; + inode->i_hpfs_dirty = 1; + } + return retval; +} + diff -urN linux-2.4.16-reiserfspatches-immutable/fs/namei.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/namei.c~ --- linux-2.4.16-reiserfspatches-immutable/fs/namei.c~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/namei.c~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,2033 @@ +/* + * linux/fs/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +/* + * Some corrections by tytso. + */ + +/* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname + * lookup logic. + */ +/* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture. 
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
+
+/* [Feb-1997 T. Schoebel-Theuer]
+ * Fundamental changes in the pathname lookup mechanisms (namei)
+ * were necessary because of omirr. The reason is that omirr needs
+ * to know the _real_ pathname, not the user-supplied one, in case
+ * of symlinks (and also when transname replacements occur).
+ *
+ * The new code replaces the old recursive symlink resolution with
+ * an iterative one (in case of non-nested symlink chains). It does
+ * this with calls to _follow_link().
+ * As a side effect, dir_namei(), _namei() and follow_link() are now
+ * replaced with a single function lookup_dentry() that can handle all
+ * the special cases of the former code.
+ *
+ * With the new dcache, the pathname is stored at each inode, at least as
+ * long as the refcount of the inode is positive. As a side effect, the
+ * size of the dcache depends on the inode cache and thus is dynamic.
+ *
+ * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
+ * resolution to correspond with current state of the code.
+ *
+ * Note that the symlink resolution is not *completely* iterative.
+ * There is still a significant amount of tail- and mid- recursion in
+ * the algorithm. Also, note that _readlink() is not used in
+ * lookup_dentry(): lookup_dentry() on the result of _readlink()
+ * may return different results than _follow_link(). Many virtual
+ * filesystems (including /proc) exhibit this behavior.
+ */
+
+/* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
+ * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
+ * and the name already exists in form of a symlink, try to create the new
+ * name indicated by the symlink. The old code always complained that the
+ * name already exists, due to not following the symlink even if its target
+ * is nonexistent. The new semantics also affects mknod() and link() when
+ * the name is a symlink pointing to a non-existent name.
+ *
+ * I don't know which semantics is the right one, since I have no access
+ * to standards. But I found by trial that HP-UX 9.0 has the full "new"
+ * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
+ * "old" one. Personally, I think the new semantics is much more logical.
+ * Note that "ln old new" where "new" is a symlink pointing to a non-existing
+ * file does succeed in both HP-UX and SunOS, but not in Solaris
+ * and in the old Linux semantics.
+ */
+
+/* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
+ * semantics. See the comments in "open_namei" and "do_link" below.
+ *
+ * [10-Sep-98 Alan Modra] Another symlink change.
+ */
+
+/* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
+ * inside the path - always follow.
+ * in the last component in creation/removal/renaming - never follow.
+ * if LOOKUP_FOLLOW passed - follow.
+ * if the pathname has trailing slashes - follow.
+ * otherwise - don't follow.
+ * (applied in that order).
+ *
+ * [Jun 2000 AV] Inconsistent behaviour of open() in case flags==O_CREAT
+ * restored for 2.4. This is the last surviving part of an old 4.2BSD bug.
+ * During 2.4 we need to fix the userland stuff depending on it -
+ * hopefully we will be able to get rid of that wart in 2.5. So far only
+ * XEmacs seems to be relying on it... 
+ */ + +/* In order to reduce some races, while at the same time doing additional + * checking and hopefully speeding things up, we copy filenames to the + * kernel data space before using them.. + * + * POSIX.1 2.4: an empty pathname is invalid (ENOENT). + */ +static inline int do_getname(const char *filename, char *page) +{ + int retval; + unsigned long len = PATH_MAX + 1; + + if ((unsigned long) filename >= TASK_SIZE) { + if (!segment_eq(get_fs(), KERNEL_DS)) + return -EFAULT; + } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX + 1) + len = TASK_SIZE - (unsigned long) filename; + + retval = strncpy_from_user((char *)page, filename, len); + if (retval > 0) { + if (retval < len) + return 0; + return -ENAMETOOLONG; + } else if (!retval) + retval = -ENOENT; + return retval; +} + +char * getname(const char * filename) +{ + char *tmp, *result; + + result = ERR_PTR(-ENOMEM); + tmp = __getname(); + if (tmp) { + int retval = do_getname(filename, tmp); + + result = tmp; + if (retval < 0) { + putname(tmp); + result = ERR_PTR(retval); + } + } + return result; +} + +/* + * vfs_permission() + * + * is used to check for read/write/execute permissions on a file. + * We use "fsuid" for this, letting us set arbitrary permissions + * for filesystem access without changing the "normal" uids which + * are used for other things.. + */ +int vfs_permission(struct inode * inode, int mask) +{ + umode_t mode = inode->i_mode; + + if (mask & MAY_WRITE) { + /* + * Nobody gets write access to a read-only fs. + */ + if (IS_RDONLY(inode) && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) + return -EROFS; + + /* + * Nobody gets write access to an immutable file. + */ + if (IS_IMMUTABLE_FILE(inode)) + return -EACCES; + } + + if (current->fsuid == inode->i_uid) + mode >>= 6; + else if (in_group_p(inode->i_gid)) + mode >>= 3; + + /* + * If the DACs are ok we don't need any capability check. + */ + if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) + return 0; + + /* + * Read/write DACs are always overridable. + * Executable DACs are overridable if at least one exec bit is set. + */ + if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO)) + if (capable(CAP_DAC_OVERRIDE)) + return 0; + + /* + * Searching includes executable on directories, else just read. + */ + if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) + if (capable(CAP_DAC_READ_SEARCH)) + return 0; + + return -EACCES; +} + +int permission(struct inode * inode,int mask) +{ + if (inode->i_op && inode->i_op->permission) { + int retval; + lock_kernel(); + retval = inode->i_op->permission(inode, mask); + unlock_kernel(); + return retval; + } + return vfs_permission(inode, mask); +} + +/* + * get_write_access() gets write permission for a file. + * put_write_access() releases this write permission. + * This is used for regular files. + * We cannot support write (and maybe mmap read-write shared) accesses and + * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode + * can have the following values: + * 0: no writers, no VM_DENYWRITE mappings + * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist + * > 0: (i_writecount) users are writing to the file. + * + * Normally we operate on that counter with atomic_{inc,dec} and it's safe + * except for the cases where we don't hold i_writecount yet. Then we need to + * use {get,deny}_write_access() - these functions check the sign and refuse + * to do the change if sign is wrong. 
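(E.g. mapping a binary with MAP_DENYWRITE, as exec does, fails with
+ * -ETXTBSY while somebody holds the file open for write, and
+ * get_write_access() fails the same way on a running executable.)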
Exclusion between them is provided by + * spinlock (arbitration_lock) and I'll rip the second arsehole to the first + * who will try to move it in struct inode - just leave it here. + */ +static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED; +int get_write_access(struct inode * inode) +{ + spin_lock(&arbitration_lock); + if (atomic_read(&inode->i_writecount) < 0) { + spin_unlock(&arbitration_lock); + return -ETXTBSY; + } + atomic_inc(&inode->i_writecount); + spin_unlock(&arbitration_lock); + return 0; +} +int deny_write_access(struct file * file) +{ + spin_lock(&arbitration_lock); + if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) { + spin_unlock(&arbitration_lock); + return -ETXTBSY; + } + atomic_dec(&file->f_dentry->d_inode->i_writecount); + spin_unlock(&arbitration_lock); + return 0; +} + +void path_release(struct nameidata *nd) +{ + dput(nd->dentry); + mntput(nd->mnt); +} + +/* + * Internal lookup() using the new generic dcache. + * SMP-safe + */ +static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags) +{ + struct dentry * dentry = d_lookup(parent, name); + + if (dentry && dentry->d_op && dentry->d_op->d_revalidate) { + if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) { + dput(dentry); + dentry = NULL; + } + } + return dentry; +} + +/* + * This is called when everything else fails, and we actually have + * to go to the low-level filesystem to find out what we should do.. + * + * We get the directory semaphore, and after getting that we also + * make sure that nobody added the entry to the dcache in the meantime.. + * SMP-safe + */ +static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags) +{ + struct dentry * result; + struct inode *dir = parent->d_inode; + + down(&dir->i_sem); + /* + * First re-do the cached lookup just in case it was created + * while we waited for the directory semaphore.. + * + * FIXME! This could use version numbering or similar to + * avoid unnecessary cache lookups. + */ + result = d_lookup(parent, name); + if (!result) { + struct dentry * dentry = d_alloc(parent, name); + result = ERR_PTR(-ENOMEM); + if (dentry) { + lock_kernel(); + result = dir->i_op->lookup(dir, dentry); + unlock_kernel(); + if (result) + dput(dentry); + else + result = dentry; + } + up(&dir->i_sem); + return result; + } + + /* + * Uhhuh! Nasty case: the cache was re-populated while + * we waited on the semaphore. Need to revalidate. + */ + up(&dir->i_sem); + if (result->d_op && result->d_op->d_revalidate) { + if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) { + dput(result); + result = ERR_PTR(-ENOENT); + } + } + return result; +} + +/* + * This limits recursive symlink follows to 8, while + * limiting consecutive symlinks to 40. + * + * Without that kind of total limit, nasty chains of consecutive + * symlinks can cause almost arbitrarily long lookups. 
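+ *
+ * (The nesting check below actually trips at 5 levels, not 8;
+ * consecutive links are capped by total_link_count at 40, after
+ * which the lookup fails with -ELOOP.)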
+ */
+static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ int err;
+ if (current->link_count >= 5)
+ goto loop;
+ if (current->total_link_count >= 40)
+ goto loop;
+ if (current->need_resched) {
+ current->state = TASK_RUNNING;
+ schedule();
+ }
+ current->link_count++;
+ current->total_link_count++;
+ UPDATE_ATIME(dentry->d_inode);
+ err = dentry->d_inode->i_op->follow_link(dentry, nd);
+ current->link_count--;
+ return err;
+loop:
+ path_release(nd);
+ return -ELOOP;
+}
+
+static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
+{
+ struct vfsmount *parent;
+ struct dentry *dentry;
+ spin_lock(&dcache_lock);
+ parent=(*mnt)->mnt_parent;
+ if (parent == *mnt) {
+ spin_unlock(&dcache_lock);
+ return 0;
+ }
+ mntget(parent);
+ dentry=dget((*mnt)->mnt_mountpoint);
+ spin_unlock(&dcache_lock);
+ dput(*base);
+ *base = dentry;
+ mntput(*mnt);
+ *mnt = parent;
+ return 1;
+}
+
+int follow_up(struct vfsmount **mnt, struct dentry **dentry)
+{
+ return __follow_up(mnt, dentry);
+}
+
+static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
+{
+ struct vfsmount *mounted;
+
+ spin_lock(&dcache_lock);
+ mounted = lookup_mnt(*mnt, *dentry);
+ if (mounted) {
+ *mnt = mntget(mounted);
+ spin_unlock(&dcache_lock);
+ dput(*dentry);
+ mntput(mounted->mnt_parent);
+ *dentry = dget(mounted->mnt_root);
+ return 1;
+ }
+ spin_unlock(&dcache_lock);
+ return 0;
+}
+
+int follow_down(struct vfsmount **mnt, struct dentry **dentry)
+{
+ return __follow_down(mnt,dentry);
+}
+
+static inline void follow_dotdot(struct nameidata *nd)
+{
+ while(1) {
+ struct vfsmount *parent;
+ struct dentry *dentry;
+ read_lock(&current->fs->lock);
+ if (nd->dentry == current->fs->root &&
+ nd->mnt == current->fs->rootmnt) {
+ read_unlock(&current->fs->lock);
+ break;
+ }
+ read_unlock(&current->fs->lock);
+ spin_lock(&dcache_lock);
+ if (nd->dentry != nd->mnt->mnt_root) {
+ dentry = dget(nd->dentry->d_parent);
+ spin_unlock(&dcache_lock);
+ dput(nd->dentry);
+ nd->dentry = dentry;
+ break;
+ }
+ parent=nd->mnt->mnt_parent;
+ if (parent == nd->mnt) {
+ spin_unlock(&dcache_lock);
+ break;
+ }
+ mntget(parent);
+ dentry=dget(nd->mnt->mnt_mountpoint);
+ spin_unlock(&dcache_lock);
+ dput(nd->dentry);
+ nd->dentry = dentry;
+ mntput(nd->mnt);
+ nd->mnt = parent;
+ }
+}
+
+/*
+ * Name resolution.
+ *
+ * This is the basic name resolution function, turning a pathname
+ * into the final dentry.
+ *
+ * We expect 'base' to be positive and a directory.
+ */
+int link_path_walk(const char * name, struct nameidata *nd)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+ int err;
+ unsigned int lookup_flags = nd->flags;
+
+ while (*name=='/')
+ name++;
+ if (!*name)
+ goto return_base;
+
+ inode = nd->dentry->d_inode;
+ if (current->link_count)
+ lookup_flags = LOOKUP_FOLLOW;
+
+ /* At this point we know we have a real path component. */
+ for(;;) {
+ unsigned long hash;
+ struct qstr this;
+ unsigned int c;
+
+ err = permission(inode, MAY_EXEC);
+ dentry = ERR_PTR(err);
+ if (err)
+ break;
+
+ this.name = name;
+ c = *(const unsigned char *)name;
+
+ hash = init_name_hash();
+ do {
+ name++;
+ hash = partial_name_hash(c, hash);
+ c = *(const unsigned char *)name;
+ } while (c && (c != '/'));
+ this.len = name - (const char *) this.name;
+ this.hash = end_name_hash(hash);
+
+ /* remove trailing slashes? */
+ if (!c)
+ goto last_component;
+ while (*++name == '/');
+ if (!*name)
+ goto last_with_slashes;
+
+ /*
+ * "." and ".." are special - ".." 
especially so because it has + * to be able to know about the current root directory and + * parent relationships. + */ + if (this.name[0] == '.') switch (this.len) { + default: + break; + case 2: + if (this.name[1] != '.') + break; + follow_dotdot(nd); + inode = nd->dentry->d_inode; + /* fallthrough */ + case 1: + continue; + } + /* + * See if the low-level filesystem might want + * to use its own hash.. + */ + if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { + err = nd->dentry->d_op->d_hash(nd->dentry, &this); + if (err < 0) + break; + } + /* This does the actual lookups.. */ + dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); + if (!dentry) { + dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + break; + } + /* Check mountpoints.. */ + while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) + ; + + err = -ENOENT; + inode = dentry->d_inode; + if (!inode) + goto out_dput; + err = -ENOTDIR; + if (!inode->i_op) + goto out_dput; + + if (inode->i_op->follow_link) { + err = do_follow_link(dentry, nd); + dput(dentry); + if (err) + goto return_err; + err = -ENOENT; + inode = nd->dentry->d_inode; + if (!inode) + break; + err = -ENOTDIR; + if (!inode->i_op) + break; + } else { + dput(nd->dentry); + nd->dentry = dentry; + } + err = -ENOTDIR; + if (!inode->i_op->lookup) + break; + continue; + /* here ends the main loop */ + +last_with_slashes: + lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; +last_component: + if (lookup_flags & LOOKUP_PARENT) + goto lookup_parent; + if (this.name[0] == '.') switch (this.len) { + default: + break; + case 2: + if (this.name[1] != '.') + break; + follow_dotdot(nd); + inode = nd->dentry->d_inode; + /* fallthrough */ + case 1: + goto return_base; + } + if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { + err = nd->dentry->d_op->d_hash(nd->dentry, &this); + if (err < 0) + break; + } + dentry = cached_lookup(nd->dentry, &this, 0); + if (!dentry) { + dentry = real_lookup(nd->dentry, &this, 0); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + break; + } + while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry)) + ; + inode = dentry->d_inode; + if ((lookup_flags & LOOKUP_FOLLOW) + && inode && inode->i_op && inode->i_op->follow_link) { + err = do_follow_link(dentry, nd); + dput(dentry); + if (err) + goto return_err; + inode = nd->dentry->d_inode; + } else { + dput(nd->dentry); + nd->dentry = dentry; + } + err = -ENOENT; + if (!inode) + goto no_inode; + if (lookup_flags & LOOKUP_DIRECTORY) { + err = -ENOTDIR; + if (!inode->i_op || !inode->i_op->lookup) + break; + } + goto return_base; +no_inode: + err = -ENOENT; + if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY)) + break; + goto return_base; +lookup_parent: + nd->last = this; + nd->last_type = LAST_NORM; + if (this.name[0] != '.') + goto return_base; + if (this.len == 1) + nd->last_type = LAST_DOT; + else if (this.len == 2 && this.name[1] == '.') + nd->last_type = LAST_DOTDOT; +return_base: + return 0; +out_dput: + dput(dentry); + break; + } + path_release(nd); +return_err: + return err; +} + +int path_walk(const char * name, struct nameidata *nd) +{ + current->total_link_count = 0; + return link_path_walk(name, nd); +} + +/* SMP-safe */ +/* returns 1 if everything is done */ +static int __emul_lookup_dentry(const char *name, struct nameidata *nd) +{ + if (path_walk(name, nd)) + return 0; /* something went wrong... 
*/
+
+ if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
+ struct nameidata nd_root;
+ /*
+ * NAME was not found in alternate root or it's a directory. Try to find
+ * it in the normal root:
+ */
+ nd_root.last_type = LAST_ROOT;
+ nd_root.flags = nd->flags;
+ read_lock(&current->fs->lock);
+ nd_root.mnt = mntget(current->fs->rootmnt);
+ nd_root.dentry = dget(current->fs->root);
+ read_unlock(&current->fs->lock);
+ if (path_walk(name, &nd_root))
+ return 1;
+ if (nd_root.dentry->d_inode) {
+ path_release(nd);
+ nd->dentry = nd_root.dentry;
+ nd->mnt = nd_root.mnt;
+ nd->last = nd_root.last;
+ return 1;
+ }
+ path_release(&nd_root);
+ }
+ return 1;
+}
+
+void set_fs_altroot(void)
+{
+ char *emul = __emul_prefix();
+ struct nameidata nd;
+ struct vfsmount *mnt = NULL, *oldmnt;
+ struct dentry *dentry = NULL, *olddentry;
+ if (emul) {
+ read_lock(&current->fs->lock);
+ nd.mnt = mntget(current->fs->rootmnt);
+ nd.dentry = dget(current->fs->root);
+ read_unlock(&current->fs->lock);
+ nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE;
+ if (path_walk(emul,&nd) == 0) {
+ mnt = nd.mnt;
+ dentry = nd.dentry;
+ }
+ }
+ write_lock(&current->fs->lock);
+ oldmnt = current->fs->altrootmnt;
+ olddentry = current->fs->altroot;
+ current->fs->altrootmnt = mnt;
+ current->fs->altroot = dentry;
+ write_unlock(&current->fs->lock);
+ if (olddentry) {
+ dput(olddentry);
+ mntput(oldmnt);
+ }
+}
+
+/* SMP-safe */
+static inline int
+walk_init_root(const char *name, struct nameidata *nd)
+{
+ read_lock(&current->fs->lock);
+ if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
+ nd->mnt = mntget(current->fs->altrootmnt);
+ nd->dentry = dget(current->fs->altroot);
+ read_unlock(&current->fs->lock);
+ if (__emul_lookup_dentry(name,nd))
+ return 0;
+ read_lock(&current->fs->lock);
+ }
+ nd->mnt = mntget(current->fs->rootmnt);
+ nd->dentry = dget(current->fs->root);
+ read_unlock(&current->fs->lock);
+ return 1;
+}
+
+/* SMP-safe */
+int path_init(const char *name, unsigned int flags, struct nameidata *nd)
+{
+ nd->last_type = LAST_ROOT; /* if there are only slashes... */
+ nd->flags = flags;
+ if (*name=='/')
+ return walk_init_root(name,nd);
+ read_lock(&current->fs->lock);
+ nd->mnt = mntget(current->fs->pwdmnt);
+ nd->dentry = dget(current->fs->pwd);
+ read_unlock(&current->fs->lock);
+ return 1;
+}
+
+/*
+ * Restricted form of lookup. Doesn't follow links, single-component only,
+ * needs parent already locked. Doesn't follow mounts.
+ * SMP-safe.
+ */
+struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
+{
+ struct dentry * dentry;
+ struct inode *inode;
+ int err;
+
+ inode = base->d_inode;
+ err = permission(inode, MAY_EXEC);
+ dentry = ERR_PTR(err);
+ if (err)
+ goto out;
+
+ /*
+ * See if the low-level filesystem might want
+ * to use its own hash.. 
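+ * (vfat, for instance, supplies a d_hash that folds case, so
+ * "FOO" and "foo" end up in the same dcache chain.)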
+ */
+ if (base->d_op && base->d_op->d_hash) {
+ err = base->d_op->d_hash(base, name);
+ dentry = ERR_PTR(err);
+ if (err < 0)
+ goto out;
+ }
+
+ dentry = cached_lookup(base, name, 0);
+ if (!dentry) {
+ struct dentry *new = d_alloc(base, name);
+ dentry = ERR_PTR(-ENOMEM);
+ if (!new)
+ goto out;
+ lock_kernel();
+ dentry = inode->i_op->lookup(inode, new);
+ unlock_kernel();
+ if (!dentry)
+ dentry = new;
+ else
+ dput(new);
+ }
+out:
+ return dentry;
+}
+
+/* SMP-safe */
+struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
+{
+ unsigned long hash;
+ struct qstr this;
+ unsigned int c;
+
+ this.name = name;
+ this.len = len;
+ if (!len)
+ goto access;
+
+ hash = init_name_hash();
+ while (len--) {
+ c = *(const unsigned char *)name++;
+ if (c == '/' || c == '\0')
+ goto access;
+ hash = partial_name_hash(c, hash);
+ }
+ this.hash = end_name_hash(hash);
+
+ return lookup_hash(&this, base);
+access:
+ return ERR_PTR(-EACCES);
+}
+
+/*
+ * namei()
+ *
+ * is used by most simple commands to get the inode of a specified name.
+ * Open, link etc use their own routines, but this is enough for things
+ * like 'chmod' etc.
+ *
+ * namei exists in two versions: namei/lnamei. The only difference is
+ * that namei follows links, while lnamei does not.
+ * SMP-safe
+ */
+int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
+{
+ char *tmp;
+ int err;
+
+ tmp = getname(name);
+ err = PTR_ERR(tmp);
+ if (!IS_ERR(tmp)) {
+ err = 0;
+ if (path_init(tmp, flags, nd))
+ err = path_walk(tmp, nd);
+ putname(tmp);
+ }
+ return err;
+}
+
+/*
+ * It's inline, so penalty for filesystems that don't use sticky bit is
+ * minimal.
+ */
+static inline int check_sticky(struct inode *dir, struct inode *inode)
+{
+ if (!(dir->i_mode & S_ISVTX))
+ return 0;
+ if (inode->i_uid == current->fsuid)
+ return 0;
+ if (dir->i_uid == current->fsuid)
+ return 0;
+ return !capable(CAP_FOWNER);
+}
+
+/*
+ * Check whether we can remove a link victim from directory dir, check
+ * whether the type of victim is right.
+ * 1. We can't do it if dir is read-only (done in permission())
+ * 2. We should have write and exec permissions on dir
+ * 3. We can't remove anything from append-only dir
+ * 4. We can't do anything with immutable dir (done in permission())
+ * 5. If the sticky bit on dir is set we should either
+ * a. be owner of dir, or
+ * b. be owner of victim, or
+ * c. have CAP_FOWNER capability
+ * 6. If the victim is append-only or immutable we can't do anything with
+ * links pointing to it.
+ * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
+ * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
+ * 9. We can't remove a root or mountpoint.
+ */
+static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
+{
+ int error;
+ if (!victim->d_inode || victim->d_parent->d_inode != dir)
+ return -ENOENT;
+ error = permission(dir,MAY_WRITE | MAY_EXEC);
+ if (error)
+ return error;
+ if (IS_APPEND(dir))
+ return -EPERM;
+ if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||IS_IMMUTABLE_LINK(victim->d_inode))
+ return -EPERM;
+ if (isdir) {
+ if (!S_ISDIR(victim->d_inode->i_mode))
+ return -ENOTDIR;
+ if (IS_ROOT(victim))
+ return -EBUSY;
+ } else if (S_ISDIR(victim->d_inode->i_mode))
+ return -EISDIR;
+ return 0;
+}
+
+/* Check whether we can create an object with dentry child in directory
+ * dir.
+ * 1. 
We can't do it if child already exists (open has special treatment for + * this case, but since we are inlined it's OK) + * 2. We can't do it if dir is read-only (done in permission()) + * 3. We should have write and exec permissions on dir + * 4. We can't do it if dir is immutable (done in permission()) + */ +static inline int may_create(struct inode *dir, struct dentry *child) { + if (child->d_inode) + return -EEXIST; + if (IS_DEADDIR(dir)) + return -ENOENT; + return permission(dir,MAY_WRITE | MAY_EXEC); +} + +/* + * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security + * reasons. + * + * O_DIRECTORY translates into forcing a directory lookup. + */ +static inline int lookup_flags(unsigned int f) +{ + unsigned long retval = LOOKUP_FOLLOW; + + if (f & O_NOFOLLOW) + retval &= ~LOOKUP_FOLLOW; + + if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + retval &= ~LOOKUP_FOLLOW; + + if (f & O_DIRECTORY) + retval |= LOOKUP_DIRECTORY; + + return retval; +} + +int vfs_create(struct inode *dir, struct dentry *dentry, int mode) +{ + int error; + + mode &= S_IALLUGO; + mode |= S_IFREG; + + down(&dir->i_zombie); + error = may_create(dir, dentry); + if (error) + goto exit_lock; + + error = -EACCES; /* shouldn't it be ENOSYS? */ + if (!dir->i_op || !dir->i_op->create) + goto exit_lock; + + DQUOT_INIT(dir); + lock_kernel(); + error = dir->i_op->create(dir, dentry, mode); + unlock_kernel(); +exit_lock: + up(&dir->i_zombie); + if (!error) + inode_dir_notify(dir, DN_CREATE); + return error; +} + +/* + * open_namei() + * + * namei for open - this is in fact almost the whole open-routine. + * + * Note that the low bits of "flag" aren't the same as in the open + * system call - they are 00 - no permissions needed + * 01 - read permission needed + * 10 - write permission needed + * 11 - read/write permissions needed + * which is a lot more logical, and also allows the "no perm" needed + * for symlinks (where the permissions are checked later). + * SMP-safe + */ +int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd) +{ + int acc_mode, error = 0; + struct inode *inode; + struct dentry *dentry; + struct dentry *dir; + int count = 0; + + acc_mode = ACC_MODE(flag); + + /* + * The simplest case - just a plain lookup. + */ + if (!(flag & O_CREAT)) { + if (path_init(pathname, lookup_flags(flag), nd)) + error = path_walk(pathname, nd); + if (error) + return error; + dentry = nd->dentry; + goto ok; + } + + /* + * Create - we need to know the parent. + */ + if (path_init(pathname, LOOKUP_PARENT, nd)) + error = path_walk(pathname, nd); + if (error) + return error; + + /* + * We have the parent and last component. First of all, check + * that we are not asked to creat(2) an obvious directory - that + * will not do. + */ + error = -EISDIR; + if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len]) + goto exit; + + dir = nd->dentry; + down(&dir->d_inode->i_sem); + dentry = lookup_hash(&nd->last, nd->dentry); + +do_last: + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) { + up(&dir->d_inode->i_sem); + goto exit; + } + + /* Negative dentry, just create the file */ + if (!dentry->d_inode) { + error = vfs_create(dir->d_inode, dentry, + mode & ~current->fs->umask); + up(&dir->d_inode->i_sem); + dput(nd->dentry); + nd->dentry = dentry; + if (error) + goto exit; + /* Don't check for write permission, don't truncate */ + acc_mode = 0; + flag &= ~O_TRUNC; + goto ok; + } + + /* + * It already exists. 
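+ * With O_CREAT|O_EXCL this becomes the -EEXIST case just below;
+ * otherwise fall through and open the existing object.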
+ */ + up(&dir->d_inode->i_sem); + + error = -EEXIST; + if (flag & O_EXCL) + goto exit_dput; + + if (d_mountpoint(dentry)) { + error = -ELOOP; + if (flag & O_NOFOLLOW) + goto exit_dput; + while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry)); + } + error = -ENOENT; + if (!dentry->d_inode) + goto exit_dput; + if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link) + goto do_link; + + dput(nd->dentry); + nd->dentry = dentry; + error = -EISDIR; + if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode)) + goto exit; +ok: + error = -ENOENT; + inode = dentry->d_inode; + if (!inode) + goto exit; + + error = -ELOOP; + if (S_ISLNK(inode->i_mode)) + goto exit; + + error = -EISDIR; + if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE)) + goto exit; + + error = permission(inode,acc_mode); + if (error) + goto exit; + + /* + * FIFO's, sockets and device files are special: they don't + * actually live on the filesystem itself, and as such you + * can write to them even if the filesystem is read-only. + */ + if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { + flag &= ~O_TRUNC; + } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) { + error = -EACCES; + if (nd->mnt->mnt_flags & MNT_NODEV) + goto exit; + + flag &= ~O_TRUNC; + } else { + error = -EROFS; + if (IS_RDONLY(inode) && (flag & 2)) + goto exit; + } + /* + * An append-only file must be opened in append mode for writing. + */ + error = -EPERM; + if (IS_APPEND(inode)) { + if ((flag & FMODE_WRITE) && !(flag & O_APPEND)) + goto exit; + if (flag & O_TRUNC) + goto exit; + } + + /* + * Ensure there are no outstanding leases on the file. + */ + error = get_lease(inode, flag); + if (error) + goto exit; + + if (flag & O_TRUNC) { + error = get_write_access(inode); + if (error) + goto exit; + + /* + * Refuse to truncate files with mandatory locks held on them. + */ + error = locks_verify_locked(inode); + if (!error) { + DQUOT_INIT(inode); + + error = do_truncate(dentry, 0); + } + put_write_access(inode); + if (error) + goto exit; + } else + if (flag & FMODE_WRITE) + DQUOT_INIT(inode); + + return 0; + +exit_dput: + dput(dentry); +exit: + path_release(nd); + return error; + +do_link: + error = -ELOOP; + if (flag & O_NOFOLLOW) + goto exit_dput; + /* + * This is subtle. Instead of calling do_follow_link() we do the + * thing by hands. The reason is that this way we have zero link_count + * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT. + * After that we have the parent and last component, i.e. + * we are in the same situation as after the first path_walk(). + * Well, almost - if the last component is normal we get its copy + * stored in nd->last.name and we will have to putname() it when we + * are done. Procfs-like symlinks just set LAST_BIND. 
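+ * (LAST_BIND means ->follow_link bound nd->dentry itself, the way
+ * proc's symlinks do, so there is no pathname copy to free.)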
+ */ + UPDATE_ATIME(dentry->d_inode); + error = dentry->d_inode->i_op->follow_link(dentry, nd); + dput(dentry); + if (error) + return error; + if (nd->last_type == LAST_BIND) { + dentry = nd->dentry; + goto ok; + } + error = -EISDIR; + if (nd->last_type != LAST_NORM) + goto exit; + if (nd->last.name[nd->last.len]) { + putname(nd->last.name); + goto exit; + } + error = -ELOOP; + if (count++==32) { + putname(nd->last.name); + goto exit; + } + dir = nd->dentry; + down(&dir->d_inode->i_sem); + dentry = lookup_hash(&nd->last, nd->dentry); + putname(nd->last.name); + goto do_last; +} + +/* SMP-safe */ +static struct dentry *lookup_create(struct nameidata *nd, int is_dir) +{ + struct dentry *dentry; + + down(&nd->dentry->d_inode->i_sem); + dentry = ERR_PTR(-EEXIST); + if (nd->last_type != LAST_NORM) + goto fail; + dentry = lookup_hash(&nd->last, nd->dentry); + if (IS_ERR(dentry)) + goto fail; + if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode) + goto enoent; + return dentry; +enoent: + dput(dentry); + dentry = ERR_PTR(-ENOENT); +fail: + return dentry; +} + +int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) +{ + int error = -EPERM; + + down(&dir->i_zombie); + if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD)) + goto exit_lock; + + error = may_create(dir, dentry); + if (error) + goto exit_lock; + + error = -EPERM; + if (!dir->i_op || !dir->i_op->mknod) + goto exit_lock; + + DQUOT_INIT(dir); + lock_kernel(); + error = dir->i_op->mknod(dir, dentry, mode, dev); + unlock_kernel(); +exit_lock: + up(&dir->i_zombie); + if (!error) + inode_dir_notify(dir, DN_CREATE); + return error; +} + +asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev) +{ + int error = 0; + char * tmp; + struct dentry * dentry; + struct nameidata nd; + + if (S_ISDIR(mode)) + return -EPERM; + tmp = getname(filename); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + + if (path_init(tmp, LOOKUP_PARENT, &nd)) + error = path_walk(tmp, &nd); + if (error) + goto out; + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + + mode &= ~current->fs->umask; + if (!IS_ERR(dentry)) { + switch (mode & S_IFMT) { + case 0: case S_IFREG: + error = vfs_create(nd.dentry->d_inode,dentry,mode); + break; + case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: + error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev); + break; + case S_IFDIR: + error = -EPERM; + break; + default: + error = -EINVAL; + } + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); +out: + putname(tmp); + + return error; +} + +int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + int error; + + down(&dir->i_zombie); + error = may_create(dir, dentry); + if (error) + goto exit_lock; + + error = -EPERM; + if (!dir->i_op || !dir->i_op->mkdir) + goto exit_lock; + + DQUOT_INIT(dir); + mode &= (S_IRWXUGO|S_ISVTX); + lock_kernel(); + error = dir->i_op->mkdir(dir, dentry, mode); + unlock_kernel(); + +exit_lock: + up(&dir->i_zombie); + if (!error) + inode_dir_notify(dir, DN_CREATE); + return error; +} + +asmlinkage long sys_mkdir(const char * pathname, int mode) +{ + int error = 0; + char * tmp; + + tmp = getname(pathname); + error = PTR_ERR(tmp); + if (!IS_ERR(tmp)) { + struct dentry *dentry; + struct nameidata nd; + + if (path_init(tmp, LOOKUP_PARENT, &nd)) + error = path_walk(tmp, &nd); + if (error) + goto out; + dentry = lookup_create(&nd, 1); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + error = vfs_mkdir(nd.dentry->d_inode, dentry, + mode & ~current->fs->umask); + 
dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); +out: + putname(tmp); + } + + return error; +} + +/* + * We try to drop the dentry early: we should have + * a usage count of 2 if we're the only user of this + * dentry, and if that is true (possibly after pruning + * the dcache), then we drop the dentry now. + * + * A low-level filesystem can, if it choses, legally + * do a + * + * if (!d_unhashed(dentry)) + * return -EBUSY; + * + * if it cannot handle the case of removing a directory + * that is still in use by something else.. + */ +static void d_unhash(struct dentry *dentry) +{ + dget(dentry); + switch (atomic_read(&dentry->d_count)) { + default: + shrink_dcache_parent(dentry); + if (atomic_read(&dentry->d_count) != 2) + break; + case 2: + d_drop(dentry); + } +} + +int vfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + int error; + + error = may_delete(dir, dentry, 1); + if (error) + return error; + + if (!dir->i_op || !dir->i_op->rmdir) + return -EPERM; + + DQUOT_INIT(dir); + + double_down(&dir->i_zombie, &dentry->d_inode->i_zombie); + d_unhash(dentry); + if (IS_DEADDIR(dir)) + error = -ENOENT; + else if (d_mountpoint(dentry)) + error = -EBUSY; + else { + lock_kernel(); + error = dir->i_op->rmdir(dir, dentry); + unlock_kernel(); + if (!error) + dentry->d_inode->i_flags |= S_DEAD; + } + double_up(&dir->i_zombie, &dentry->d_inode->i_zombie); + if (!error) { + inode_dir_notify(dir, DN_DELETE); + d_delete(dentry); + } + dput(dentry); + + return error; +} + +asmlinkage long sys_rmdir(const char * pathname) +{ + int error = 0; + char * name; + struct dentry *dentry; + struct nameidata nd; + + name = getname(pathname); + if(IS_ERR(name)) + return PTR_ERR(name); + + if (path_init(name, LOOKUP_PARENT, &nd)) + error = path_walk(name, &nd); + if (error) + goto exit; + + switch(nd.last_type) { + case LAST_DOTDOT: + error = -ENOTEMPTY; + goto exit1; + case LAST_DOT: + error = -EINVAL; + goto exit1; + case LAST_ROOT: + error = -EBUSY; + goto exit1; + } + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + error = vfs_rmdir(nd.dentry->d_inode, dentry); + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); +exit1: + path_release(&nd); +exit: + putname(name); + return error; +} + +int vfs_unlink(struct inode *dir, struct dentry *dentry) +{ + int error; + + down(&dir->i_zombie); + error = may_delete(dir, dentry, 0); + if (!error) { + error = -EPERM; + if (dir->i_op && dir->i_op->unlink) { + DQUOT_INIT(dir); + if (d_mountpoint(dentry)) + error = -EBUSY; + else { + lock_kernel(); + error = dir->i_op->unlink(dir, dentry); + unlock_kernel(); + if (!error) + d_delete(dentry); + } + } + } + up(&dir->i_zombie); + if (!error) + inode_dir_notify(dir, DN_DELETE); + return error; +} + +asmlinkage long sys_unlink(const char * pathname) +{ + int error = 0; + char * name; + struct dentry *dentry; + struct nameidata nd; + + name = getname(pathname); + if(IS_ERR(name)) + return PTR_ERR(name); + + if (path_init(name, LOOKUP_PARENT, &nd)) + error = path_walk(name, &nd); + if (error) + goto exit; + error = -EISDIR; + if (nd.last_type != LAST_NORM) + goto exit1; + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + /* Why not before? 
Because we want correct error value */ + if (nd.last.name[nd.last.len]) + goto slashes; + error = vfs_unlink(nd.dentry->d_inode, dentry); + exit2: + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); +exit1: + path_release(&nd); +exit: + putname(name); + + return error; + +slashes: + error = !dentry->d_inode ? -ENOENT : + S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR; + goto exit2; +} + +int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) +{ + int error; + + down(&dir->i_zombie); + error = may_create(dir, dentry); + if (error) + goto exit_lock; + + error = -EPERM; + if (!dir->i_op || !dir->i_op->symlink) + goto exit_lock; + + DQUOT_INIT(dir); + lock_kernel(); + error = dir->i_op->symlink(dir, dentry, oldname); + unlock_kernel(); + +exit_lock: + up(&dir->i_zombie); + if (!error) + inode_dir_notify(dir, DN_CREATE); + return error; +} + +asmlinkage long sys_symlink(const char * oldname, const char * newname) +{ + int error = 0; + char * from; + char * to; + + from = getname(oldname); + if(IS_ERR(from)) + return PTR_ERR(from); + to = getname(newname); + error = PTR_ERR(to); + if (!IS_ERR(to)) { + struct dentry *dentry; + struct nameidata nd; + + if (path_init(to, LOOKUP_PARENT, &nd)) + error = path_walk(to, &nd); + if (error) + goto out; + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { + error = vfs_symlink(nd.dentry->d_inode, dentry, from); + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); + path_release(&nd); +out: + putname(to); + } + putname(from); + return error; +} + +int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) +{ + struct inode *inode; + int error; + + down(&dir->i_zombie); + error = -ENOENT; + inode = old_dentry->d_inode; + if (!inode) + goto exit_lock; + + error = may_create(dir, new_dentry); + if (error) + goto exit_lock; + + error = -EXDEV; + if (dir->i_dev != inode->i_dev) + goto exit_lock; + + /* + * A link to an append-only or immutable file cannot be created. + */ + error = -EPERM; + if (IS_APPEND(inode) || IS_IMMUTABLE_LINK(inode)) + goto exit_lock; + if (!dir->i_op || !dir->i_op->link) + goto exit_lock; + + DQUOT_INIT(dir); + lock_kernel(); + error = dir->i_op->link(old_dentry, dir, new_dentry); + unlock_kernel(); + +exit_lock: + up(&dir->i_zombie); + if (!error) + inode_dir_notify(dir, DN_CREATE); + return error; +} + +/* + * Hardlinks are often used in delicate situations. We avoid + * security-related surprises by not following symlinks on the + * newname. --KAB + * + * We don't follow them on the oldname either to be compatible + * with linux 2.0, and to avoid hard-linking to directories + * and other special files. 
--ADM + */ +asmlinkage long sys_link(const char * oldname, const char * newname) +{ + int error; + char * from; + char * to; + + from = getname(oldname); + if(IS_ERR(from)) + return PTR_ERR(from); + to = getname(newname); + error = PTR_ERR(to); + if (!IS_ERR(to)) { + struct dentry *new_dentry; + struct nameidata nd, old_nd; + + error = 0; + if (path_init(from, LOOKUP_POSITIVE, &old_nd)) + error = path_walk(from, &old_nd); + if (error) + goto exit; + if (path_init(to, LOOKUP_PARENT, &nd)) + error = path_walk(to, &nd); + if (error) + goto out; + error = -EXDEV; + if (old_nd.mnt != nd.mnt) + goto out_release; + new_dentry = lookup_create(&nd, 0); + error = PTR_ERR(new_dentry); + if (!IS_ERR(new_dentry)) { + error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry); + dput(new_dentry); + } + up(&nd.dentry->d_inode->i_sem); +out_release: + path_release(&nd); +out: + path_release(&old_nd); +exit: + putname(to); + } + putname(from); + + return error; +} + +/* + * The worst of all namespace operations - renaming directory. "Perverted" + * doesn't even start to describe it. Somebody in UCB had a heck of a trip... + * Problems: + * a) we can get into loop creation. Check is done in is_subdir(). + * b) race potential - two innocent renames can create a loop together. + * That's where 4.4 screws up. Current fix: serialization on + * sb->s_vfs_rename_sem. We might be more accurate, but that's another + * story. + * c) we have to lock _three_ objects - parents and victim (if it exists). + * And that - after we got ->i_sem on parents (until then we don't know + * whether the target exists at all, let alone whether it is a directory + * or not). Solution: ->i_zombie. Taken only after ->i_sem. Always taken + * on link creation/removal of any kind. And taken (without ->i_sem) on + * directory that will be removed (both in rmdir() and here). + * d) some filesystems don't support opened-but-unlinked directories, + * either because of layout or because they are not ready to deal with + * all cases correctly. The latter will be fixed (taking this sort of + * stuff into VFS), but the former is not going away. Solution: the same + * trick as in rmdir(). + * e) conversion from fhandle to dentry may come in the wrong moment - when + * we are removing the target. Solution: we will have to grab ->i_zombie + * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on + * ->i_sem on parents, which works but leads to some truely excessive + * locking]. + */ +int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + int error; + struct inode *target; + + if (old_dentry->d_inode == new_dentry->d_inode) + return 0; + + error = may_delete(old_dir, old_dentry, 1); + if (error) + return error; + + if (new_dir->i_dev != old_dir->i_dev) + return -EXDEV; + + if (!new_dentry->d_inode) + error = may_create(new_dir, new_dentry); + else + error = may_delete(new_dir, new_dentry, 1); + if (error) + return error; + + if (!old_dir->i_op || !old_dir->i_op->rename) + return -EPERM; + + /* + * If we are going to change the parent - check write permissions, + * we'll need to flip '..'. + */ + if (new_dir != old_dir) { + error = permission(old_dentry->d_inode, MAY_WRITE); + } + if (error) + return error; + + DQUOT_INIT(old_dir); + DQUOT_INIT(new_dir); + down(&old_dir->i_sb->s_vfs_rename_sem); + error = -EINVAL; + if (is_subdir(new_dentry, old_dentry)) + goto out_unlock; + /* Don't eat your daddy, dear... 
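+	 * (i.e. the case where the target is the source's own parent,
+	 *  as in rename("/a/b", "/a") -- hypothetical paths; the
+	 *  is_subdir() check above rejects the opposite direction,
+	 *  moving a directory down into its own subtree)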
*/ + /* This also avoids locking issues */ + if (old_dentry->d_parent == new_dentry) + goto out_unlock; + target = new_dentry->d_inode; + if (target) { /* Hastur! Hastur! Hastur! */ + triple_down(&old_dir->i_zombie, + &new_dir->i_zombie, + &target->i_zombie); + d_unhash(new_dentry); + } else + double_down(&old_dir->i_zombie, + &new_dir->i_zombie); + if (IS_DEADDIR(old_dir)||IS_DEADDIR(new_dir)) + error = -ENOENT; + else if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) + error = -EBUSY; + else + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + if (target) { + if (!error) + target->i_flags |= S_DEAD; + triple_up(&old_dir->i_zombie, + &new_dir->i_zombie, + &target->i_zombie); + if (d_unhashed(new_dentry)) + d_rehash(new_dentry); + dput(new_dentry); + } else + double_up(&old_dir->i_zombie, + &new_dir->i_zombie); + + if (!error) + d_move(old_dentry,new_dentry); +out_unlock: + up(&old_dir->i_sb->s_vfs_rename_sem); + return error; +} + +int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + int error; + + if (old_dentry->d_inode == new_dentry->d_inode) + return 0; + + error = may_delete(old_dir, old_dentry, 0); + if (error) + return error; + + if (new_dir->i_dev != old_dir->i_dev) + return -EXDEV; + + if (!new_dentry->d_inode) + error = may_create(new_dir, new_dentry); + else + error = may_delete(new_dir, new_dentry, 0); + if (error) + return error; + + if (!old_dir->i_op || !old_dir->i_op->rename) + return -EPERM; + + DQUOT_INIT(old_dir); + DQUOT_INIT(new_dir); + double_down(&old_dir->i_zombie, &new_dir->i_zombie); + if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) + error = -EBUSY; + else + error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); + double_up(&old_dir->i_zombie, &new_dir->i_zombie); + if (error) + return error; + /* The following d_move() should become unconditional */ + if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) { + d_move(old_dentry, new_dentry); + } + return 0; +} + +int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + int error; + if (S_ISDIR(old_dentry->d_inode->i_mode)) + error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); + else + error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); + if (!error) { + if (old_dir == new_dir) + inode_dir_notify(old_dir, DN_RENAME); + else { + inode_dir_notify(old_dir, DN_DELETE); + inode_dir_notify(new_dir, DN_CREATE); + } + } + return error; +} + +static inline int do_rename(const char * oldname, const char * newname) +{ + int error = 0; + struct dentry * old_dir, * new_dir; + struct dentry * old_dentry, *new_dentry; + struct nameidata oldnd, newnd; + + if (path_init(oldname, LOOKUP_PARENT, &oldnd)) + error = path_walk(oldname, &oldnd); + + if (error) + goto exit; + + if (path_init(newname, LOOKUP_PARENT, &newnd)) + error = path_walk(newname, &newnd); + if (error) + goto exit1; + + error = -EXDEV; + if (oldnd.mnt != newnd.mnt) + goto exit2; + + old_dir = oldnd.dentry; + error = -EBUSY; + if (oldnd.last_type != LAST_NORM) + goto exit2; + + new_dir = newnd.dentry; + if (newnd.last_type != LAST_NORM) + goto exit2; + + double_lock(new_dir, old_dir); + + old_dentry = lookup_hash(&oldnd.last, old_dir); + error = PTR_ERR(old_dentry); + if (IS_ERR(old_dentry)) + goto exit3; + /* source must exist */ + error = -ENOENT; + if (!old_dentry->d_inode) + goto exit4; + /* unless the source is a directory trailing slashes give 
-ENOTDIR */ + if (!S_ISDIR(old_dentry->d_inode->i_mode)) { + error = -ENOTDIR; + if (oldnd.last.name[oldnd.last.len]) + goto exit4; + if (newnd.last.name[newnd.last.len]) + goto exit4; + } + new_dentry = lookup_hash(&newnd.last, new_dir); + error = PTR_ERR(new_dentry); + if (IS_ERR(new_dentry)) + goto exit4; + + lock_kernel(); + error = vfs_rename(old_dir->d_inode, old_dentry, + new_dir->d_inode, new_dentry); + unlock_kernel(); + + dput(new_dentry); +exit4: + dput(old_dentry); +exit3: + double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem); +exit2: + path_release(&newnd); +exit1: + path_release(&oldnd); +exit: + return error; +} + +asmlinkage long sys_rename(const char * oldname, const char * newname) +{ + int error; + char * from; + char * to; + + from = getname(oldname); + if(IS_ERR(from)) + return PTR_ERR(from); + to = getname(newname); + error = PTR_ERR(to); + if (!IS_ERR(to)) { + error = do_rename(from,to); + putname(to); + } + putname(from); + return error; +} + +int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link) +{ + int len; + + len = PTR_ERR(link); + if (IS_ERR(link)) + goto out; + + len = strlen(link); + if (len > (unsigned) buflen) + len = buflen; + if (copy_to_user(buffer, link, len)) + len = -EFAULT; +out: + return len; +} + +static inline int +__vfs_follow_link(struct nameidata *nd, const char *link) +{ + int res = 0; + char *name; + if (IS_ERR(link)) + goto fail; + + if (*link == '/') { + path_release(nd); + if (!walk_init_root(link, nd)) + /* weird __emul_prefix() stuff did it */ + goto out; + } + res = link_path_walk(link, nd); +out: + if (current->link_count || res || nd->last_type!=LAST_NORM) + return res; + /* + * If it is an iterative symlinks resolution in open_namei() we + * have to copy the last component. And all that crap because of + * bloody create() on broken symlinks. Furrfu... 
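+ *
+ * An illustrative case (hypothetical names): open("x", O_CREAT)
+ * where "x" -> "missing". The walk ends on the nonexistent last
+ * component "missing", and open_namei() still needs that name to
+ * create the target -- but it currently points into the symlink
+ * body we are about to drop, hence the copy below.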
+ */ + name = __getname(); + if (!name) + return -ENOMEM; + strcpy(name, nd->last.name); + nd->last.name = name; + return 0; +fail: + path_release(nd); + return PTR_ERR(link); +} + +int vfs_follow_link(struct nameidata *nd, const char *link) +{ + return __vfs_follow_link(nd, link); +} + +/* get the link contents into pagecache */ +static char *page_getlink(struct dentry * dentry, struct page **ppage) +{ + struct page * page; + struct address_space *mapping = dentry->d_inode->i_mapping; + page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage, + NULL); + if (IS_ERR(page)) + goto sync_fail; + wait_on_page(page); + if (!Page_Uptodate(page)) + goto async_fail; + *ppage = page; + return kmap(page); + +async_fail: + page_cache_release(page); + return ERR_PTR(-EIO); + +sync_fail: + return (char*)page; +} + +int page_readlink(struct dentry *dentry, char *buffer, int buflen) +{ + struct page *page = NULL; + char *s = page_getlink(dentry, &page); + int res = vfs_readlink(dentry,buffer,buflen,s); + if (page) { + kunmap(page); + page_cache_release(page); + } + return res; +} + +int page_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct page *page = NULL; + char *s = page_getlink(dentry, &page); + int res = __vfs_follow_link(nd, s); + if (page) { + kunmap(page); + page_cache_release(page); + } + return res; +} + +struct inode_operations page_symlink_inode_operations = { + readlink: page_readlink, + follow_link: page_follow_link, +}; diff -urN linux-2.4.16-reiserfspatches-immutable/fs/nfsd/vfs.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/nfsd/vfs.c~ --- linux-2.4.16-reiserfspatches-immutable/fs/nfsd/vfs.c~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/nfsd/vfs.c~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,1591 @@ +#define MSNFS /* HACK HACK */ +/* + * linux/fs/nfsd/vfs.c + * + * File operations used by nfsd. Some of these have been ripped from + * other parts of the kernel because they weren't in ksyms.c, others + * are partial duplicates with added or changed functionality. + * + * Note that several functions dget() the dentry upon which they want + * to act, most notably those that create directory entries. Response + * dentry's are dput()'d if necessary in the release callback. + * So if you notice code paths that apparently fail to dput() the + * dentry, don't worry--they have been taken care of. + * + * Copyright (C) 1995-1999 Olaf Kirch + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#define __NO_VERSION__ +#include + +#include +#include +#ifdef CONFIG_NFSD_V3 +#include +#include +#endif /* CONFIG_NFSD_V3 */ +#include +#include + +#include + +#define NFSDDBG_FACILITY NFSDDBG_FILEOP +#define NFSD_PARANOIA + + +/* We must ignore files (but only files) which might have mandatory + * locks on them because there is no way to know if the accesser has + * the lock. + */ +#define IS_ISMNDLK(i) (S_ISREG((i)->i_mode) && MANDATORY_LOCK(i)) + +/* + * This is a cache of readahead params that help us choose the proper + * readahead strategy. Initially, we set all readahead parameters to 0 + * and let the VFS handle things. + * If you increase the number of cached files very much, you'll need to + * add a hash table here. 
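+ *
+ * In rough strokes (nfsd_get_raparms() further down is the
+ * authoritative version): scan the list for a matching
+ * (p_dev, p_ino) pair, remember the first entry with p_count == 0
+ * as a victim to recycle on a miss, then splice the winner to the
+ * front of the list so busy files stay cheap to find.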
+ */ +struct raparms { + struct raparms *p_next; + unsigned int p_count; + ino_t p_ino; + dev_t p_dev; + unsigned long p_reada, + p_ramax, + p_raend, + p_ralen, + p_rawin; +}; + +static struct raparms * raparml; +static struct raparms * raparm_cache; + +/* + * Look up one component of a pathname. + * N.B. After this call _both_ fhp and resfh need an fh_put + * + * If the lookup would cross a mountpoint, and the mounted filesystem + * is exported to the client with NFSEXP_CROSSMNT, then the lookup is + * accepted as it stands and the mounted directory is + * returned. Otherwise the covered directory is returned. + * NOTE: this mountpoint crossing is not supported properly by all + * clients and is explicitly disallowed for NFSv3 + * NeilBrown + */ +int +nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, + int len, struct svc_fh *resfh) +{ + struct svc_export *exp; + struct dentry *dparent; + struct dentry *dentry; + int err; + + dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); + + /* Obtain dentry and export. */ + err = fh_verify(rqstp, fhp, S_IFDIR, MAY_EXEC); + if (err) + goto out; + + dparent = fhp->fh_dentry; + exp = fhp->fh_export; + + err = nfserr_acces; + + /* Lookup the name, but don't follow links */ + if (isdotent(name, len)) { + if (len==1) + dentry = dget(dparent); + else { /* must be ".." */ + /* checking mountpoint crossing is very different when stepping up */ + if (dparent == exp->ex_dentry) { + if (!EX_CROSSMNT(exp)) + dentry = dget(dparent); /* .. == . just like at / */ + else + { + struct svc_export *exp2 = NULL; + struct dentry *dp; + struct vfsmount *mnt = mntget(exp->ex_mnt); + dentry = dget(dparent); + while(follow_up(&mnt, &dentry)) + ; + dp = dget(dentry->d_parent); + dput(dentry); + dentry = dp; + for ( ; exp2 == NULL && dp->d_parent != dp; + dp=dp->d_parent) + exp2 = exp_get(exp->ex_client, dp->d_inode->i_dev, dp->d_inode->i_ino); + if (exp2==NULL) { + dput(dentry); + dentry = dget(dparent); + } else { + exp = exp2; + } + mntput(mnt); + } + } else + dentry = dget(dparent->d_parent); + } + } else { + fh_lock(fhp); + dentry = lookup_one_len(name, dparent, len); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) + goto out_nfserr; + /* + * check if we have crossed a mount point ... + */ + if (d_mountpoint(dentry)) { + struct svc_export *exp2 = NULL; + struct vfsmount *mnt = mntget(exp->ex_mnt); + struct dentry *mounts = dget(dentry); + while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)) + ; + exp2 = exp_get(rqstp->rq_client, + mounts->d_inode->i_dev, + mounts->d_inode->i_ino); + if (exp2 && EX_CROSSMNT(exp2)) { + /* successfully crossed mount point */ + exp = exp2; + dput(dentry); + dentry = mounts; + } else + dput(mounts); + mntput(mnt); + } + } + /* + * Note: we compose the file handle now, but as the + * dentry may be negative, it may need to be updated. + */ + err = fh_compose(resfh, exp, dentry, fhp); + if (!err && !dentry->d_inode) + err = nfserr_noent; +out: + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +/* + * Set various file attributes. + * N.B. 
After this call fhp needs an fh_put + */ +int +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, + int check_guard, time_t guardtime) +{ + struct dentry *dentry; + struct inode *inode; + int accmode = MAY_SATTR; + int ftype = 0; + int imode; + int err; + int size_change = 0; + + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + accmode |= MAY_WRITE|MAY_OWNER_OVERRIDE; + if (iap->ia_valid & ATTR_SIZE) + ftype = S_IFREG; + + /* Get inode */ + err = fh_verify(rqstp, fhp, ftype, accmode); + if (err || !iap->ia_valid) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + err = inode_change_ok(inode, iap); + /* could be a "touch" (utimes) request where the user is not the owner but does + * have write permission. In this case the user should be allowed to set + * both times to the current time. We could just assume any such SETATTR + * is intended to set the times to "now", but we do a couple of simple tests + * to increase our confidence. + */ +#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) +#define MAX_TOUCH_TIME_ERROR (30*60) + if (err + && (iap->ia_valid & BOTH_TIME_SET) == BOTH_TIME_SET + && iap->ia_mtime == iap->ia_atime + ) { + /* looks good. now just make sure time is in the right ballpark. + * solaris, at least, doesn't seem to care what the time request is + */ + time_t delta = iap->ia_atime - CURRENT_TIME; + if (delta<0) delta = -delta; + if (delta < MAX_TOUCH_TIME_ERROR) { + /* turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME + * this will cause notify_change to set these times to "now" + */ + iap->ia_valid &= ~BOTH_TIME_SET; + err = inode_change_ok(inode, iap); + } + } + + if (err) + goto out_nfserr; + + /* The size case is special. It changes the file as well as the attributes. */ + if (iap->ia_valid & ATTR_SIZE) { + if (iap->ia_size < inode->i_size) { + err = nfsd_permission(fhp->fh_export, dentry, MAY_TRUNC|MAY_OWNER_OVERRIDE); + if (err) + goto out; + } + + /* + * If we are changing the size of the file, then + * we need to break all leases. + */ + err = get_lease(inode, FMODE_WRITE); + if (err) + goto out_nfserr; + + err = get_write_access(inode); + if (err) + goto out_nfserr; + + err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (err) { + put_write_access(inode); + goto out_nfserr; + } + DQUOT_INIT(inode); + } + + imode = inode->i_mode; + if (iap->ia_valid & ATTR_MODE) { + iap->ia_mode &= S_IALLUGO; + imode = iap->ia_mode |= (imode & ~S_IALLUGO); + } + + /* Revoke setuid/setgid bit on chown/chgrp */ + if ((iap->ia_valid & ATTR_UID) && (imode & S_ISUID) + && iap->ia_uid != inode->i_uid) { + iap->ia_valid |= ATTR_MODE; + iap->ia_mode = imode &= ~S_ISUID; + } + if ((iap->ia_valid & ATTR_GID) && (imode & S_ISGID) + && iap->ia_gid != inode->i_gid) { + iap->ia_valid |= ATTR_MODE; + iap->ia_mode = imode &= ~S_ISGID; + } + + /* Change the attributes. 
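+	 * As for the "touch" heuristic above, the shape it accepts is,
+	 * roughly: both ATTR_ATIME_SET and ATTR_MTIME_SET present,
+	 * ia_mtime == ia_atime, and the timestamp within 30 minutes
+	 * (MAX_TOUCH_TIME_ERROR) of our own clock; the _SET bits are
+	 * then dropped so notify_change() stamps "now" instead.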
*/ + + + iap->ia_valid |= ATTR_CTIME; + + if (iap->ia_valid & ATTR_SIZE) { + fh_lock(fhp); + size_change = 1; + } + err = nfserr_notsync; + if (!check_guard || guardtime == inode->i_ctime) { + err = notify_change(dentry, iap); + err = nfserrno(err); + } + if (size_change) { + fh_unlock(fhp); + put_write_access(inode); + } + if (!err) + if (EX_ISSYNC(fhp->fh_export)) + write_inode_now(inode, 1); +out: + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +#ifdef CONFIG_NFSD_V3 +/* + * Check server access rights to a file system object + */ +struct accessmap { + u32 access; + int how; +}; +static struct accessmap nfs3_regaccess[] = { + { NFS3_ACCESS_READ, MAY_READ }, + { NFS3_ACCESS_EXECUTE, MAY_EXEC }, + { NFS3_ACCESS_MODIFY, MAY_WRITE|MAY_TRUNC }, + { NFS3_ACCESS_EXTEND, MAY_WRITE }, + + { 0, 0 } +}; + +static struct accessmap nfs3_diraccess[] = { + { NFS3_ACCESS_READ, MAY_READ }, + { NFS3_ACCESS_LOOKUP, MAY_EXEC }, + { NFS3_ACCESS_MODIFY, MAY_EXEC|MAY_WRITE|MAY_TRUNC }, + { NFS3_ACCESS_EXTEND, MAY_EXEC|MAY_WRITE }, + { NFS3_ACCESS_DELETE, MAY_REMOVE }, + + { 0, 0 } +}; + +static struct accessmap nfs3_anyaccess[] = { + /* Some clients - Solaris 2.6 at least, make an access call + * to the server to check for access for things like /dev/null + * (which really, the server doesn't care about). So + * We provide simple access checking for them, looking + * mainly at mode bits + */ + { NFS3_ACCESS_READ, MAY_READ }, + { NFS3_ACCESS_EXECUTE, MAY_EXEC }, + { NFS3_ACCESS_MODIFY, MAY_WRITE }, + { NFS3_ACCESS_EXTEND, MAY_WRITE }, + + { 0, 0 } +}; + +int +nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access) +{ + struct accessmap *map; + struct svc_export *export; + struct dentry *dentry; + u32 query, result = 0; + unsigned int error; + + error = fh_verify(rqstp, fhp, 0, MAY_NOP); + if (error) + goto out; + + export = fhp->fh_export; + dentry = fhp->fh_dentry; + + if (S_ISREG(dentry->d_inode->i_mode)) + map = nfs3_regaccess; + else if (S_ISDIR(dentry->d_inode->i_mode)) + map = nfs3_diraccess; + else + map = nfs3_anyaccess; + + + query = *access; + for (; map->access; map++) { + if (map->access & query) { + unsigned int err2; + err2 = nfsd_permission(export, dentry, map->how); + switch (err2) { + case nfs_ok: + result |= map->access; + break; + + /* the following error codes just mean the access was not allowed, + * rather than an error occurred */ + case nfserr_rofs: + case nfserr_acces: + case nfserr_perm: + /* simply don't "or" in the access bit. */ + break; + default: + error = err2; + goto out; + } + } + } + *access = result; + + out: + return error; +} +#endif /* CONFIG_NFSD_V3 */ + + + +/* + * Open an existing file or directory. + * The access argument indicates the type of open (read/write/lock) + * N.B. 
After this call fhp needs an fh_put + */ +int +nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, + int access, struct file *filp) +{ + struct dentry *dentry; + struct inode *inode; + int err; + + /* If we get here, then the client has already done an "open", and (hopefully) + * checked permission - so allow OWNER_OVERRIDE in case a chmod has now revoked + * permission */ + err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE); + if (err) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + /* Disallow access to files with the append-only bit set or + * with mandatory locking enabled + */ + err = nfserr_perm; + if (IS_APPEND(inode) || IS_ISMNDLK(inode)) + goto out; + if (!inode->i_fop) + goto out; + + /* + * Check to see if there are any leases on this file. + * This may block while leases are broken. + */ + err = get_lease(inode, (access & MAY_WRITE) ? FMODE_WRITE : 0); + if (err) + goto out_nfserr; + + if ((access & MAY_WRITE) && (err = get_write_access(inode)) != 0) + goto out_nfserr; + + memset(filp, 0, sizeof(*filp)); + filp->f_op = fops_get(inode->i_fop); + atomic_set(&filp->f_count, 1); + filp->f_dentry = dentry; + filp->f_vfsmnt = fhp->fh_export->ex_mnt; + if (access & MAY_WRITE) { + filp->f_flags = O_WRONLY|O_LARGEFILE; + filp->f_mode = FMODE_WRITE; + DQUOT_INIT(inode); + } else { + filp->f_flags = O_RDONLY|O_LARGEFILE; + filp->f_mode = FMODE_READ; + } + + err = 0; + if (filp->f_op && filp->f_op->open) { + err = filp->f_op->open(inode, filp); + if (err) { + fops_put(filp->f_op); + if (access & MAY_WRITE) + put_write_access(inode); + + /* I nearly added put_filp() call here, but this filp + * is really on callers stack frame. -DaveM + */ + atomic_dec(&filp->f_count); + } + } +out_nfserr: + if (err) + err = nfserrno(err); +out: + return err; +} + +/* + * Close a file. + */ +void +nfsd_close(struct file *filp) +{ + struct dentry *dentry = filp->f_dentry; + struct inode *inode = dentry->d_inode; + + if (filp->f_op && filp->f_op->release) + filp->f_op->release(inode, filp); + fops_put(filp->f_op); + if (filp->f_mode & FMODE_WRITE) + put_write_access(inode); +} + +/* + * Sync a file + * As this calls fsync (not fdatasync) there is no need for a write_inode + * after it. + */ +void +nfsd_sync(struct file *filp) +{ + dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name); + down(&filp->f_dentry->d_inode->i_sem); + filp->f_op->fsync(filp, filp->f_dentry, 0); + up(&filp->f_dentry->d_inode->i_sem); +} + +void +nfsd_sync_dir(struct dentry *dp) +{ + struct inode *inode = dp->d_inode; + int (*fsync) (struct file *, struct dentry *, int); + + if (inode->i_fop && (fsync = inode->i_fop->fsync)) { + fsync(NULL, dp, 0); + } +} + +/* + * Obtain the readahead parameters for the file + * specified by (dev, ino). + */ +static inline struct raparms * +nfsd_get_raparms(dev_t dev, ino_t ino) +{ + struct raparms *ra, **rap, **frap = NULL; + int depth = 0; + + for (rap = &raparm_cache; (ra = *rap); rap = &ra->p_next) { + if (ra->p_ino == ino && ra->p_dev == dev) + goto found; + depth++; + if (ra->p_count == 0) + frap = rap; + } + depth = nfsdstats.ra_size*11/10; + if (!frap) + return NULL; + rap = frap; + ra = *frap; + memset(ra, 0, sizeof(*ra)); + ra->p_dev = dev; + ra->p_ino = ino; +found: + if (rap != &raparm_cache) { + *rap = ra->p_next; + ra->p_next = raparm_cache; + raparm_cache = ra; + } + ra->p_count++; + nfsdstats.ra_depth[depth*10/nfsdstats.ra_size]++; + return ra; +} + +/* + * Read data from a file. 
count must contain the requested read count + * on entry. On return, *count contains the number of bytes actually read. + * N.B. After this call fhp needs an fh_put + */ +int +nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, + char *buf, unsigned long *count) +{ + struct raparms *ra; + mm_segment_t oldfs; + int err; + struct file file; + + err = nfsd_open(rqstp, fhp, S_IFREG, MAY_READ, &file); + if (err) + goto out; + err = nfserr_perm; + if (!file.f_op->read) + goto out_close; +#ifdef MSNFS + if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && + (!lock_may_read(file.f_dentry->d_inode, offset, *count))) + goto out_close; +#endif + + /* Get readahead parameters */ + ra = nfsd_get_raparms(fhp->fh_export->ex_dev, fhp->fh_dentry->d_inode->i_ino); + if (ra) { + file.f_reada = ra->p_reada; + file.f_ramax = ra->p_ramax; + file.f_raend = ra->p_raend; + file.f_ralen = ra->p_ralen; + file.f_rawin = ra->p_rawin; + } + file.f_pos = offset; + + oldfs = get_fs(); set_fs(KERNEL_DS); + err = file.f_op->read(&file, buf, *count, &file.f_pos); + set_fs(oldfs); + + /* Write back readahead params */ + if (ra != NULL) { + dprintk("nfsd: raparms %ld %ld %ld %ld %ld\n", + file.f_reada, file.f_ramax, file.f_raend, + file.f_ralen, file.f_rawin); + ra->p_reada = file.f_reada; + ra->p_ramax = file.f_ramax; + ra->p_raend = file.f_raend; + ra->p_ralen = file.f_ralen; + ra->p_rawin = file.f_rawin; + ra->p_count -= 1; + } + + if (err >= 0) { + nfsdstats.io_read += err; + *count = err; + err = 0; + } else + err = nfserrno(err); +out_close: + nfsd_close(&file); +out: + return err; +} + +/* + * Write data to a file. + * The stable flag requests synchronous writes. + * N.B. After this call fhp needs an fh_put + */ +int +nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, + char *buf, unsigned long cnt, int *stablep) +{ + struct svc_export *exp; + struct file file; + struct dentry *dentry; + struct inode *inode; + mm_segment_t oldfs; + int err = 0; + int stable = *stablep; + + err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file); + if (err) + goto out; + if (!cnt) + goto out_close; + err = nfserr_perm; + if (!file.f_op->write) + goto out_close; +#ifdef MSNFS + if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && + (!lock_may_write(file.f_dentry->d_inode, offset, cnt))) + goto out_close; +#endif + + dentry = file.f_dentry; + inode = dentry->d_inode; + exp = fhp->fh_export; + + /* + * Request sync writes if + * - the sync export option has been set, or + * - the client requested O_SYNC behavior (NFSv3 feature). + * - The file system doesn't support fsync(). + * When gathered writes have been configured for this volume, + * flushing the data to disk is handled separately below. + */ + + if (file.f_op->fsync == 0) {/* COMMIT3 cannot work */ + stable = 2; + *stablep = 2; /* FILE_SYNC */ + } + + if (!EX_ISSYNC(exp)) + stable = 0; + if (stable && !EX_WGATHER(exp)) + file.f_flags |= O_SYNC; + + file.f_pos = offset; /* set write offset */ + + /* Write the data. 
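+	 * By now "stable" reflects the policy above: forced to 2
+	 * (FILE_SYNC) when the fs lacks fsync, cleared for async
+	 * exports, and for sync exports without write gathering the
+	 * O_SYNC flag is already set on the file, so the plain write
+	 * below flushes as it goes.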
*/ + oldfs = get_fs(); set_fs(KERNEL_DS); + err = file.f_op->write(&file, buf, cnt, &file.f_pos); + if (err >= 0) + nfsdstats.io_write += cnt; + set_fs(oldfs); + + /* clear setuid/setgid flag after write */ + if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) { + struct iattr ia; + + ia.ia_valid = ATTR_MODE; + ia.ia_mode = inode->i_mode & ~(S_ISUID | S_ISGID); + notify_change(dentry, &ia); + } + + if (err >= 0 && stable) { + static unsigned long last_ino; + static kdev_t last_dev = NODEV; + + /* + * Gathered writes: If another process is currently + * writing to the file, there's a high chance + * this is another nfsd (triggered by a bulk write + * from a client's biod). Rather than syncing the + * file with each write request, we sleep for 10 msec. + * + * I don't know if this roughly approximates + * C. Juszak's idea of gathered writes, but it's a + * nice and simple solution (IMHO), and it seems to + * work:-) + */ + if (EX_WGATHER(exp)) { + if (atomic_read(&inode->i_writecount) > 1 + || (last_ino == inode->i_ino && last_dev == inode->i_dev)) { + dprintk("nfsd: write defer %d\n", current->pid); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout((HZ+99)/100); + current->state = TASK_RUNNING; + dprintk("nfsd: write resume %d\n", current->pid); + } + + if (inode->i_state & I_DIRTY) { + dprintk("nfsd: write sync %d\n", current->pid); + nfsd_sync(&file); + } +#if 0 + wake_up(&inode->i_wait); +#endif + } + last_ino = inode->i_ino; + last_dev = inode->i_dev; + } + + dprintk("nfsd: write complete err=%d\n", err); + if (err >= 0) + err = 0; + else + err = nfserrno(err); +out_close: + nfsd_close(&file); +out: + return err; +} + + +#ifdef CONFIG_NFSD_V3 +/* + * Commit all pending writes to stable storage. + * Strictly speaking, we could sync just the indicated file region here, + * but there's currently no way we can ask the VFS to do so. + * + * Unfortunately we cannot lock the file to make sure we return full WCC + * data to the client, as locking happens lower down in the filesystem. + */ +int +nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, + off_t offset, unsigned long count) +{ + struct file file; + int err; + + if ((err = nfsd_open(rqstp, fhp, S_IFREG, MAY_WRITE, &file)) != 0) + return err; + if (EX_ISSYNC(fhp->fh_export)) { + if (file.f_op && file.f_op->fsync) { + nfsd_sync(&file); + } else { + err = nfserr_notsupp; + } + } + + nfsd_close(&file); + return err; +} +#endif /* CONFIG_NFSD_V3 */ + +/* + * Create a file (regular, directory, device, fifo); UNIX sockets + * not yet implemented. + * If the response fh has been verified, the parent directory should + * already be locked. Note that the parent directory is left locked. + * + * N.B. Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp + */ +int +nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, + char *fname, int flen, struct iattr *iap, + int type, dev_t rdev, struct svc_fh *resfhp) +{ + struct dentry *dentry, *dchild; + struct inode *dirp; + int err; + + err = nfserr_perm; + if (!flen) + goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; + + err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); + if (err) + goto out; + + dentry = fhp->fh_dentry; + dirp = dentry->d_inode; + + err = nfserr_notdir; + if(!dirp->i_op || !dirp->i_op->lookup) + goto out; + /* + * Check whether the response file handle has been verified yet. + * If it has, the parent directory should already be locked. 
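+	 * (Two callers, two shapes: nfsd_proc_create hands us a resfh
+	 * that already carries the child dentry with the parent locked,
+	 * while the mkdir and V3 create paths arrive with an empty
+	 * resfh and the lookup happens below.)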
+ */ + if (!resfhp->fh_dentry) { + /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ + fh_lock(fhp); + dchild = lookup_one_len(fname, dentry, flen); + err = PTR_ERR(dchild); + if (IS_ERR(dchild)) + goto out_nfserr; + err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); + if (err) + goto out; + } else { + /* called from nfsd_proc_create */ + dchild = resfhp->fh_dentry; + if (!fhp->fh_locked) { + /* not actually possible */ + printk(KERN_ERR + "nfsd_create: parent %s/%s not locked!\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); + err = -EIO; + goto out; + } + } + /* + * Make sure the child dentry is still negative ... + */ + err = nfserr_exist; + if (dchild->d_inode) { + dprintk("nfsd_create: dentry %s/%s not negative!\n", + dentry->d_name.name, dchild->d_name.name); + goto out; + } + + if (!(iap->ia_valid & ATTR_MODE)) + iap->ia_mode = 0; + iap->ia_mode = (iap->ia_mode & S_IALLUGO) | type; + + /* + * Get the dir op function pointer. + */ + err = nfserr_perm; + switch (type) { + case S_IFREG: + err = vfs_create(dirp, dchild, iap->ia_mode); + break; + case S_IFDIR: + err = vfs_mkdir(dirp, dchild, iap->ia_mode); + break; + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); + break; + default: + printk("nfsd: bad file type %o in nfsd_create\n", type); + err = -EINVAL; + } + if (err < 0) + goto out_nfserr; + + if (EX_ISSYNC(fhp->fh_export)) { + nfsd_sync_dir(dentry); + write_inode_now(dchild->d_inode, 1); + } + + + /* Set file attributes. Mode has already been set and + * setting uid/gid works only for root. Irix appears to + * send along the gid when it tries to implement setgid + * directories via NFS. + */ + err = 0; + if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) + err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); + /* + * Update the file handle to get the new inode info. + */ + if (!err) + err = fh_update(resfhp); +out: + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +#ifdef CONFIG_NFSD_V3 +/* + * NFSv3 version of nfsd_create + */ +int +nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, + char *fname, int flen, struct iattr *iap, + struct svc_fh *resfhp, int createmode, u32 *verifier) +{ + struct dentry *dentry, *dchild; + struct inode *dirp; + int err; + __u32 v_mtime=0, v_atime=0; + int v_mode=0; + + err = nfserr_perm; + if (!flen) + goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; + if (!(iap->ia_valid & ATTR_MODE)) + iap->ia_mode = 0; + err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); + if (err) + goto out; + + dentry = fhp->fh_dentry; + dirp = dentry->d_inode; + + /* Get all the sanity checks out of the way before + * we lock the parent. */ + err = nfserr_notdir; + if(!dirp->i_op || !dirp->i_op->lookup) + goto out; + fh_lock(fhp); + + /* + * Compose the response file handle. 
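+	 * (A worked example for the EXCLUSIVE-create verifier stashing
+	 * a bit further down: verifier = { 0x80000001, 0x80000002 }
+	 * gives v_mtime = 1, v_atime = 2, and the two stolen top bits
+	 * land in v_mode as u+x and u+r, i.e. S_IFREG | 0500.)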
+ */ + dchild = lookup_one_len(fname, dentry, flen); + err = PTR_ERR(dchild); + if (IS_ERR(dchild)) + goto out_nfserr; + + err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); + if (err) + goto out; + + if (createmode == NFS3_CREATE_EXCLUSIVE) { + /* while the verifier would fit in mtime+atime, + * solaris7 gets confused (bugid 4218508) if these have + * the high bit set, so we use the mode as well + */ + v_mtime = verifier[0]&0x7fffffff; + v_atime = verifier[1]&0x7fffffff; + v_mode = S_IFREG + | ((verifier[0]&0x80000000) >> (32-7)) /* u+x */ + | ((verifier[1]&0x80000000) >> (32-9)) /* u+r */ + ; + } + + if (dchild->d_inode) { + err = 0; + + switch (createmode) { + case NFS3_CREATE_UNCHECKED: + if (! S_ISREG(dchild->d_inode->i_mode)) + err = nfserr_exist; + else { + iap->ia_valid &= ATTR_SIZE; + goto set_attr; + } + break; + case NFS3_CREATE_EXCLUSIVE: + if ( dchild->d_inode->i_mtime == v_mtime + && dchild->d_inode->i_atime == v_atime + && dchild->d_inode->i_mode == v_mode + && dchild->d_inode->i_size == 0 ) + break; + /* fallthru */ + case NFS3_CREATE_GUARDED: + err = nfserr_exist; + } + goto out; + } + + err = vfs_create(dirp, dchild, iap->ia_mode); + if (err < 0) + goto out_nfserr; + + if (EX_ISSYNC(fhp->fh_export)) { + nfsd_sync_dir(dentry); + /* setattr will sync the child (or not) */ + } + + /* + * Update the filehandle to get the new inode info. + */ + err = fh_update(resfhp); + if (err) + goto out; + + if (createmode == NFS3_CREATE_EXCLUSIVE) { + /* Cram the verifier into atime/mtime/mode */ + iap->ia_valid = ATTR_MTIME|ATTR_ATIME + | ATTR_MTIME_SET|ATTR_ATIME_SET + | ATTR_MODE; + iap->ia_mtime = v_mtime; + iap->ia_atime = v_atime; + iap->ia_mode = v_mode; + } + + /* Set file attributes. + * Mode has already been set but we might need to reset it + * for CREATE_EXCLUSIVE + * Irix appears to send along the gid when it tries to + * implement setgid directories via NFS. Clear out all that cruft. + */ + set_attr: + if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0) + err = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); + + out: + fh_unlock(fhp); + return err; + + out_nfserr: + err = nfserrno(err); + goto out; +} +#endif /* CONFIG_NFSD_V3 */ + +/* + * Read a symlink. On entry, *lenp must contain the maximum path length that + * fits into the buffer. On return, it contains the true length. + * N.B. After this call fhp needs an fh_put + */ +int +nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) +{ + struct dentry *dentry; + struct inode *inode; + mm_segment_t oldfs; + int err; + + err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP); + if (err) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + err = nfserr_inval; + if (!inode->i_op || !inode->i_op->readlink) + goto out; + + UPDATE_ATIME(inode); + /* N.B. Why does this call need a get_fs()?? + * Remove the set_fs and watch the fireworks:-) --okir + */ + + oldfs = get_fs(); set_fs(KERNEL_DS); + err = inode->i_op->readlink(dentry, buf, *lenp); + set_fs(oldfs); + + if (err < 0) + goto out_nfserr; + *lenp = err; + err = 0; +out: + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +/* + * Create a symlink and look up its inode + * N.B. 
After this call _both_ fhp and resfhp need an fh_put + */ +int +nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, + char *fname, int flen, + char *path, int plen, + struct svc_fh *resfhp, + struct iattr *iap) +{ + struct dentry *dentry, *dnew; + int err, cerr; + + err = nfserr_noent; + if (!flen || !plen) + goto out; + err = nfserr_exist; + if (isdotent(fname, flen)) + goto out; + + err = fh_verify(rqstp, fhp, S_IFDIR, MAY_CREATE); + if (err) + goto out; + fh_lock(fhp); + dentry = fhp->fh_dentry; + dnew = lookup_one_len(fname, dentry, flen); + err = PTR_ERR(dnew); + if (IS_ERR(dnew)) + goto out_nfserr; + + err = vfs_symlink(dentry->d_inode, dnew, path); + if (!err) { + if (EX_ISSYNC(fhp->fh_export)) + nfsd_sync_dir(dentry); + if (iap) { + iap->ia_valid &= ATTR_MODE /* ~(ATTR_MODE|ATTR_UID|ATTR_GID)*/; + if (iap->ia_valid) { + iap->ia_valid |= ATTR_CTIME; + iap->ia_mode = (iap->ia_mode&S_IALLUGO) + | S_IFLNK; + err = notify_change(dnew, iap); + if (!err && EX_ISSYNC(fhp->fh_export)) + write_inode_now(dentry->d_inode, 1); + } + } + } else + err = nfserrno(err); + fh_unlock(fhp); + + /* Compose the fh so the dentry will be freed ... */ + cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); + if (err==0) err = cerr; +out: + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +/* + * Create a hardlink + * N.B. After this call _both_ ffhp and tfhp need an fh_put + */ +int +nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, + char *name, int len, struct svc_fh *tfhp) +{ + struct dentry *ddir, *dnew, *dold; + struct inode *dirp, *dest; + int err; + + err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE); + if (err) + goto out; + err = fh_verify(rqstp, tfhp, -S_IFDIR, MAY_NOP); + if (err) + goto out; + + err = nfserr_perm; + if (!len) + goto out; + err = nfserr_exist; + if (isdotent(name, len)) + goto out; + + fh_lock(ffhp); + ddir = ffhp->fh_dentry; + dirp = ddir->d_inode; + + dnew = lookup_one_len(name, ddir, len); + err = PTR_ERR(dnew); + if (IS_ERR(dnew)) + goto out_nfserr; + + dold = tfhp->fh_dentry; + dest = dold->d_inode; + + err = vfs_link(dold, dirp, dnew); + if (!err) { + if (EX_ISSYNC(ffhp->fh_export)) { + nfsd_sync_dir(ddir); + write_inode_now(dest, 1); + } + } else { + if (err == -EXDEV && rqstp->rq_vers == 2) + err = nfserr_acces; + else + err = nfserrno(err); + } + + fh_unlock(ffhp); + dput(dnew); +out: + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +/* + * Rename a file + * N.B. After this call _both_ ffhp and tfhp need an fh_put + */ +int +nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, + struct svc_fh *tfhp, char *tname, int tlen) +{ + struct dentry *fdentry, *tdentry, *odentry, *ndentry; + struct inode *fdir, *tdir; + int err; + + err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE); + if (err) + goto out; + err = fh_verify(rqstp, tfhp, S_IFDIR, MAY_CREATE); + if (err) + goto out; + + fdentry = ffhp->fh_dentry; + fdir = fdentry->d_inode; + + tdentry = tfhp->fh_dentry; + tdir = tdentry->d_inode; + + err = (rqstp->rq_vers == 2) ? 
nfserr_acces : nfserr_xdev; + if (fdir->i_dev != tdir->i_dev) + goto out; + + err = nfserr_perm; + if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen)) + goto out; + + /* cannot use fh_lock as we need deadlock protective ordering + * so do it by hand */ + double_down(&tdir->i_sem, &fdir->i_sem); + ffhp->fh_locked = tfhp->fh_locked = 1; + fill_pre_wcc(ffhp); + fill_pre_wcc(tfhp); + + odentry = lookup_one_len(fname, fdentry, flen); + err = PTR_ERR(odentry); + if (IS_ERR(odentry)) + goto out_nfserr; + + err = -ENOENT; + if (!odentry->d_inode) + goto out_dput_old; + + ndentry = lookup_one_len(tname, tdentry, tlen); + err = PTR_ERR(ndentry); + if (IS_ERR(ndentry)) + goto out_dput_old; + + +#ifdef MSNFS + if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) && + ((atomic_read(&odentry->d_count) > 1) + || (atomic_read(&ndentry->d_count) > 1))) { + err = nfserr_perm; + } else +#endif + err = vfs_rename(fdir, odentry, tdir, ndentry); + if (!err && EX_ISSYNC(tfhp->fh_export)) { + nfsd_sync_dir(tdentry); + nfsd_sync_dir(fdentry); + } + dput(ndentry); + + out_dput_old: + dput(odentry); + out_nfserr: + if (err) + err = nfserrno(err); + + /* we cannot reply on fh_unlock on the two filehandles, + * as that would do the wrong thing if the two directories + * were the same, so again we do it by hand + */ + fill_post_wcc(ffhp); + fill_post_wcc(tfhp); + double_up(&tdir->i_sem, &fdir->i_sem); + ffhp->fh_locked = tfhp->fh_locked = 0; + +out: + return err; +} + +/* + * Unlink a file or directory + * N.B. After this call fhp needs an fh_put + */ +int +nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, + char *fname, int flen) +{ + struct dentry *dentry, *rdentry; + struct inode *dirp; + int err; + + err = nfserr_acces; + if (!flen || isdotent(fname, flen)) + goto out; + err = fh_verify(rqstp, fhp, S_IFDIR, MAY_REMOVE); + if (err) + goto out; + + fh_lock(fhp); + dentry = fhp->fh_dentry; + dirp = dentry->d_inode; + + rdentry = lookup_one_len(fname, dentry, flen); + err = PTR_ERR(rdentry); + if (IS_ERR(rdentry)) + goto out_nfserr; + + if (!rdentry->d_inode) { + dput(rdentry); + err = nfserr_noent; + goto out; + } + + if (type != S_IFDIR) { /* It's UNLINK */ +#ifdef MSNFS + if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && + (atomic_read(&rdentry->d_count) > 1)) { + err = nfserr_perm; + } else +#endif + err = vfs_unlink(dirp, rdentry); + } else { /* It's RMDIR */ + err = vfs_rmdir(dirp, rdentry); + } + + dput(rdentry); + + if (err) + goto out_nfserr; + if (EX_ISSYNC(fhp->fh_export)) + nfsd_sync_dir(dentry); + +out: + return err; + +out_nfserr: + err = nfserrno(err); + goto out; +} + +/* + * Read entries from a directory. + * The verifier is an NFSv3 thing we ignore for now. + */ +int +nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, + encode_dent_fn func, u32 *buffer, int *countp, u32 *verf) +{ + struct inode *inode; + u32 *p; + int oldlen, eof, err; + struct file file; + struct readdir_cd cd; + + err = nfsd_open(rqstp, fhp, S_IFDIR, MAY_READ, &file); + if (err) + goto out; + if (offset > ~(u32) 0) + goto out_close; + + err = nfserr_notdir; + if (!file.f_op->readdir) + goto out_close; + file.f_pos = offset; + + /* Set up the readdir context */ + memset(&cd, 0, sizeof(cd)); + cd.rqstp = rqstp; + cd.buffer = buffer; + cd.buflen = *countp; /* count of words */ + cd.dirfh = fhp; + + /* + * Read the directory entries. This silly loop is necessary because + * readdir() is not guaranteed to fill up the entire buffer, but + * may choose to do less. 
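+	 * A filesystem may stop early -- at the end of a block, say --
+	 * with room still left in the buffer; only a pass that adds
+	 * nothing (oldlen == cd.buflen) or an explicit cd.eob means we
+	 * are really done.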
+ */ + inode = file.f_dentry->d_inode; + down(&inode->i_sem); + while (1) { + oldlen = cd.buflen; + + /* + dprintk("nfsd: f_op->readdir(%x/%ld @ %d) buflen = %d (%d)\n", + file.f_inode->i_dev, file.f_inode->i_ino, + (int) file.f_pos, (int) oldlen, (int) cd.buflen); + */ + err = file.f_op->readdir(&file, &cd, (filldir_t) func); + if (err < 0) + goto out_nfserr; + if (oldlen == cd.buflen) + break; + if (cd.eob) + break; + } + up(&inode->i_sem); + + /* If we didn't fill the buffer completely, we're at EOF */ + eof = !cd.eob; + + if (cd.offset) { + if (rqstp->rq_vers == 3) + (void)xdr_encode_hyper(cd.offset, file.f_pos); + else + *cd.offset = htonl(file.f_pos); + } + + p = cd.buffer; + *p++ = 0; /* no more entries */ + *p++ = htonl(eof); /* end of directory */ + *countp = (caddr_t) p - (caddr_t) buffer; + + dprintk("nfsd: readdir result %d bytes, eof %d offset %d\n", + *countp, eof, + cd.offset? ntohl(*cd.offset) : -1); + err = 0; +out_close: + nfsd_close(&file); +out: + return err; + +out_nfserr: + up(&inode->i_sem); + err = nfserrno(err); + goto out_close; +} + +/* + * Get file system stats + * N.B. After this call fhp needs an fh_put + */ +int +nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct statfs *stat) +{ + int err = fh_verify(rqstp, fhp, 0, MAY_NOP); + if (!err && vfs_statfs(fhp->fh_dentry->d_inode->i_sb,stat)) + err = nfserr_io; + return err; +} + +/* + * Check for a user's access permissions to this inode. + */ +int +nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) +{ + struct inode *inode = dentry->d_inode; + int err; + + if (acc == MAY_NOP) + return 0; +#if 0 + dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s%s\n", + acc, + (acc & MAY_READ)? " read" : "", + (acc & MAY_WRITE)? " write" : "", + (acc & MAY_EXEC)? " exec" : "", + (acc & MAY_SATTR)? " sattr" : "", + (acc & MAY_TRUNC)? " trunc" : "", + (acc & MAY_LOCK)? " lock" : "", + (acc & MAY_OWNER_OVERRIDE)? " owneroverride" : "", + inode->i_mode, + IS_IMMUTABLE_FILE(inode)? " immut(F)" : "", + IS_IMMUTABLE_LINK(inode)? " immut(L)" : "", + IS_APPEND(inode)? " append" : "", + IS_RDONLY(inode)? " ro" : ""); + dprintk(" owner %d/%d user %d/%d\n", + inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); +#endif + + /* only care about readonly exports for files and + * directories. links don't have meaningful write access, + * and all else is local to the client + */ + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) + if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { + if (EX_RDONLY(exp) || IS_RDONLY(inode)) + return nfserr_rofs; + if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE_FILE(inode)) + return nfserr_perm; + } + if ((acc & MAY_TRUNC) && IS_APPEND(inode)) + return nfserr_perm; + + if (acc & MAY_LOCK) { + /* If we cannot rely on authentication in NLM requests, + * just allow locks, otherwise require read permission, or + * ownership + */ + if (exp->ex_flags & NFSEXP_NOAUTHNLM) + return 0; + else + acc = MAY_READ | MAY_OWNER_OVERRIDE; + } + /* + * The file owner always gets access permission for accesses that + * would normally be checked at open time. This is to make + * file access work even when the client has done a fchmod(fd, 0). + * + * However, `cp foo bar' should fail nevertheless when bar is + * readonly. A sensible way to do this might be to reject all + * attempts to truncate a read-only file, because a creat() call + * always implies file truncation. + * ... but this isn't really fair. 
A process may reasonably call + * ftruncate on an open file descriptor on a file with perm 000. + * We must trust the client to do permission checking - using "ACCESS" + * with NFSv3. + */ + if ((acc & MAY_OWNER_OVERRIDE) && + inode->i_uid == current->fsuid) + return 0; + + acc &= ~ MAY_OWNER_OVERRIDE; /* This bit is no longer needed, + and gets in the way later */ + + err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC)); + + /* Allow read access to binaries even when mode 111 */ + if (err == -EACCES && S_ISREG(inode->i_mode) && acc == MAY_READ) + err = permission(inode, MAY_EXEC); + + return err? nfserrno(err) : 0; +} + +void +nfsd_racache_shutdown(void) +{ + if (!raparm_cache) + return; + dprintk("nfsd: freeing readahead buffers.\n"); + kfree(raparml); + raparm_cache = raparml = NULL; +} +/* + * Initialize readahead param cache + */ +int +nfsd_racache_init(int cache_size) +{ + int i; + + if (raparm_cache) + return 0; + raparml = kmalloc(sizeof(struct raparms) * cache_size, GFP_KERNEL); + + if (raparml != NULL) { + dprintk("nfsd: allocating %d readahead buffers.\n", + cache_size); + memset(raparml, 0, sizeof(struct raparms) * cache_size); + for (i = 0; i < cache_size - 1; i++) { + raparml[i].p_next = raparml + i + 1; + } + raparm_cache = raparml; + } else { + printk(KERN_WARNING + "nfsd: Could not allocate memory read-ahead cache.\n"); + return -ENOMEM; + } + nfsdstats.ra_size = cache_size; + return 0; +} diff -urN linux-2.4.16-reiserfspatches-immutable/fs/open.c linux-2.4.16-reiserfspatches-immutable-ctx4/fs/open.c --- linux-2.4.16-reiserfspatches-immutable/fs/open.c Mon Dec 10 14:28:03 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/open.c Mon Dec 10 15:01:47 2001 @@ -122,7 +122,7 @@ goto dput_and_out; error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + if (IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode)) goto dput_and_out; /* diff -urN linux-2.4.16-reiserfspatches-immutable/fs/open.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/open.c~ --- linux-2.4.16-reiserfspatches-immutable/fs/open.c~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/open.c~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,895 @@ +/* + * linux/fs/open.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) + +int vfs_statfs(struct super_block *sb, struct statfs *buf) +{ + int retval = -ENODEV; + + if (sb) { + retval = -ENOSYS; + if (sb->s_op && sb->s_op->statfs) { + memset(buf, 0, sizeof(struct statfs)); + lock_kernel(); + retval = sb->s_op->statfs(sb, buf); + unlock_kernel(); + } + } + return retval; +} + + +asmlinkage long sys_statfs(const char * path, struct statfs * buf) +{ + struct nameidata nd; + int error; + + error = user_path_walk(path, &nd); + if (!error) { + struct statfs tmp; + error = vfs_statfs(nd.dentry->d_inode->i_sb, &tmp); + if (!error && copy_to_user(buf, &tmp, sizeof(struct statfs))) + error = -EFAULT; + path_release(&nd); + } + return error; +} + +asmlinkage long sys_fstatfs(unsigned int fd, struct statfs * buf) +{ + struct file * file; + struct statfs tmp; + int error; + + error = -EBADF; + file = fget(fd); + if (!file) + goto out; + error = vfs_statfs(file->f_dentry->d_inode->i_sb, &tmp); + if (!error && copy_to_user(buf, &tmp, sizeof(struct statfs))) + error = -EFAULT; + fput(file); +out: + return error; +} + +int do_truncate(struct dentry *dentry, 
loff_t length) +{ + struct inode *inode = dentry->d_inode; + int error; + struct iattr newattrs; + + /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ + if (length < 0) + return -EINVAL; + + down(&inode->i_sem); + newattrs.ia_size = length; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + error = notify_change(dentry, &newattrs); + up(&inode->i_sem); + return error; +} + +static inline long do_sys_truncate(const char * path, loff_t length) +{ + struct nameidata nd; + struct inode * inode; + int error; + + error = -EINVAL; + if (length < 0) /* sorry, but loff_t says... */ + goto out; + + error = user_path_walk(path, &nd); + if (error) + goto out; + inode = nd.dentry->d_inode; + + /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ + error = -EISDIR; + if (S_ISDIR(inode->i_mode)) + goto dput_and_out; + + error = -EINVAL; + if (!S_ISREG(inode->i_mode)) + goto dput_and_out; + + error = permission(inode,MAY_WRITE); + if (error) + goto dput_and_out; + + error = -EROFS; + if (IS_RDONLY(inode)) + goto dput_and_out; + + error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto dput_and_out; + + /* + * Make sure that there are no leases. + */ + error = get_lease(inode, FMODE_WRITE); + if (error) + goto dput_and_out; + + error = get_write_access(inode); + if (error) + goto dput_and_out; + + error = locks_verify_truncate(inode, NULL, length); + if (!error) { + DQUOT_INIT(inode); + error = do_truncate(nd.dentry, length); + } + put_write_access(inode); + +dput_and_out: + path_release(&nd); +out: + return error; +} + +asmlinkage long sys_truncate(const char * path, unsigned long length) +{ + /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */ + return do_sys_truncate(path, (long)length); +} + +static inline long do_sys_ftruncate(unsigned int fd, loff_t length, int small) +{ + struct inode * inode; + struct dentry *dentry; + struct file * file; + int error; + + error = -EINVAL; + if (length < 0) + goto out; + error = -EBADF; + file = fget(fd); + if (!file) + goto out; + + /* explicitly opened as large or we are on 64-bit box */ + if (file->f_flags & O_LARGEFILE) + small = 0; + + dentry = file->f_dentry; + inode = dentry->d_inode; + error = -EINVAL; + if (!S_ISREG(inode->i_mode) || !(file->f_mode & FMODE_WRITE)) + goto out_putf; + + error = -EINVAL; + /* Cannot ftruncate over 2^31 bytes without large file support */ + if (small && length > MAX_NON_LFS) + goto out_putf; + + error = -EPERM; + if (IS_APPEND(inode)) + goto out_putf; + + error = locks_verify_truncate(inode, file, length); + if (!error) + error = do_truncate(dentry, length); +out_putf: + fput(file); +out: + return error; +} + +asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) +{ + return do_sys_ftruncate(fd, length, 1); +} + +/* LFS versions of truncate are only needed on 32 bit machines */ +#if BITS_PER_LONG == 32 +asmlinkage long sys_truncate64(const char * path, loff_t length) +{ + return do_sys_truncate(path, length); +} + +asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) +{ + return do_sys_ftruncate(fd, length, 0); +} +#endif + +#if !(defined(__alpha__) || defined(__ia64__)) + +/* + * sys_utime() can be implemented in user-level using sys_utimes(). + * Is this for backwards compatibility? If so, why not move it + * into the appropriate arch directory (for those architectures that + * need it). + */ + +/* If times==NULL, set access and modification to current time, + * must be owner or have write permission. 
+ * Else, update from *times, must be owner or super user. + */ +asmlinkage long sys_utime(char * filename, struct utimbuf * times) +{ + int error; + struct nameidata nd; + struct inode * inode; + struct iattr newattrs; + + error = user_path_walk(filename, &nd); + if (error) + goto out; + inode = nd.dentry->d_inode; + + error = -EROFS; + if (IS_RDONLY(inode)) + goto dput_and_out; + + /* Don't worry, the checks are done in inode_change_ok() */ + newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; + if (times) { + error = get_user(newattrs.ia_atime, &times->actime); + if (!error) + error = get_user(newattrs.ia_mtime, &times->modtime); + if (error) + goto dput_and_out; + + newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; + } else { + if (current->fsuid != inode->i_uid && + (error = permission(inode,MAY_WRITE)) != 0) + goto dput_and_out; + } + error = notify_change(nd.dentry, &newattrs); +dput_and_out: + path_release(&nd); +out: + return error; +} + +#endif + +/* If times==NULL, set access and modification to current time, + * must be owner or have write permission. + * Else, update from *times, must be owner or super user. + */ +asmlinkage long sys_utimes(char * filename, struct timeval * utimes) +{ + int error; + struct nameidata nd; + struct inode * inode; + struct iattr newattrs; + + error = user_path_walk(filename, &nd); + + if (error) + goto out; + inode = nd.dentry->d_inode; + + error = -EROFS; + if (IS_RDONLY(inode)) + goto dput_and_out; + + /* Don't worry, the checks are done in inode_change_ok() */ + newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; + if (utimes) { + struct timeval times[2]; + error = -EFAULT; + if (copy_from_user(&times, utimes, sizeof(times))) + goto dput_and_out; + newattrs.ia_atime = times[0].tv_sec; + newattrs.ia_mtime = times[1].tv_sec; + newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET; + } else { + if ((error = permission(inode,MAY_WRITE)) != 0) + goto dput_and_out; + } + error = notify_change(nd.dentry, &newattrs); +dput_and_out: + path_release(&nd); +out: + return error; +} + +/* + * access() needs to use the real uid/gid, not the effective uid/gid. + * We do this by temporarily clearing all FS-related capabilities and + * switching the fsuid/fsgid around to the real ones. + */ +asmlinkage long sys_access(const char * filename, int mode) +{ + struct nameidata nd; + int old_fsuid, old_fsgid; + kernel_cap_t old_cap; + int res; + + if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK?
*/ + return -EINVAL; + + old_fsuid = current->fsuid; + old_fsgid = current->fsgid; + old_cap = current->cap_effective; + + current->fsuid = current->uid; + current->fsgid = current->gid; + + /* Clear the capabilities if we switch to a non-root user */ + if (current->uid) + cap_clear(current->cap_effective); + else + current->cap_effective = current->cap_permitted; + + res = user_path_walk(filename, &nd); + if (!res) { + res = permission(nd.dentry->d_inode, mode); + /* SuS v2 requires we report a read only fs too */ + if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) + && !special_file(nd.dentry->d_inode->i_mode)) + res = -EROFS; + path_release(&nd); + } + + current->fsuid = old_fsuid; + current->fsgid = old_fsgid; + current->cap_effective = old_cap; + + return res; +} + +asmlinkage long sys_chdir(const char * filename) +{ + int error; + struct nameidata nd; + char *name; + + name = getname(filename); + error = PTR_ERR(name); + if (IS_ERR(name)) + goto out; + + error = 0; + if (path_init(name,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd)) + error = path_walk(name, &nd); + putname(name); + if (error) + goto out; + + error = permission(nd.dentry->d_inode,MAY_EXEC); + if (error) + goto dput_and_out; + + set_fs_pwd(current->fs, nd.mnt, nd.dentry); + +dput_and_out: + path_release(&nd); +out: + return error; +} + +asmlinkage long sys_fchdir(unsigned int fd) +{ + struct file *file; + struct dentry *dentry; + struct inode *inode; + struct vfsmount *mnt; + int error; + + error = -EBADF; + file = fget(fd); + if (!file) + goto out; + + dentry = file->f_dentry; + mnt = file->f_vfsmnt; + inode = dentry->d_inode; + + error = -ENOTDIR; + if (!S_ISDIR(inode->i_mode)) + goto out_putf; + + error = permission(inode, MAY_EXEC); + if (!error) + set_fs_pwd(current->fs, mnt, dentry); +out_putf: + fput(file); +out: + return error; +} + +asmlinkage long sys_chroot(const char * filename) +{ + int error; + struct nameidata nd; + char *name; + + name = getname(filename); + error = PTR_ERR(name); + if (IS_ERR(name)) + goto out; + + path_init(name, LOOKUP_POSITIVE | LOOKUP_FOLLOW | + LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + error = path_walk(name, &nd); + putname(name); + if (error) + goto out; + + error = permission(nd.dentry->d_inode,MAY_EXEC); + if (error) + goto dput_and_out; + + error = -EPERM; + if (!capable(CAP_SYS_CHROOT)) + goto dput_and_out; + + set_fs_root(current->fs, nd.mnt, nd.dentry); + set_fs_altroot(); + error = 0; +dput_and_out: + path_release(&nd); +out: + return error; +} + +asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) +{ + struct inode * inode; + struct dentry * dentry; + struct file * file; + int err = -EBADF; + struct iattr newattrs; + + file = fget(fd); + if (!file) + goto out; + + dentry = file->f_dentry; + inode = dentry->d_inode; + + err = -EROFS; + if (IS_RDONLY(inode)) + goto out_putf; + err = -EPERM; + if (IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode)) + goto out_putf; + if (mode == (mode_t) -1) + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + err = notify_change(dentry, &newattrs); + +out_putf: + fput(file); +out: + return err; +} + +asmlinkage long sys_chmod(const char * filename, mode_t mode) +{ + struct nameidata nd; + struct inode * inode; + int error; + struct iattr newattrs; + + error = user_path_walk(filename, &nd); + if (error) + goto out; + inode = nd.dentry->d_inode; + + error = -EROFS; + if (IS_RDONLY(inode)) + goto dput_and_out; + + error = -EPERM; + if 
(IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode)) + goto dput_and_out; + + if (mode == (mode_t) -1) + mode = inode->i_mode; + newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); + newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; + error = notify_change(nd.dentry, &newattrs); + +dput_and_out: + path_release(&nd); +out: + return error; +} + +static int chown_common(struct dentry * dentry, uid_t user, gid_t group) +{ + struct inode * inode; + int error; + struct iattr newattrs; + + error = -ENOENT; + if (!(inode = dentry->d_inode)) { + printk(KERN_ERR "chown_common: NULL inode\n"); + goto out; + } + error = -EROFS; + if (IS_RDONLY(inode)) + goto out; + error = -EPERM; + if (IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode)) + goto out; + if (user == (uid_t) -1) + user = inode->i_uid; + if (group == (gid_t) -1) + group = inode->i_gid; + newattrs.ia_mode = inode->i_mode; + newattrs.ia_uid = user; + newattrs.ia_gid = group; + newattrs.ia_valid = ATTR_UID | ATTR_GID | ATTR_CTIME; + /* + * If the user or group of a non-directory has been changed by a + * non-root user, remove the setuid bit. + * 19981026 David C Niemi + * + * Changed this to apply to all users, including root, to avoid + * some races. This is the behavior we had in 2.0. The check for + * non-root was definitely wrong for 2.2 anyway, as it should + * have been using CAP_FSETID rather than fsuid -- 19990830 SD. + */ + if ((inode->i_mode & S_ISUID) == S_ISUID && + !S_ISDIR(inode->i_mode)) + { + newattrs.ia_mode &= ~S_ISUID; + newattrs.ia_valid |= ATTR_MODE; + } + /* + * Likewise, if the user or group of a non-directory has been changed + * by a non-root user, remove the setgid bit UNLESS there is no group + * execute bit (this would be a file marked for mandatory locking). + * 19981026 David C Niemi + * + * Removed the fsuid check (see the comment above) -- 19990830 SD. + */ + if (((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) + && !S_ISDIR(inode->i_mode)) + { + newattrs.ia_mode &= ~S_ISGID; + newattrs.ia_valid |= ATTR_MODE; + } + error = notify_change(dentry, &newattrs); +out: + return error; +} + +asmlinkage long sys_chown(const char * filename, uid_t user, gid_t group) +{ + struct nameidata nd; + int error; + + error = user_path_walk(filename, &nd); + if (!error) { + error = chown_common(nd.dentry, user, group); + path_release(&nd); + } + return error; +} + +asmlinkage long sys_lchown(const char * filename, uid_t user, gid_t group) +{ + struct nameidata nd; + int error; + + error = user_path_walk_link(filename, &nd); + if (!error) { + error = chown_common(nd.dentry, user, group); + path_release(&nd); + } + return error; +} + + +asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) +{ + struct file * file; + int error = -EBADF; + + file = fget(fd); + if (file) { + error = chown_common(file->f_dentry, user, group); + fput(file); + } + return error; +} + +/* + * Note that while the flag value (low two bits) for sys_open means: + * 00 - read-only + * 01 - write-only + * 10 - read-write + * 11 - special + * it is changed into + * 00 - no permissions needed + * 01 - read-permission + * 10 - write-permission + * 11 - read-write + * for the internal routines (ie open_namei()/follow_link() etc). 00 is + * used by symlinks. 
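+ * As a worked example: O_RDONLY (00) maps to 01 (read permission
+ * wanted), O_WRONLY (01) to 10 (write) and O_RDWR (10) to 11, which
+ * is exactly what the "(namei_flags+1) & O_ACCMODE" increment in
+ * filp_open() below computes; O_TRUNC then forces the write bit in
+ * with "namei_flags |= 2".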
+ */ +struct file *filp_open(const char * filename, int flags, int mode) +{ + int namei_flags, error; + struct nameidata nd; + + namei_flags = flags; + if ((namei_flags+1) & O_ACCMODE) + namei_flags++; + if (namei_flags & O_TRUNC) + namei_flags |= 2; + + error = open_namei(filename, namei_flags, mode, &nd); + if (!error) + return dentry_open(nd.dentry, nd.mnt, flags); + + return ERR_PTR(error); +} + +struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) +{ + struct file * f; + struct inode *inode; + static LIST_HEAD(kill_list); + int error; + + error = -ENFILE; + f = get_empty_filp(); + if (!f) + goto cleanup_dentry; + f->f_flags = flags; + f->f_mode = (flags+1) & O_ACCMODE; + inode = dentry->d_inode; + if (f->f_mode & FMODE_WRITE) { + error = get_write_access(inode); + if (error) + goto cleanup_file; + } + + f->f_dentry = dentry; + f->f_vfsmnt = mnt; + f->f_pos = 0; + f->f_reada = 0; + f->f_op = fops_get(inode->i_fop); + file_move(f, &inode->i_sb->s_files); + + /* preallocate kiobuf for O_DIRECT */ + f->f_iobuf = NULL; + f->f_iobuf_lock = 0; + if (f->f_flags & O_DIRECT) { + error = alloc_kiovec(1, &f->f_iobuf); + if (error) + goto cleanup_all; + } + + if (f->f_op && f->f_op->open) { + error = f->f_op->open(inode,f); + if (error) + goto cleanup_all; + } + f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); + + return f; + +cleanup_all: + if (f->f_iobuf) + free_kiovec(1, &f->f_iobuf); + fops_put(f->f_op); + if (f->f_mode & FMODE_WRITE) + put_write_access(inode); + file_move(f, &kill_list); /* out of the way.. */ + f->f_dentry = NULL; + f->f_vfsmnt = NULL; +cleanup_file: + put_filp(f); +cleanup_dentry: + dput(dentry); + mntput(mnt); + return ERR_PTR(error); +} + +/* + * Find an empty file descriptor entry, and mark it busy. + */ +int get_unused_fd(void) +{ + struct files_struct * files = current->files; + int fd, error; + + error = -EMFILE; + write_lock(&files->file_lock); + +repeat: + fd = find_next_zero_bit(files->open_fds, + files->max_fdset, + files->next_fd); + + /* + * N.B. For clone tasks sharing a files structure, this test + * will limit the total number of files that can be opened. + */ + if (fd >= current->rlim[RLIMIT_NOFILE].rlim_cur) + goto out; + + /* Do we need to expand the fdset array? */ + if (fd >= files->max_fdset) { + error = expand_fdset(files, fd); + if (!error) { + error = -EMFILE; + goto repeat; + } + goto out; + } + + /* + * Check whether we need to expand the fd array. + */ + if (fd >= files->max_fds) { + error = expand_fd_array(files, fd); + if (!error) { + error = -EMFILE; + goto repeat; + } + goto out; + } + + FD_SET(fd, files->open_fds); + FD_CLR(fd, files->close_on_exec); + files->next_fd = fd + 1; +#if 1 + /* Sanity check */ + if (files->fd[fd] != NULL) { + printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); + files->fd[fd] = NULL; + } +#endif + error = fd; + +out: + write_unlock(&files->file_lock); + return error; +} + +asmlinkage long sys_open(const char * filename, int flags, int mode) +{ + char * tmp; + int fd, error; + +#if BITS_PER_LONG != 32 + flags |= O_LARGEFILE; +#endif + tmp = getname(filename); + fd = PTR_ERR(tmp); + if (!IS_ERR(tmp)) { + fd = get_unused_fd(); + if (fd >= 0) { + struct file *f = filp_open(tmp, flags, mode); + error = PTR_ERR(f); + if (IS_ERR(f)) + goto out_error; + fd_install(fd, f); + } +out: + putname(tmp); + } + return fd; + +out_error: + put_unused_fd(fd); + fd = error; + goto out; +} + +#ifndef __alpha__ + +/* + * For backward compatibility? 
Maybe this should be moved + * into arch/i386 instead? + */ +asmlinkage long sys_creat(const char * pathname, int mode) +{ + return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); +} + +#endif + +/* + * "id" is the POSIX thread ID. We use the + * files pointer for this.. + */ +int filp_close(struct file *filp, fl_owner_t id) +{ + int retval; + + if (!file_count(filp)) { + printk(KERN_ERR "VFS: Close: file count is 0\n"); + return 0; + } + retval = 0; + if (filp->f_op && filp->f_op->flush) { + lock_kernel(); + retval = filp->f_op->flush(filp); + unlock_kernel(); + } + fcntl_dirnotify(0, filp, 0); + locks_remove_posix(filp, id); + fput(filp); + return retval; +} + +/* + * Careful here! We test whether the file pointer is NULL before + * releasing the fd. This ensures that one clone task can't release + * an fd while another clone is opening it. + */ +asmlinkage long sys_close(unsigned int fd) +{ + struct file * filp; + struct files_struct *files = current->files; + + write_lock(&files->file_lock); + if (fd >= files->max_fds) + goto out_unlock; + filp = files->fd[fd]; + if (!filp) + goto out_unlock; + files->fd[fd] = NULL; + FD_CLR(fd, files->close_on_exec); + __put_unused_fd(files, fd); + write_unlock(&files->file_lock); + return filp_close(filp, files); + +out_unlock: + write_unlock(&files->file_lock); + return -EBADF; +} + +/* + * This routine simulates a hangup on the tty, to arrange that users + * are given clean terminals at login time. + */ +asmlinkage long sys_vhangup(void) +{ + if (capable(CAP_SYS_TTY_CONFIG)) { + tty_vhangup(current->tty); + return 0; + } + return -EPERM; +} + +/* + * Called when an inode is about to be open. + * We use this to disallow opening RW large files on 32bit systems if + * the caller didn't specify O_LARGEFILE. On 64bit systems we force + * on this flag in sys_open. 
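+ * Concretely: on a 32 bit box, a plain open(2) of a file larger than
+ * MAX_NON_LFS (2^31 - 1 bytes) fails here with -EFBIG; the caller
+ * must pass O_LARGEFILE (as glibc's open64() does) to get at it.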
+ */ +int generic_file_open(struct inode * inode, struct file * filp) +{ + if (!(filp->f_flags & O_LARGEFILE) && inode->i_size > MAX_NON_LFS) + return -EFBIG; + return 0; +} + +EXPORT_SYMBOL(generic_file_open); diff -urN linux-2.4.16-reiserfspatches-immutable/fs/proc/array.c linux-2.4.16-reiserfspatches-immutable-ctx4/fs/proc/array.c --- linux-2.4.16-reiserfspatches-immutable/fs/proc/array.c Mon Dec 10 13:12:23 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/proc/array.c Mon Dec 10 15:01:47 2001 @@ -263,10 +263,12 @@ { return buffer + sprintf(buffer, "CapInh:\t%016x\n" "CapPrm:\t%016x\n" - "CapEff:\t%016x\n", + "CapEff:\t%016x\n" + "CapBset:\t%016x\n", cap_t(p->cap_inheritable), cap_t(p->cap_permitted), - cap_t(p->cap_effective)); + cap_t(p->cap_effective), + cap_t(p->cap_bset)); } @@ -288,6 +290,18 @@ } buffer = task_sig(task, buffer); buffer = task_cap(task, buffer); + buffer += sprintf (buffer,"s_context: %d\n",task->s_context); + buffer += sprintf (buffer,"ipv4root: %08lx\n",task->ipv4root); + if (task->s_info != NULL){ + buffer += sprintf (buffer,"ctxticks: %d %d %d\n" + ,atomic_read(&task->s_info->ticks),task->counter + ,task->s_info->refcount); + buffer += sprintf (buffer,"ctxflags: %d\n" + ,task->s_info->flags); + }else{ + buffer += sprintf (buffer,"ctxticks: none\n"); + buffer += sprintf (buffer,"ctxflags: none\n"); + } #if defined(CONFIG_ARCH_S390) buffer = task_show_regs(task, buffer); #endif diff -urN linux-2.4.16-reiserfspatches-immutable/fs/proc/base.c linux-2.4.16-reiserfspatches-immutable-ctx4/fs/proc/base.c --- linux-2.4.16-reiserfspatches-immutable/fs/proc/base.c Mon Dec 10 14:28:03 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/proc/base.c Mon Dec 10 15:01:47 2001 @@ -966,6 +966,11 @@ if (!task) goto out; + if (pid != 1 + && current->s_context != 1 + && task->s_context != current->s_context){ + goto out; + } inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO); free_task_struct(task); @@ -1012,6 +1017,16 @@ int pid = p->pid; if (!pid) continue; + /* Even if pid 1 is not part of the security context, */ + /* we show it anyway. This makes the security box */ + /* more standard (and helps pstree do its job). */ + /* So the current process "knows" pid 1 exists anyway, */ + /* though it still can't send it any signal. */ + + /* A process with security context 1 can see all processes */ + if (pid != 1 + && current->s_context != 1 + && p->s_context != current->s_context) continue; if (--index >= 0) continue; pids[nr_pids] = pid; diff -urN linux-2.4.16-reiserfspatches-immutable/fs/proc/base.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/proc/base.c~ --- linux-2.4.16-reiserfspatches-immutable/fs/proc/base.c~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/proc/base.c~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,1055 @@ +/* + * linux/fs/proc/base.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * proc base directory handling functions + * + * 1999, Al Viro. Rewritten. Now it covers the whole per-process part. + * Instead of using magical inumbers to determine the kind of object + * we allocate and fill in-core inodes upon lookup. They don't even + * go into icache. We cache the reference to task_struct upon lookup too. + * Eventually it should become a filesystem in its own. We don't use the + * rest of procfs anymore. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * For hysterical raisins we keep the same inumbers as in the old procfs.
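+ * As an example of the packing, with the macro and enum that follow,
+ * the "status" file of pid 42 gets inode fake_ino(42, PROC_PID_STATUS),
+ * i.e. (42 << 16) | 3 == 0x2a0003.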
+ * Feel free to change the macro below - just keep the range distinct from + * inumbers of the rest of procfs (currently those are in 0x0000--0xffff). + * As soon as we'll get a separate superblock we will be able to forget + * about magical ranges too. + */ + +#define fake_ino(pid,ino) (((pid)<<16)|(ino)) + +ssize_t proc_pid_read_maps(struct task_struct*,struct file*,char*,size_t,loff_t*); +int proc_pid_stat(struct task_struct*,char*); +int proc_pid_status(struct task_struct*,char*); +int proc_pid_statm(struct task_struct*,char*); +int proc_pid_cpu(struct task_struct*,char*); + +static int proc_fd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + if (inode->u.proc_i.file) { + *mnt = mntget(inode->u.proc_i.file->f_vfsmnt); + *dentry = dget(inode->u.proc_i.file->f_dentry); + return 0; + } + return -ENOENT; +} + +static int proc_exe_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct mm_struct * mm; + struct vm_area_struct * vma; + int result = -ENOENT; + struct task_struct *task = inode->u.proc_i.task; + + task_lock(task); + mm = task->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(task); + if (!mm) + goto out; + down_read(&mm->mmap_sem); + vma = mm->mmap; + while (vma) { + if ((vma->vm_flags & VM_EXECUTABLE) && + vma->vm_file) { + *mnt = mntget(vma->vm_file->f_vfsmnt); + *dentry = dget(vma->vm_file->f_dentry); + result = 0; + break; + } + vma = vma->vm_next; + } + up_read(&mm->mmap_sem); + mmput(mm); +out: + return result; +} + +static int proc_cwd_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct fs_struct *fs; + int result = -ENOENT; + task_lock(inode->u.proc_i.task); + fs = inode->u.proc_i.task->fs; + if(fs) + atomic_inc(&fs->count); + task_unlock(inode->u.proc_i.task); + if (fs) { + read_lock(&fs->lock); + *mnt = mntget(fs->pwdmnt); + *dentry = dget(fs->pwd); + read_unlock(&fs->lock); + result = 0; + put_fs_struct(fs); + } + return result; +} + +static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vfsmount **mnt) +{ + struct fs_struct *fs; + int result = -ENOENT; + task_lock(inode->u.proc_i.task); + fs = inode->u.proc_i.task->fs; + if(fs) + atomic_inc(&fs->count); + task_unlock(inode->u.proc_i.task); + if (fs) { + read_lock(&fs->lock); + *mnt = mntget(fs->rootmnt); + *dentry = dget(fs->root); + read_unlock(&fs->lock); + result = 0; + put_fs_struct(fs); + } + return result; +} + +static int proc_pid_environ(struct task_struct *task, char * buffer) +{ + struct mm_struct *mm; + int res = 0; + task_lock(task); + mm = task->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(task); + if (mm) { + int len = mm->env_end - mm->env_start; + if (len > PAGE_SIZE) + len = PAGE_SIZE; + res = access_process_vm(task, mm->env_start, buffer, len, 0); + mmput(mm); + } + return res; +} + +static int proc_pid_cmdline(struct task_struct *task, char * buffer) +{ + struct mm_struct *mm; + int res = 0; + task_lock(task); + mm = task->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(task); + if (mm) { + int len = mm->arg_end - mm->arg_start; + if (len > PAGE_SIZE) + len = PAGE_SIZE; + res = access_process_vm(task, mm->arg_start, buffer, len, 0); + // If the nul at the end of args has been overwritten, then + // assume application is using setproctitle(3). 
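+ // Either the new title still contains an embedded NUL (the strnlen
+ // below finds it), or it has run straight on into the environment
+ // area, in which case we append the start of the environment and
+ // look for the terminator there.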
+ if ( res > 0 && buffer[res-1] != '\0' ) + { + len = strnlen( buffer, res ); + if ( len < res ) + { + res = len; + } + else + { + len = mm->env_end - mm->env_start; + if (len > PAGE_SIZE - res) + len = PAGE_SIZE - res; + res += access_process_vm(task, mm->env_start, buffer+res, len, 0); + res = strnlen( buffer, res ); + } + } + mmput(mm); + } + return res; +} + +/************************************************************************/ +/* Here the fs part begins */ +/************************************************************************/ + +/* permission checks */ + +static int proc_check_root(struct inode *inode) +{ + struct dentry *de, *base, *root; + struct vfsmount *our_vfsmnt, *vfsmnt, *mnt; + int res = 0; + + if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ + return -ENOENT; + read_lock(&current->fs->lock); + our_vfsmnt = mntget(current->fs->rootmnt); + base = dget(current->fs->root); + read_unlock(&current->fs->lock); + + spin_lock(&dcache_lock); + de = root; + mnt = vfsmnt; + + while (vfsmnt != our_vfsmnt) { + if (vfsmnt == vfsmnt->mnt_parent) + goto out; + de = vfsmnt->mnt_mountpoint; + vfsmnt = vfsmnt->mnt_parent; + } + + if (!is_subdir(de, base)) + goto out; + spin_unlock(&dcache_lock); + +exit: + dput(base); + mntput(our_vfsmnt); + dput(root); + mntput(mnt); + return res; +out: + spin_unlock(&dcache_lock); + res = -EACCES; + goto exit; +} + +static int proc_permission(struct inode *inode, int mask) +{ + if (vfs_permission(inode, mask) != 0) + return -EACCES; + return proc_check_root(inode); +} + +static ssize_t pid_maps_read(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + struct task_struct *task = inode->u.proc_i.task; + ssize_t res; + + res = proc_pid_read_maps(task, file, buf, count, ppos); + return res; +} + +static struct file_operations proc_maps_operations = { + read: pid_maps_read, +}; + +#define PROC_BLOCK_SIZE (3*1024) /* 4K page size but our output routines use some slack for overruns */ + +static ssize_t proc_info_read(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + struct inode * inode = file->f_dentry->d_inode; + unsigned long page; + ssize_t length; + ssize_t end; + struct task_struct *task = inode->u.proc_i.task; + + if (count > PROC_BLOCK_SIZE) + count = PROC_BLOCK_SIZE; + if (!(page = __get_free_page(GFP_KERNEL))) + return -ENOMEM; + + length = inode->u.proc_i.op.proc_read(task, (char*)page); + + if (length < 0) { + free_page(page); + return length; + } + /* Static 4kB (or whatever) block capacity */ + if (*ppos >= length) { + free_page(page); + return 0; + } + if (count + *ppos > length) + count = length - *ppos; + end = count + *ppos; + copy_to_user(buf, (char *) page + *ppos, count); + *ppos = end; + free_page(page); + return count; +} + +static struct file_operations proc_info_file_operations = { + read: proc_info_read, +}; + +#define MAY_PTRACE(p) \ +(p==current||(p->p_pptr==current&&(p->ptrace & PT_PTRACED)&&p->state==TASK_STOPPED)) + + +static int mem_open(struct inode* inode, struct file* file) +{ + file->private_data = (void*)((long)current->self_exec_id); + return 0; +} + +static ssize_t mem_read(struct file * file, char * buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task = file->f_dentry->d_inode->u.proc_i.task; + char *page; + unsigned long src = *ppos; + int copied = 0; + struct mm_struct *mm; + + + if (!MAY_PTRACE(task)) + return -ESRCH; + + page = (char *)__get_free_page(GFP_USER); + if (!page) + return -ENOMEM; + + task_lock(task); + mm =
task->mm; + if (mm) + atomic_inc(&mm->mm_users); + task_unlock(task); + if (!mm) + return 0; + + if (file->private_data != (void*)((long)current->self_exec_id) ) { + mmput(mm); + return -EIO; + } + + + while (count > 0) { + int this_len, retval; + + this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; + retval = access_process_vm(task, src, page, this_len, 0); + if (!retval) { + if (!copied) + copied = -EIO; + break; + } + if (copy_to_user(buf, page, retval)) { + copied = -EFAULT; + break; + } + copied += retval; + src += retval; + buf += retval; + count -= retval; + } + *ppos = src; + mmput(mm); + free_page((unsigned long) page); + return copied; +} + +#define mem_write NULL + +#ifndef mem_write +/* This is a security hazard */ +static ssize_t mem_write(struct file * file, const char * buf, + size_t count, loff_t *ppos) +{ + int copied = 0; + char *page; + struct task_struct *task = file->f_dentry->d_inode->u.proc_i.task; + unsigned long dst = *ppos; + + if (!MAY_PTRACE(task)) + return -ESRCH; + + page = (char *)__get_free_page(GFP_USER); + if (!page) + return -ENOMEM; + + while (count > 0) { + int this_len, retval; + + this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count; + if (copy_from_user(page, buf, this_len)) { + copied = -EFAULT; + break; + } + retval = access_process_vm(task, dst, page, this_len, 1); + if (!retval) { + if (!copied) + copied = -EIO; + break; + } + copied += retval; + buf += retval; + dst += retval; + count -= retval; + } + *ppos = dst; + free_page((unsigned long) page); + return copied; +} +#endif + +static struct file_operations proc_mem_operations = { + read: mem_read, + write: mem_write, + open: mem_open, +}; + +static struct inode_operations proc_mem_inode_operations = { + permission: proc_permission, +}; + +static int proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = dentry->d_inode; + int error = -EACCES; + + /* We don't need a base pointer in the /proc filesystem */ + path_release(nd); + + if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) + goto out; + error = proc_check_root(inode); + if (error) + goto out; + + error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt); + nd->last_type = LAST_BIND; +out: + return error; +} + +static int do_proc_readlink(struct dentry *dentry, struct vfsmount *mnt, + char * buffer, int buflen) +{ + struct inode * inode; + char * tmp = (char*)__get_free_page(GFP_KERNEL), *path; + int len; + + if (!tmp) + return -ENOMEM; + + inode = dentry->d_inode; + path = d_path(dentry, mnt, tmp, PAGE_SIZE); + len = tmp + PAGE_SIZE - 1 - path; + + if (len < buflen) + buflen = len; + copy_to_user(buffer, path, buflen); + free_page((unsigned long)tmp); + return buflen; +} + +static int proc_pid_readlink(struct dentry * dentry, char * buffer, int buflen) +{ + int error = -EACCES; + struct inode *inode = dentry->d_inode; + struct dentry *de; + struct vfsmount *mnt = NULL; + + if (current->fsuid != inode->i_uid && !capable(CAP_DAC_OVERRIDE)) + goto out; + error = proc_check_root(inode); + if (error) + goto out; + + error = inode->u.proc_i.op.proc_get_link(inode, &de, &mnt); + if (error) + goto out; + + error = do_proc_readlink(de, mnt, buffer, buflen); + dput(de); + mntput(mnt); +out: + return error; +} + +static struct inode_operations proc_pid_link_inode_operations = { + readlink: proc_pid_readlink, + follow_link: proc_pid_follow_link +}; + +struct pid_entry { + int type; + int len; + char *name; + mode_t mode; +}; + +enum pid_directory_inos { + PROC_PID_INO = 2, + 
PROC_PID_STATUS, + PROC_PID_MEM, + PROC_PID_CWD, + PROC_PID_ROOT, + PROC_PID_EXE, + PROC_PID_FD, + PROC_PID_ENVIRON, + PROC_PID_CMDLINE, + PROC_PID_STAT, + PROC_PID_STATM, + PROC_PID_MAPS, + PROC_PID_CPU, + PROC_PID_FD_DIR = 0x8000, /* 0x8000-0xffff */ +}; + +#define E(type,name,mode) {(type),sizeof(name)-1,(name),(mode)} +static struct pid_entry base_stuff[] = { + E(PROC_PID_FD, "fd", S_IFDIR|S_IRUSR|S_IXUSR), + E(PROC_PID_ENVIRON, "environ", S_IFREG|S_IRUSR), + E(PROC_PID_STATUS, "status", S_IFREG|S_IRUGO), + E(PROC_PID_CMDLINE, "cmdline", S_IFREG|S_IRUGO), + E(PROC_PID_STAT, "stat", S_IFREG|S_IRUGO), + E(PROC_PID_STATM, "statm", S_IFREG|S_IRUGO), +#ifdef CONFIG_SMP + E(PROC_PID_CPU, "cpu", S_IFREG|S_IRUGO), +#endif + E(PROC_PID_MAPS, "maps", S_IFREG|S_IRUGO), + E(PROC_PID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), + E(PROC_PID_CWD, "cwd", S_IFLNK|S_IRWXUGO), + E(PROC_PID_ROOT, "root", S_IFLNK|S_IRWXUGO), + E(PROC_PID_EXE, "exe", S_IFLNK|S_IRWXUGO), + {0,0,NULL,0} +}; +#undef E + +#define NUMBUF 10 + +static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct task_struct *p = inode->u.proc_i.task; + unsigned int fd, pid, ino; + int retval; + char buf[NUMBUF]; + struct files_struct * files; + + retval = 0; + pid = p->pid; + + fd = filp->f_pos; + switch (fd) { + case 0: + if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) + goto out; + filp->f_pos++; + case 1: + ino = fake_ino(pid, PROC_PID_INO); + if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) + goto out; + filp->f_pos++; + default: + task_lock(p); + files = p->files; + if (files) + atomic_inc(&files->count); + task_unlock(p); + if (!files) + goto out; + for (fd = filp->f_pos-2; + fd < files->max_fds; + fd++, filp->f_pos++) { + unsigned int i,j; + + if (!fcheck_files(files, fd)) + continue; + + j = NUMBUF; + i = fd; + do { + j--; + buf[j] = '0' + (i % 10); + i /= 10; + } while (i); + + ino = fake_ino(pid, PROC_PID_FD_DIR + fd); + if (filldir(dirent, buf+j, NUMBUF-j, fd+2, ino, DT_LNK) < 0) + break; + } + put_files_struct(files); + } +out: + return retval; +} + +static int proc_base_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + int i; + int pid; + struct inode *inode = filp->f_dentry->d_inode; + struct pid_entry *p; + + pid = inode->u.proc_i.task->pid; + if (!pid) + return -ENOENT; + i = filp->f_pos; + switch (i) { + case 0: + if (filldir(dirent, ".", 1, i, inode->i_ino, DT_DIR) < 0) + return 0; + i++; + filp->f_pos++; + /* fall through */ + case 1: + if (filldir(dirent, "..", 2, i, PROC_ROOT_INO, DT_DIR) < 0) + return 0; + i++; + filp->f_pos++; + /* fall through */ + default: + i -= 2; + if (i>=sizeof(base_stuff)/sizeof(base_stuff[0])) + return 1; + p = base_stuff + i; + while (p->name) { + if (filldir(dirent, p->name, p->len, filp->f_pos, + fake_ino(pid, p->type), p->mode >> 12) < 0) + return 0; + filp->f_pos++; + p++; + } + } + return 1; +} + +/* building an inode */ + +static int task_dumpable(struct task_struct *task) +{ + int dumpable = 0; + struct mm_struct *mm; + + task_lock(task); + mm = task->mm; + if (mm) + dumpable = mm->dumpable; + task_unlock(task); + return dumpable; +} + + +static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task, int ino) +{ + struct inode * inode; + + /* We need a new inode */ + + inode = new_inode(sb); + if (!inode) + goto out; + + /* Common stuff */ + + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(task->pid, ino); + + 
if (!task->pid) + goto out_unlock; + + /* + * grab the reference to task. + */ + get_task_struct(task); + inode->u.proc_i.task = task; + inode->i_uid = 0; + inode->i_gid = 0; + if (ino == PROC_PID_INO || task_dumpable(task)) { + inode->i_uid = task->euid; + inode->i_gid = task->egid; + } + +out: + return inode; + +out_unlock: + iput(inode); + return NULL; +} + +/* dentry stuff */ + +static int pid_fd_revalidate(struct dentry * dentry, int flags) +{ + return 0; +} + +/* + * Exceptional case: normally we are not allowed to unhash a busy + * directory. In this case, however, we can do it - no aliasing problems + * due to the way we treat inodes. + */ +static int pid_base_revalidate(struct dentry * dentry, int flags) +{ + if (dentry->d_inode->u.proc_i.task->pid) + return 1; + d_drop(dentry); + return 0; +} + +static int pid_delete_dentry(struct dentry * dentry) +{ + return 1; +} + +static struct dentry_operations pid_fd_dentry_operations = +{ + d_revalidate: pid_fd_revalidate, + d_delete: pid_delete_dentry, +}; + +static struct dentry_operations pid_dentry_operations = +{ + d_delete: pid_delete_dentry, +}; + +static struct dentry_operations pid_base_dentry_operations = +{ + d_revalidate: pid_base_revalidate, + d_delete: pid_delete_dentry, +}; + +/* Lookups */ +#define MAX_MULBY10 ((~0U-9)/10) + +static struct dentry *proc_lookupfd(struct inode * dir, struct dentry * dentry) +{ + unsigned int fd, c; + struct task_struct *task = dir->u.proc_i.task; + struct file * file; + struct files_struct * files; + struct inode *inode; + const char *name; + int len; + + fd = 0; + len = dentry->d_name.len; + name = dentry->d_name.name; + if (len > 1 && *name == '0') goto out; + while (len-- > 0) { + c = *name - '0'; + name++; + if (c > 9) + goto out; + if (fd >= MAX_MULBY10) + goto out; + fd *= 10; + fd += c; + } + + inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_FD_DIR+fd); + if (!inode) + goto out; + task_lock(task); + files = task->files; + if (files) + atomic_inc(&files->count); + task_unlock(task); + if (!files) + goto out_unlock; + read_lock(&files->file_lock); + file = inode->u.proc_i.file = fcheck_files(files, fd); + if (!file) + goto out_unlock2; + get_file(file); + read_unlock(&files->file_lock); + put_files_struct(files); + inode->i_op = &proc_pid_link_inode_operations; + inode->i_size = 64; + inode->i_mode = S_IFLNK; + inode->u.proc_i.op.proc_get_link = proc_fd_link; + if (file->f_mode & 1) + inode->i_mode |= S_IRUSR | S_IXUSR; + if (file->f_mode & 2) + inode->i_mode |= S_IWUSR | S_IXUSR; + dentry->d_op = &pid_fd_dentry_operations; + d_add(dentry, inode); + return NULL; + +out_unlock2: + put_files_struct(files); + read_unlock(&files->file_lock); +out_unlock: + iput(inode); +out: + return ERR_PTR(-ENOENT); +} + +static struct file_operations proc_fd_operations = { + read: generic_read_dir, + readdir: proc_readfd, +}; + +/* + * proc directories can do almost nothing.. 
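+ * just a lookup() on decimal fd names, plus a permission() check that
+ * (via proc_check_root) also insists the target's root is reachable
+ * from below the caller's own root.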
+ */ +static struct inode_operations proc_fd_inode_operations = { + lookup: proc_lookupfd, + permission: proc_permission, +}; + +static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode; + int error; + struct task_struct *task = dir->u.proc_i.task; + struct pid_entry *p; + + error = -ENOENT; + inode = NULL; + + for (p = base_stuff; p->name; p++) { + if (p->len != dentry->d_name.len) + continue; + if (!memcmp(dentry->d_name.name, p->name, p->len)) + break; + } + if (!p->name) + goto out; + + error = -EINVAL; + inode = proc_pid_make_inode(dir->i_sb, task, p->type); + if (!inode) + goto out; + + inode->i_mode = p->mode; + /* + * Yes, it does not scale. And it should not. Don't add + * new entries into /proc// without very good reasons. + */ + switch(p->type) { + case PROC_PID_FD: + inode->i_nlink = 2; + inode->i_op = &proc_fd_inode_operations; + inode->i_fop = &proc_fd_operations; + break; + case PROC_PID_EXE: + inode->i_op = &proc_pid_link_inode_operations; + inode->u.proc_i.op.proc_get_link = proc_exe_link; + break; + case PROC_PID_CWD: + inode->i_op = &proc_pid_link_inode_operations; + inode->u.proc_i.op.proc_get_link = proc_cwd_link; + break; + case PROC_PID_ROOT: + inode->i_op = &proc_pid_link_inode_operations; + inode->u.proc_i.op.proc_get_link = proc_root_link; + break; + case PROC_PID_ENVIRON: + inode->i_fop = &proc_info_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_environ; + break; + case PROC_PID_STATUS: + inode->i_fop = &proc_info_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_status; + break; + case PROC_PID_STAT: + inode->i_fop = &proc_info_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_stat; + break; + case PROC_PID_CMDLINE: + inode->i_fop = &proc_info_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_cmdline; + break; + case PROC_PID_STATM: + inode->i_fop = &proc_info_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_statm; + break; + case PROC_PID_MAPS: + inode->i_fop = &proc_maps_operations; + break; +#ifdef CONFIG_SMP + case PROC_PID_CPU: + inode->i_fop = &proc_info_file_operations; + inode->u.proc_i.op.proc_read = proc_pid_cpu; + break; +#endif + case PROC_PID_MEM: + inode->i_op = &proc_mem_inode_operations; + inode->i_fop = &proc_mem_operations; + break; + default: + printk("procfs: impossible type (%d)",p->type); + iput(inode); + return ERR_PTR(-EINVAL); + } + dentry->d_op = &pid_dentry_operations; + d_add(dentry, inode); + return NULL; + +out: + return ERR_PTR(error); +} + +static struct file_operations proc_base_operations = { + read: generic_read_dir, + readdir: proc_base_readdir, +}; + +static struct inode_operations proc_base_inode_operations = { + lookup: proc_base_lookup, +}; + +/* + * /proc/self: + */ +static int proc_self_readlink(struct dentry *dentry, char *buffer, int buflen) +{ + char tmp[30]; + sprintf(tmp, "%d", current->pid); + return vfs_readlink(dentry,buffer,buflen,tmp); +} + +static int proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char tmp[30]; + sprintf(tmp, "%d", current->pid); + return vfs_follow_link(nd,tmp); +} + +static struct inode_operations proc_self_inode_operations = { + readlink: proc_self_readlink, + follow_link: proc_self_follow_link, +}; + +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry) +{ + unsigned int pid, c; + struct task_struct *task; + const char *name; + struct inode *inode; + int len; + + pid = 0; + name = dentry->d_name.name; + len = dentry->d_name.len; + if (len == 4 && 
!memcmp(name, "self", 4)) { + inode = new_inode(dir->i_sb); + if (!inode) + return ERR_PTR(-ENOMEM); + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_ino = fake_ino(0, PROC_PID_INO); + inode->u.proc_i.file = NULL; + inode->u.proc_i.task = NULL; + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_uid = inode->i_gid = 0; + inode->i_size = 64; + inode->i_op = &proc_self_inode_operations; + d_add(dentry, inode); + return NULL; + } + while (len-- > 0) { + c = *name - '0'; + name++; + if (c > 9) + goto out; + if (pid >= MAX_MULBY10) + goto out; + pid *= 10; + pid += c; + if (!pid) + goto out; + } + + read_lock(&tasklist_lock); + task = find_task_by_pid(pid); + if (task) + get_task_struct(task); + read_unlock(&tasklist_lock); + if (!task) + goto out; + + inode = proc_pid_make_inode(dir->i_sb, task, PROC_PID_INO); + + free_task_struct(task); + + if (!inode) + goto out; + inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO; + inode->i_op = &proc_base_inode_operations; + inode->i_fop = &proc_base_operations; + inode->i_nlink = 3; + inode->i_flags|=S_IMMUTABLE_FILE; + + dentry->d_op = &pid_base_dentry_operations; + d_add(dentry, inode); + return NULL; +out: + return ERR_PTR(-ENOENT); +} + +void proc_pid_delete_inode(struct inode *inode) +{ + if (inode->u.proc_i.file) + fput(inode->u.proc_i.file); + if (inode->u.proc_i.task) + free_task_struct(inode->u.proc_i.task); +} + +#define PROC_NUMBUF 10 +#define PROC_MAXPIDS 20 + +/* + * Get a few pid's to return for filldir - we need to hold the + * tasklist lock while doing this, and we must release it before + * we actually do the filldir itself, so we use a temp buffer.. + */ +static int get_pid_list(int index, unsigned int *pids) +{ + struct task_struct *p; + int nr_pids = 0; + + index--; + read_lock(&tasklist_lock); + for_each_task(p) { + int pid = p->pid; + if (!pid) + continue; + if (--index >= 0) + continue; + pids[nr_pids] = pid; + nr_pids++; + if (nr_pids >= PROC_MAXPIDS) + break; + } + read_unlock(&tasklist_lock); + return nr_pids; +} + +int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) +{ + unsigned int pid_array[PROC_MAXPIDS]; + char buf[PROC_NUMBUF]; + unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY; + unsigned int nr_pids, i; + + if (!nr) { + ino_t ino = fake_ino(0,PROC_PID_INO); + if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0) + return 0; + filp->f_pos++; + nr++; + } + + nr_pids = get_pid_list(nr, pid_array); + + for (i = 0; i < nr_pids; i++) { + int pid = pid_array[i]; + ino_t ino = fake_ino(pid,PROC_PID_INO); + unsigned long j = PROC_NUMBUF; + + do buf[--j] = '0' + (pid % 10); while (pid/=10); + + if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) + break; + filp->f_pos++; + } + return 0; +} diff -urN linux-2.4.16-reiserfspatches-immutable/fs/udf/inode.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/udf/inode.c~ --- linux-2.4.16-reiserfspatches-immutable/fs/udf/inode.c~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/udf/inode.c~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,2220 @@ +/* + * inode.c + * + * PURPOSE + * Inode handling routines for the OSTA-UDF(tm) filesystem. + * + * CONTACTS + * E-mail regarding any portion of the Linux UDF file system should be + * directed to the development team mailing list (run by majordomo): + * linux_udf@hpesjro.fc.hp.com + * + * COPYRIGHT + * This file is distributed under the terms of the GNU General Public + * License (GPL). 
Copies of the GPL can be obtained from: + * ftp://prep.ai.mit.edu/pub/gnu/GPL + * Each contributing author retains all rights to their own work. + * + * (C) 1998 Dave Boynton + * (C) 1998-2000 Ben Fennema + * (C) 1999-2000 Stelias Computing Inc + * + * HISTORY + * + * 10/04/98 dgb Added rudimentary directory functions + * 10/07/98 Fully working udf_block_map! It works! + * 11/25/98 bmap altered to better support extents + * 12/06/98 blf partition support in udf_iget, udf_block_map and udf_read_inode + * 12/12/98 rewrote udf_block_map to handle next extents and descs across + * block boundaries (which is not actually allowed) + * 12/20/98 added support for strategy 4096 + * 03/07/99 rewrote udf_block_map (again) + * New funcs, inode_bmap, udf_next_aext + * 04/19/99 Support for writing device EA's for major/minor # + */ + +#include "udfdecl.h" +#include +#include +#include +#include + +#include "udf_i.h" +#include "udf_sb.h" + +MODULE_AUTHOR("Ben Fennema"); +MODULE_DESCRIPTION("Universal Disk Format Filesystem"); +MODULE_LICENSE("GPL"); + +#define EXTENT_MERGE_SIZE 5 + +static mode_t udf_convert_permissions(struct FileEntry *); +static int udf_update_inode(struct inode *, int); +static void udf_fill_inode(struct inode *, struct buffer_head *); +static struct buffer_head *inode_getblk(struct inode *, long, int *, long *, int *); +static void udf_split_extents(struct inode *, int *, int, int, + long_ad [EXTENT_MERGE_SIZE], int *); +static void udf_prealloc_extents(struct inode *, int, int, + long_ad [EXTENT_MERGE_SIZE], int *); +static void udf_merge_extents(struct inode *, + long_ad [EXTENT_MERGE_SIZE], int *); +static void udf_update_extents(struct inode *, + long_ad [EXTENT_MERGE_SIZE], int, int, + lb_addr, Uint32, struct buffer_head **); +static int udf_get_block(struct inode *, long, struct buffer_head *, int); + +/* + * udf_put_inode + * + * PURPOSE + * + * DESCRIPTION + * This routine is called whenever the kernel no longer needs the inode. + * + * HISTORY + * July 1, 1997 - Andrew E. Mileski + * Written, tested, and released. + * + * Called at each iput() + */ +void udf_put_inode(struct inode * inode) +{ + if (!(inode->i_sb->s_flags & MS_RDONLY)) + { + lock_kernel(); + udf_discard_prealloc(inode); + /* write the root inode on put, if dirty */ + if (!inode->i_sb->s_root && inode->i_state & I_DIRTY) + udf_update_inode(inode, IS_SYNC(inode)); + unlock_kernel(); + } +} + +/* + * udf_delete_inode + * + * PURPOSE + * Clean-up before the specified inode is destroyed. + * + * DESCRIPTION + * This routine is called when the kernel destroys an inode structure + * ie. when iput() finds i_count == 0. + * + * HISTORY + * July 1, 1997 - Andrew E. Mileski + * Written, tested, and released. + * + * Called at the last iput() if i_nlink is zero. 
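+ * That is also where the on-disk inode goes away: the body below
+ * truncates the file to zero, writes the inode back one last time and
+ * then releases it with udf_free_inode().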
+ */ +void udf_delete_inode(struct inode * inode) +{ + lock_kernel(); + + if (is_bad_inode(inode)) + goto no_delete; + + inode->i_size = 0; + udf_truncate(inode); + udf_update_inode(inode, IS_SYNC(inode)); + udf_free_inode(inode); + + unlock_kernel(); + return; +no_delete: + unlock_kernel(); + clear_inode(inode); +} + +void udf_discard_prealloc(struct inode * inode) +{ + if (inode->i_size && inode->i_size != UDF_I_LENEXTENTS(inode) && + UDF_I_ALLOCTYPE(inode) != ICB_FLAG_AD_IN_ICB) + { + udf_truncate_extents(inode); + } +} + +static int udf_writepage(struct page *page) +{ + return block_write_full_page(page, udf_get_block); +} + +static int udf_readpage(struct file *file, struct page *page) +{ + return block_read_full_page(page, udf_get_block); +} + +static int udf_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) +{ + return block_prepare_write(page, from, to, udf_get_block); +} + +static int udf_bmap(struct address_space *mapping, long block) +{ + return generic_block_bmap(mapping,block,udf_get_block); +} + +struct address_space_operations udf_aops = { + readpage: udf_readpage, + writepage: udf_writepage, + sync_page: block_sync_page, + prepare_write: udf_prepare_write, + commit_write: generic_commit_write, + bmap: udf_bmap, +}; + +void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err) +{ + struct buffer_head *bh = NULL; + struct page *page; + char *kaddr; + int block; + + /* from now on we have normal address_space methods */ + inode->i_data.a_ops = &udf_aops; + + if (!UDF_I_LENALLOC(inode)) + { + if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) + UDF_I_ALLOCTYPE(inode) = ICB_FLAG_AD_SHORT; + else + UDF_I_ALLOCTYPE(inode) = ICB_FLAG_AD_LONG; + mark_inode_dirty(inode); + return; + } + + block = udf_get_lb_pblock(inode->i_sb, UDF_I_LOCATION(inode), 0); + bh = udf_tread(inode->i_sb, block, inode->i_sb->s_blocksize); + if (!bh) + return; + page = grab_cache_page(inode->i_mapping, 0); + if (!PageLocked(page)) + PAGE_BUG(page); + if (!Page_Uptodate(page)) + { + kaddr = kmap(page); + memset(kaddr + UDF_I_LENALLOC(inode), 0x00, + PAGE_CACHE_SIZE - UDF_I_LENALLOC(inode)); + memcpy(kaddr, bh->b_data + udf_file_entry_alloc_offset(inode), + UDF_I_LENALLOC(inode)); + flush_dcache_page(page); + SetPageUptodate(page); + kunmap(page); + } + memset(bh->b_data + udf_file_entry_alloc_offset(inode), + 0, UDF_I_LENALLOC(inode)); + UDF_I_LENALLOC(inode) = 0; + if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) + UDF_I_ALLOCTYPE(inode) = ICB_FLAG_AD_SHORT; + else + UDF_I_ALLOCTYPE(inode) = ICB_FLAG_AD_LONG; + mark_buffer_dirty_inode(bh, inode); + udf_release_data(bh); + + inode->i_data.a_ops->writepage(page); + page_cache_release(page); + + mark_inode_dirty(inode); + inode->i_version ++; +} + +struct buffer_head * udf_expand_dir_adinicb(struct inode *inode, int *block, int *err) +{ + int newblock; + struct buffer_head *sbh = NULL, *dbh = NULL; + lb_addr bloc, eloc; + Uint32 elen, extoffset; + + struct udf_fileident_bh sfibh, dfibh; + loff_t f_pos = udf_ext0_offset(inode) >> 2; + int size = (udf_ext0_offset(inode) + inode->i_size) >> 2; + struct FileIdentDesc cfi, *sfi, *dfi; + + if (!inode->i_size) + { + if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) + UDF_I_ALLOCTYPE(inode) = ICB_FLAG_AD_SHORT; + else + UDF_I_ALLOCTYPE(inode) = ICB_FLAG_AD_LONG; + mark_inode_dirty(inode); + return NULL; + } + + /* alloc block, and copy data to it */ + *block = udf_new_block(inode->i_sb, inode, + UDF_I_LOCATION(inode).partitionReferenceNum, + 
UDF_I_LOCATION(inode).logicalBlockNum, err); + + if (!(*block)) + return NULL; + newblock = udf_get_pblock(inode->i_sb, *block, + UDF_I_LOCATION(inode).partitionReferenceNum, 0); + if (!newblock) + return NULL; + sbh = udf_tread(inode->i_sb, inode->i_ino, inode->i_sb->s_blocksize); + if (!sbh) + return NULL; + dbh = udf_tgetblk(inode->i_sb, newblock, inode->i_sb->s_blocksize); + if (!dbh) + return NULL; + lock_buffer(dbh); + memset(dbh->b_data, 0x00, inode->i_sb->s_blocksize); + mark_buffer_uptodate(dbh, 1); + unlock_buffer(dbh); + mark_buffer_dirty_inode(dbh, inode); + + sfibh.soffset = sfibh.eoffset = (f_pos & ((inode->i_sb->s_blocksize - 1) >> 2)) << 2; + sfibh.sbh = sfibh.ebh = sbh; + dfibh.soffset = dfibh.eoffset = 0; + dfibh.sbh = dfibh.ebh = dbh; + while ( (f_pos < size) ) + { + sfi = udf_fileident_read(inode, &f_pos, &sfibh, &cfi, NULL, NULL, NULL, NULL, NULL, NULL); + if (!sfi) + { + udf_release_data(sbh); + udf_release_data(dbh); + return NULL; + } + sfi->descTag.tagLocation = *block; + dfibh.soffset = dfibh.eoffset; + dfibh.eoffset += (sfibh.eoffset - sfibh.soffset); + dfi = (struct FileIdentDesc *)(dbh->b_data + dfibh.soffset); + if (udf_write_fi(inode, sfi, dfi, &dfibh, sfi->impUse, + sfi->fileIdent + sfi->lengthOfImpUse)) + { + udf_release_data(sbh); + udf_release_data(dbh); + return NULL; + } + } + mark_buffer_dirty_inode(dbh, inode); + + memset(sbh->b_data + udf_file_entry_alloc_offset(inode), + 0, UDF_I_LENALLOC(inode)); + + UDF_I_LENALLOC(inode) = 0; + if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) + UDF_I_ALLOCTYPE(inode) = ICB_FLAG_AD_SHORT; + else + UDF_I_ALLOCTYPE(inode) = ICB_FLAG_AD_LONG; + bloc = UDF_I_LOCATION(inode); + eloc.logicalBlockNum = *block; + eloc.partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum; + elen = inode->i_size; + UDF_I_LENEXTENTS(inode) = elen; + extoffset = udf_file_entry_alloc_offset(inode); + udf_add_aext(inode, &bloc, &extoffset, eloc, elen, &sbh, 0); + /* UniqueID stuff */ + + mark_buffer_dirty(sbh); + udf_release_data(sbh); + mark_inode_dirty(inode); + inode->i_version ++; + return dbh; +} + +static int udf_get_block(struct inode *inode, long block, struct buffer_head *bh_result, int create) +{ + int err, new; + struct buffer_head *bh; + unsigned long phys; + + if (!create) + { + phys = udf_block_map(inode, block); + if (phys) + { + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = phys; + bh_result->b_state |= (1UL << BH_Mapped); + } + return 0; + } + + err = -EIO; + new = 0; + bh = NULL; + + lock_kernel(); + + if (block < 0) + goto abort_negative; + + if (block == UDF_I_NEXT_ALLOC_BLOCK(inode) + 1) + { + UDF_I_NEXT_ALLOC_BLOCK(inode) ++; + UDF_I_NEXT_ALLOC_GOAL(inode) ++; + } + + err = 0; + + bh = inode_getblk(inode, block, &err, &phys, &new); + if (bh) + BUG(); + if (err) + goto abort; + if (!phys) + BUG(); + + bh_result->b_dev = inode->i_dev; + bh_result->b_blocknr = phys; + bh_result->b_state |= (1UL << BH_Mapped); + if (new) + bh_result->b_state |= (1UL << BH_New); +abort: + unlock_kernel(); + return err; + +abort_negative: + udf_warning(inode->i_sb, "udf_get_block", "block < 0"); + goto abort; +} + +struct buffer_head * udf_getblk(struct inode * inode, long block, + int create, int * err) +{ + struct buffer_head dummy; + + dummy.b_state = 0; + dummy.b_blocknr = -1000; + *err = udf_get_block(inode, block, &dummy, create); + if (!*err && buffer_mapped(&dummy)) + { + struct buffer_head *bh; + bh = getblk(dummy.b_dev, dummy.b_blocknr, inode->i_sb->s_blocksize); + if (buffer_new(&dummy)) + { + 
lock_buffer(bh); + memset(bh->b_data, 0x00, inode->i_sb->s_blocksize); + mark_buffer_uptodate(bh, 1); + unlock_buffer(bh); + mark_buffer_dirty_inode(bh, inode); + } + return bh; + } + return NULL; +} + +static struct buffer_head * inode_getblk(struct inode * inode, long block, + int *err, long *phys, int *new) +{ + struct buffer_head *pbh = NULL, *cbh = NULL, *nbh = NULL, *result = NULL; + long_ad laarr[EXTENT_MERGE_SIZE]; + Uint32 pextoffset = 0, cextoffset = 0, nextoffset = 0; + int count = 0, startnum = 0, endnum = 0; + Uint32 elen = 0; + lb_addr eloc, pbloc, cbloc, nbloc; + int c = 1; + Uint64 lbcount = 0, b_off = 0; + Uint32 newblocknum, newblock, offset = 0; + Sint8 etype; + int goal = 0, pgoal = UDF_I_LOCATION(inode).logicalBlockNum; + char lastblock = 0; + + pextoffset = cextoffset = nextoffset = udf_file_entry_alloc_offset(inode); + b_off = (Uint64)block << inode->i_sb->s_blocksize_bits; + pbloc = cbloc = nbloc = UDF_I_LOCATION(inode); + + /* find the extent which contains the block we are looking for. + alternate between laarr[0] and laarr[1] for locations of the + current extent, and the previous extent */ + do + { + if (pbh != cbh) + { + udf_release_data(pbh); + atomic_inc(&cbh->b_count); + pbh = cbh; + } + if (cbh != nbh) + { + udf_release_data(cbh); + atomic_inc(&nbh->b_count); + cbh = nbh; + } + + lbcount += elen; + + pbloc = cbloc; + cbloc = nbloc; + + pextoffset = cextoffset; + cextoffset = nextoffset; + + if ((etype = udf_next_aext(inode, &nbloc, &nextoffset, &eloc, &elen, &nbh, 1)) == -1) + break; + + c = !c; + + laarr[c].extLength = (etype << 30) | elen; + laarr[c].extLocation = eloc; + + if (etype != EXTENT_NOT_RECORDED_NOT_ALLOCATED) + pgoal = eloc.logicalBlockNum + + ((elen + inode->i_sb->s_blocksize - 1) >> + inode->i_sb->s_blocksize_bits); + + count ++; + } while (lbcount + elen <= b_off); + + b_off -= lbcount; + offset = b_off >> inode->i_sb->s_blocksize_bits; + + /* if the extent is allocated and recorded, return the block + if the extent is not a multiple of the blocksize, round up */ + + if (etype == EXTENT_RECORDED_ALLOCATED) + { + if (elen & (inode->i_sb->s_blocksize - 1)) + { + elen = (EXTENT_RECORDED_ALLOCATED << 30) | + ((elen + inode->i_sb->s_blocksize - 1) & + ~(inode->i_sb->s_blocksize - 1)); + etype = udf_write_aext(inode, nbloc, &cextoffset, eloc, elen, nbh, 1); + } + udf_release_data(pbh); + udf_release_data(cbh); + udf_release_data(nbh); + newblock = udf_get_lb_pblock(inode->i_sb, eloc, offset); + *phys = newblock; + return NULL; + } + + if (etype == -1) + { + endnum = startnum = ((count > 1) ? 1 : count); + if (laarr[c].extLength & (inode->i_sb->s_blocksize - 1)) + { + laarr[c].extLength = + (laarr[c].extLength & UDF_EXTENT_FLAG_MASK) | + (((laarr[c].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - 1) & + ~(inode->i_sb->s_blocksize - 1)); + UDF_I_LENEXTENTS(inode) = + (UDF_I_LENEXTENTS(inode) + inode->i_sb->s_blocksize - 1) & + ~(inode->i_sb->s_blocksize - 1); + } + c = !c; + laarr[c].extLength = (EXTENT_NOT_RECORDED_NOT_ALLOCATED << 30) | + ((offset + 1) << inode->i_sb->s_blocksize_bits); + memset(&laarr[c].extLocation, 0x00, sizeof(lb_addr)); + count ++; + endnum ++; + lastblock = 1; + } + else + endnum = startnum = ((count > 2) ? 
2 : count); + + /* if the current extent is in position 0, swap it with the previous */ + if (!c && count != 1) + { + laarr[2] = laarr[0]; + laarr[0] = laarr[1]; + laarr[1] = laarr[2]; + c = 1; + } + + /* if the current block is located in an extent, read the next extent */ + if (etype != -1) + { + if ((etype = udf_next_aext(inode, &nbloc, &nextoffset, &eloc, &elen, &nbh, 0)) != -1) + { + laarr[c+1].extLength = (etype << 30) | elen; + laarr[c+1].extLocation = eloc; + count ++; + startnum ++; + endnum ++; + } + else + lastblock = 1; + } + udf_release_data(nbh); + if (!pbh) + pbh = cbh; + else + udf_release_data(cbh); + + /* if the current extent is not recorded but allocated, get the + block in the extent corresponding to the requested block */ + if ((laarr[c].extLength >> 30) == EXTENT_NOT_RECORDED_ALLOCATED) + newblocknum = laarr[c].extLocation.logicalBlockNum + offset; + else /* otherwise, allocate a new block */ + { + if (UDF_I_NEXT_ALLOC_BLOCK(inode) == block) + goal = UDF_I_NEXT_ALLOC_GOAL(inode); + + if (!goal) + { + if (!(goal = pgoal)) + goal = UDF_I_LOCATION(inode).logicalBlockNum + 1; + } + + if (!(newblocknum = udf_new_block(inode->i_sb, inode, + UDF_I_LOCATION(inode).partitionReferenceNum, goal, err))) + { + udf_release_data(pbh); + *err = -ENOSPC; + return NULL; + } + UDF_I_LENEXTENTS(inode) += inode->i_sb->s_blocksize; + } + + /* if the extent the requested block is located in contains multiple blocks, + split the extent into at most three extents. blocks prior to requested + block, requested block, and blocks after requested block */ + udf_split_extents(inode, &c, offset, newblocknum, laarr, &endnum); + +#ifdef UDF_PREALLOCATE + /* preallocate blocks */ + udf_prealloc_extents(inode, c, lastblock, laarr, &endnum); +#endif + + /* merge any continuous blocks in laarr */ + udf_merge_extents(inode, laarr, &endnum); + + /* write back the new extents, inserting new extents if the new number + of extents is greater than the old number, and deleting extents if + the new number of extents is less than the old number */ + udf_update_extents(inode, laarr, startnum, endnum, pbloc, pextoffset, &pbh); + + udf_release_data(pbh); + + if (!(newblock = udf_get_pblock(inode->i_sb, newblocknum, + UDF_I_LOCATION(inode).partitionReferenceNum, 0))) + { + return NULL; + } + *phys = newblock; + *err = 0; + *new = 1; + UDF_I_NEXT_ALLOC_BLOCK(inode) = block; + UDF_I_NEXT_ALLOC_GOAL(inode) = newblocknum; + inode->i_ctime = CURRENT_TIME; + UDF_I_UCTIME(inode) = CURRENT_UTIME; + + if (IS_SYNC(inode)) + udf_sync_inode(inode); + else + mark_inode_dirty(inode); + return result; +} + +static void udf_split_extents(struct inode *inode, int *c, int offset, int newblocknum, + long_ad laarr[EXTENT_MERGE_SIZE], int *endnum) +{ + if ((laarr[*c].extLength >> 30) == EXTENT_NOT_RECORDED_ALLOCATED || + (laarr[*c].extLength >> 30) == EXTENT_NOT_RECORDED_NOT_ALLOCATED) + { + int curr = *c; + int blen = ((laarr[curr].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; + int type = laarr[curr].extLength & ~UDF_EXTENT_LENGTH_MASK; + + if (blen == 1) + ; + else if (!offset || blen == offset + 1) + { + laarr[curr+2] = laarr[curr+1]; + laarr[curr+1] = laarr[curr]; + } + else + { + laarr[curr+3] = laarr[curr+1]; + laarr[curr+2] = laarr[curr+1] = laarr[curr]; + } + + if (offset) + { + if ((type >> 30) == EXTENT_NOT_RECORDED_ALLOCATED) + { + udf_free_blocks(inode->i_sb, inode, laarr[curr].extLocation, 0, offset); + laarr[curr].extLength = (EXTENT_NOT_RECORDED_NOT_ALLOCATED << 30)
| + (offset << inode->i_sb->s_blocksize_bits); + laarr[curr].extLocation.logicalBlockNum = 0; + laarr[curr].extLocation.partitionReferenceNum = 0; + } + else + laarr[curr].extLength = type | + (offset << inode->i_sb->s_blocksize_bits); + curr ++; + (*c) ++; + (*endnum) ++; + } + + laarr[curr].extLocation.logicalBlockNum = newblocknum; + if ((type >> 30) == EXTENT_NOT_RECORDED_NOT_ALLOCATED) + laarr[curr].extLocation.partitionReferenceNum = + UDF_I_LOCATION(inode).partitionReferenceNum; + laarr[curr].extLength = (EXTENT_RECORDED_ALLOCATED << 30) | + inode->i_sb->s_blocksize; + curr ++; + + if (blen != offset + 1) + { + if ((type >> 30) == EXTENT_NOT_RECORDED_ALLOCATED) + laarr[curr].extLocation.logicalBlockNum += (offset + 1); + laarr[curr].extLength = type | + ((blen - (offset + 1)) << inode->i_sb->s_blocksize_bits); + curr ++; + (*endnum) ++; + } + } +} + +static void udf_prealloc_extents(struct inode *inode, int c, int lastblock, + long_ad laarr[EXTENT_MERGE_SIZE], int *endnum) +{ + int start, length = 0, currlength = 0, i; + + if (*endnum >= (c+1)) + { + if (!lastblock) + return; + else + start = c; + } + else + { + if ((laarr[c+1].extLength >> 30) == EXTENT_NOT_RECORDED_ALLOCATED) + { + start = c+1; + length = currlength = (((laarr[c+1].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); + } + else + start = c; + } + + for (i=start+1; i<=*endnum; i++) + { + if (i == *endnum) + { + if (lastblock) + length += UDF_DEFAULT_PREALLOC_BLOCKS; + } + else if ((laarr[i].extLength >> 30) == EXTENT_NOT_RECORDED_NOT_ALLOCATED) + length += (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); + else + break; + } + + if (length) + { + int next = laarr[start].extLocation.logicalBlockNum + + (((laarr[start].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits); + int numalloc = udf_prealloc_blocks(inode->i_sb, inode, + laarr[start].extLocation.partitionReferenceNum, + next, (UDF_DEFAULT_PREALLOC_BLOCKS > length ? 
length : + UDF_DEFAULT_PREALLOC_BLOCKS) - currlength); + + if (numalloc) + { + UDF_I_LENEXTENTS(inode) += numalloc << inode->i_sb->s_blocksize_bits; + if (start == (c+1)) + laarr[start].extLength += + (numalloc << inode->i_sb->s_blocksize_bits); + else + { + memmove(&laarr[c+2], &laarr[c+1], + sizeof(long_ad) * (*endnum - (c+1))); + (*endnum) ++; + laarr[c+1].extLocation.logicalBlockNum = next; + laarr[c+1].extLocation.partitionReferenceNum = + laarr[c].extLocation.partitionReferenceNum; + laarr[c+1].extLength = (EXTENT_NOT_RECORDED_ALLOCATED << 30) | + (numalloc << inode->i_sb->s_blocksize_bits); + start = c+1; + } + + for (i=start+1; numalloc && i<*endnum; i++) + { + int elen = ((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits; + + if (elen > numalloc) + { + laarr[c+1].extLength -= + (numalloc << inode->i_sb->s_blocksize_bits); + numalloc = 0; + } + else + { + numalloc -= elen; + if (*endnum > (i+1)) + memmove(&laarr[i], &laarr[i+1], + sizeof(long_ad) * (*endnum - (i+1))); + i --; + (*endnum) --; + } + } + } + } +} + +static void udf_merge_extents(struct inode *inode, + long_ad laarr[EXTENT_MERGE_SIZE], int *endnum) +{ + int i; + + for (i=0; i<(*endnum-1); i++) + { + if ((laarr[i].extLength >> 30) == (laarr[i+1].extLength >> 30)) + { + if (((laarr[i].extLength >> 30) == EXTENT_NOT_RECORDED_NOT_ALLOCATED) || + ((laarr[i+1].extLocation.logicalBlockNum - laarr[i].extLocation.logicalBlockNum) == + (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - 1) >> inode->i_sb->s_blocksize_bits))) + { + if (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + + (laarr[i+1].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - 1) & ~UDF_EXTENT_LENGTH_MASK) + { + laarr[i+1].extLength = (laarr[i+1].extLength - + (laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + + UDF_EXTENT_LENGTH_MASK) & ~(inode->i_sb->s_blocksize-1); + laarr[i].extLength = (UDF_EXTENT_LENGTH_MASK + 1) - + inode->i_sb->s_blocksize; + laarr[i+1].extLocation.logicalBlockNum = + laarr[i].extLocation.logicalBlockNum + + ((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) >> + inode->i_sb->s_blocksize_bits); + } + else + { + laarr[i].extLength = laarr[i+1].extLength + + (((laarr[i].extLength & UDF_EXTENT_LENGTH_MASK) + + inode->i_sb->s_blocksize - 1) & ~(inode->i_sb->s_blocksize-1)); + if (*endnum > (i+2)) + memmove(&laarr[i+1], &laarr[i+2], + sizeof(long_ad) * (*endnum - (i+2))); + i --; + (*endnum) --; + } + } + } + } +} + +static void udf_update_extents(struct inode *inode, + long_ad laarr[EXTENT_MERGE_SIZE], int startnum, int endnum, + lb_addr pbloc, Uint32 pextoffset, struct buffer_head **pbh) +{ + int start = 0, i; + lb_addr tmploc; + Uint32 tmplen; + + if (startnum > endnum) + { + for (i=0; i<(startnum-endnum); i++) + { + udf_delete_aext(inode, pbloc, pextoffset, laarr[i].extLocation, + laarr[i].extLength, *pbh); + } + } + else if (startnum < endnum) + { + for (i=0; i<(endnum-startnum); i++) + { + udf_insert_aext(inode, pbloc, pextoffset, laarr[i].extLocation, + laarr[i].extLength, *pbh); + udf_next_aext(inode, &pbloc, &pextoffset, &laarr[i].extLocation, + &laarr[i].extLength, pbh, 1); + start ++; + } + } + + for (i=start; i<endnum; i++) + { + udf_next_aext(inode, &pbloc, &pextoffset, &tmploc, &tmplen, pbh, 0); + udf_write_aext(inode, pbloc, &pextoffset, laarr[i].extLocation, + laarr[i].extLength, *pbh, 1); + } +} + +void udf_truncate(struct inode * inode) +{ + int offset; + struct buffer_head *bh; + int err; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE_FILE(inode)) + return; + + if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_IN_ICB) + { + if (inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) + + inode->i_size)) + { + udf_expand_file_adinicb(inode, 
inode->i_size, &err); + if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_IN_ICB) + { + inode->i_size = UDF_I_LENALLOC(inode); + return; + } + else + udf_truncate_extents(inode); + } + else + { + offset = (inode->i_size & (inode->i_sb->s_blocksize - 1)) + + udf_file_entry_alloc_offset(inode); + + if ((bh = udf_tread(inode->i_sb, + udf_get_lb_pblock(inode->i_sb, UDF_I_LOCATION(inode), 0), + inode->i_sb->s_blocksize))) + { + memset(bh->b_data + offset, 0x00, inode->i_sb->s_blocksize - offset); + mark_buffer_dirty(bh); + udf_release_data(bh); + } + UDF_I_LENALLOC(inode) = inode->i_size; + } + } + else + { + block_truncate_page(inode->i_mapping, inode->i_size, udf_get_block); + udf_truncate_extents(inode); + } + + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + UDF_I_UMTIME(inode) = UDF_I_UCTIME(inode) = CURRENT_UTIME; + if (IS_SYNC(inode)) + udf_sync_inode (inode); + else + mark_inode_dirty(inode); +} + +/* + * udf_read_inode + * + * PURPOSE + * Read an inode. + * + * DESCRIPTION + * This routine is called by iget() [which is called by udf_iget()] + * (clean_inode() will have been called first) + * when an inode is first read into memory. + * + * HISTORY + * July 1, 1997 - Andrew E. Mileski + * Written, tested, and released. + * + * 12/19/98 dgb Updated to fix size problems. + */ + +void +udf_read_inode(struct inode *inode) +{ + memset(&UDF_I_LOCATION(inode), 0xFF, sizeof(lb_addr)); +} + +void +__udf_read_inode(struct inode *inode) +{ + struct buffer_head *bh = NULL; + struct FileEntry *fe; + Uint16 ident; + + /* + * Set defaults, but the inode is still incomplete! + * Note: get_new_inode() sets the following on a new inode: + * i_sb = sb + * i_dev = sb->s_dev; + * i_no = ino + * i_flags = sb->s_flags + * i_state = 0 + * clean_inode(): zero fills and sets + * i_count = 1 + * i_nlink = 1 + * i_op = NULL; + */ + + inode->i_blksize = PAGE_SIZE; + + bh = udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 0, &ident); + + if (!bh) + { + printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed !bh\n", + inode->i_ino); + make_bad_inode(inode); + return; + } + + if (ident != TID_FILE_ENTRY && ident != TID_EXTENDED_FILE_ENTRY && + ident != TID_UNALLOCATED_SPACE_ENTRY) + { + printk(KERN_ERR "udf: udf_read_inode(ino %ld) failed ident=%d\n", + inode->i_ino, ident); + udf_release_data(bh); + make_bad_inode(inode); + return; + } + + fe = (struct FileEntry *)bh->b_data; + + if (le16_to_cpu(fe->icbTag.strategyType) == 4096) + { + struct buffer_head *ibh = NULL, *nbh = NULL; + struct IndirectEntry *ie; + + ibh = udf_read_ptagged(inode->i_sb, UDF_I_LOCATION(inode), 1, &ident); + if (ident == TID_INDIRECT_ENTRY) + { + if (ibh) + { + lb_addr loc; + ie = (struct IndirectEntry *)ibh->b_data; + + loc = lelb_to_cpu(ie->indirectICB.extLocation); + + if (ie->indirectICB.extLength && + (nbh = udf_read_ptagged(inode->i_sb, loc, 0, &ident))) + { + if (ident == TID_FILE_ENTRY || + ident == TID_EXTENDED_FILE_ENTRY) + { + memcpy(&UDF_I_LOCATION(inode), &loc, sizeof(lb_addr)); + udf_release_data(bh); + udf_release_data(ibh); + udf_release_data(nbh); + __udf_read_inode(inode); + return; + } + else + { + udf_release_data(nbh); + udf_release_data(ibh); + } + } + else + udf_release_data(ibh); + } + } + else + udf_release_data(ibh); + } + else if (le16_to_cpu(fe->icbTag.strategyType) != 4) + { + printk(KERN_ERR "udf: unsupported strategy type: %d\n", + le16_to_cpu(fe->icbTag.strategyType)); + udf_release_data(bh); + make_bad_inode(inode); + return; + } + udf_fill_inode(inode, bh); + udf_release_data(bh); +} + +static void 
udf_fill_inode(struct inode *inode, struct buffer_head *bh) +{ + struct FileEntry *fe; + struct ExtendedFileEntry *efe; + time_t convtime; + long convtime_usec; + int offset, alen; + + inode->i_version = ++event; + UDF_I_NEW_INODE(inode) = 0; + + fe = (struct FileEntry *)bh->b_data; + efe = (struct ExtendedFileEntry *)bh->b_data; + + if (le16_to_cpu(fe->icbTag.strategyType) == 4) + UDF_I_STRAT4096(inode) = 0; + else /* if (le16_to_cpu(fe->icbTag.strategyType) == 4096) */ + UDF_I_STRAT4096(inode) = 1; + + UDF_I_ALLOCTYPE(inode) = le16_to_cpu(fe->icbTag.flags) & ICB_FLAG_ALLOC_MASK; + if (fe->descTag.tagIdent == TID_EXTENDED_FILE_ENTRY) + UDF_I_EXTENDED_FE(inode) = 1; + else if (fe->descTag.tagIdent == TID_FILE_ENTRY) + UDF_I_EXTENDED_FE(inode) = 0; + else if (fe->descTag.tagIdent == TID_UNALLOCATED_SPACE_ENTRY) + { + UDF_I_LENALLOC(inode) = + le32_to_cpu( + ((struct UnallocatedSpaceEntry *)bh->b_data)->lengthAllocDescs); + return; + } + + inode->i_uid = le32_to_cpu(fe->uid); + if ( inode->i_uid == -1 ) inode->i_uid = UDF_SB(inode->i_sb)->s_uid; + + inode->i_gid = le32_to_cpu(fe->gid); + if ( inode->i_gid == -1 ) inode->i_gid = UDF_SB(inode->i_sb)->s_gid; + + inode->i_nlink = le16_to_cpu(fe->fileLinkCount); + if (!inode->i_nlink) + inode->i_nlink = 1; + + inode->i_size = le64_to_cpu(fe->informationLength); + UDF_I_LENEXTENTS(inode) = inode->i_size; + + inode->i_mode = udf_convert_permissions(fe); + inode->i_mode &= ~UDF_SB(inode->i_sb)->s_umask; + + UDF_I_NEXT_ALLOC_BLOCK(inode) = 0; + UDF_I_NEXT_ALLOC_GOAL(inode) = 0; + + if (UDF_I_EXTENDED_FE(inode) == 0) + { + inode->i_blocks = le64_to_cpu(fe->logicalBlocksRecorded) << + (inode->i_sb->s_blocksize_bits - 9); + + if ( udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(fe->accessTime)) ) + { + inode->i_atime = convtime; + } + else + { + inode->i_atime = UDF_SB_RECORDTIME(inode->i_sb); + } + + if ( udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(fe->modificationTime)) ) + { + inode->i_mtime = convtime; + UDF_I_UMTIME(inode) = convtime_usec; + } + else + { + inode->i_mtime = UDF_SB_RECORDTIME(inode->i_sb); + UDF_I_UMTIME(inode) = 0; + } + + if ( udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(fe->attrTime)) ) + { + inode->i_ctime = convtime; + UDF_I_UCTIME(inode) = convtime_usec; + } + else + { + inode->i_ctime = UDF_SB_RECORDTIME(inode->i_sb); + UDF_I_UCTIME(inode) = 0; + } + + UDF_I_UNIQUE(inode) = le64_to_cpu(fe->uniqueID); + UDF_I_LENEATTR(inode) = le32_to_cpu(fe->lengthExtendedAttr); + UDF_I_LENALLOC(inode) = le32_to_cpu(fe->lengthAllocDescs); + offset = sizeof(struct FileEntry) + UDF_I_LENEATTR(inode); + alen = offset + UDF_I_LENALLOC(inode); + } + else + { + inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << + (inode->i_sb->s_blocksize_bits - 9); + + if ( udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(efe->accessTime)) ) + { + inode->i_atime = convtime; + } + else + { + inode->i_atime = UDF_SB_RECORDTIME(inode->i_sb); + } + + if ( udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(efe->modificationTime)) ) + { + inode->i_mtime = convtime; + UDF_I_UMTIME(inode) = convtime_usec; + } + else + { + inode->i_mtime = UDF_SB_RECORDTIME(inode->i_sb); + UDF_I_UMTIME(inode) = 0; + } + + if ( udf_stamp_to_time(&convtime, &convtime_usec, + lets_to_cpu(efe->createTime)) ) + { + UDF_I_CRTIME(inode) = convtime; + UDF_I_UCRTIME(inode) = convtime_usec; + } + else + { + UDF_I_CRTIME(inode) = UDF_SB_RECORDTIME(inode->i_sb); + UDF_I_UCRTIME(inode) = 0; + } + + if ( udf_stamp_to_time(&convtime, 
&convtime_usec, + lets_to_cpu(efe->attrTime)) ) + { + inode->i_ctime = convtime; + UDF_I_UCTIME(inode) = convtime_usec; + } + else + { + inode->i_ctime = UDF_SB_RECORDTIME(inode->i_sb); + UDF_I_UCTIME(inode) = 0; + } + + UDF_I_UNIQUE(inode) = le64_to_cpu(efe->uniqueID); + UDF_I_LENEATTR(inode) = le32_to_cpu(efe->lengthExtendedAttr); + UDF_I_LENALLOC(inode) = le32_to_cpu(efe->lengthAllocDescs); + offset = sizeof(struct ExtendedFileEntry) + UDF_I_LENEATTR(inode); + alen = offset + UDF_I_LENALLOC(inode); + } + + switch (fe->icbTag.fileType) + { + case FILE_TYPE_DIRECTORY: + { + inode->i_op = &udf_dir_inode_operations; + inode->i_fop = &udf_dir_operations; + inode->i_mode |= S_IFDIR; + inode->i_nlink ++; + break; + } + case FILE_TYPE_REALTIME: + case FILE_TYPE_REGULAR: + case FILE_TYPE_NONE: + { + if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_IN_ICB) + inode->i_data.a_ops = &udf_adinicb_aops; + else + inode->i_data.a_ops = &udf_aops; + inode->i_op = &udf_file_inode_operations; + inode->i_fop = &udf_file_operations; + inode->i_mode |= S_IFREG; + break; + } + case FILE_TYPE_BLOCK: + { + inode->i_mode |= S_IFBLK; + break; + } + case FILE_TYPE_CHAR: + { + inode->i_mode |= S_IFCHR; + break; + } + case FILE_TYPE_FIFO: + { + init_special_inode(inode, inode->i_mode | S_IFIFO, 0); + break; + } + case FILE_TYPE_SYMLINK: + { + inode->i_data.a_ops = &udf_symlink_aops; + inode->i_op = &page_symlink_inode_operations; + inode->i_mode = S_IFLNK|S_IRWXUGO; + break; + } + default: + { + printk(KERN_ERR "udf: udf_fill_inode(ino %ld) failed unknown file type=%d\n", + inode->i_ino, fe->icbTag.fileType); + make_bad_inode(inode); + return; + } + } + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + { + struct buffer_head *tbh = NULL; + struct DeviceSpecificationExtendedAttr *dsea = + (struct DeviceSpecificationExtendedAttr *) + udf_get_extendedattr(inode, 12, 1, &tbh); + + if (dsea) + { + init_special_inode(inode, inode->i_mode, + ((le32_to_cpu(dsea->majorDeviceIdent)) << 8) | + (le32_to_cpu(dsea->minorDeviceIdent) & 0xFF)); + /* Developer ID ??? */ + udf_release_data(tbh); + } + else + { + make_bad_inode(inode); + } + } +} + +static mode_t +udf_convert_permissions(struct FileEntry *fe) +{ + mode_t mode; + Uint32 permissions; + Uint32 flags; + + permissions = le32_to_cpu(fe->permissions); + flags = le16_to_cpu(fe->icbTag.flags); + + mode = (( permissions ) & S_IRWXO) | + (( permissions >> 2 ) & S_IRWXG) | + (( permissions >> 4 ) & S_IRWXU) | + (( flags & ICB_FLAG_SETUID) ? S_ISUID : 0) | + (( flags & ICB_FLAG_SETGID) ? S_ISGID : 0) | + (( flags & ICB_FLAG_STICKY) ? S_ISVTX : 0); + + return mode; +} + +/* + * udf_write_inode + * + * PURPOSE + * Write out the specified inode. + * + * DESCRIPTION + * This routine is called whenever an inode is synced. + * It writes the in-core inode back to the on-disk file entry + * via udf_update_inode(). + * + * HISTORY + * July 1, 1997 - Andrew E. Mileski + * Written, tested, and released. 
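+ * + * LOCKING + * Takes the big kernel lock around the update; udf_sync_inode() + * calls udf_update_inode() directly and leaves serialisation to + * its caller. 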
+ */ + +void udf_write_inode(struct inode * inode, int sync) +{ + lock_kernel(); + udf_update_inode(inode, sync); + unlock_kernel(); +} + +int udf_sync_inode(struct inode * inode) +{ + return udf_update_inode(inode, 1); +} + +static int +udf_update_inode(struct inode *inode, int do_sync) +{ + struct buffer_head *bh = NULL; + struct FileEntry *fe; + struct ExtendedFileEntry *efe; + Uint32 udfperms; + Uint16 icbflags; + Uint16 crclen; + int i; + timestamp cpu_time; + int err = 0; + + bh = udf_tread(inode->i_sb, + udf_get_lb_pblock(inode->i_sb, UDF_I_LOCATION(inode), 0), + inode->i_sb->s_blocksize); + + if (!bh) + { + udf_debug("bread failure\n"); + return -EIO; + } + fe = (struct FileEntry *)bh->b_data; + efe = (struct ExtendedFileEntry *)bh->b_data; + if (UDF_I_NEW_INODE(inode) == 1) + { + if (UDF_I_EXTENDED_FE(inode) == 0) + memset(bh->b_data, 0x00, sizeof(struct FileEntry)); + else + memset(bh->b_data, 0x00, sizeof(struct ExtendedFileEntry)); + memset(bh->b_data + udf_file_entry_alloc_offset(inode) + + UDF_I_LENALLOC(inode), 0x0, inode->i_sb->s_blocksize - + udf_file_entry_alloc_offset(inode) - UDF_I_LENALLOC(inode)); + UDF_I_NEW_INODE(inode) = 0; + } + + if (fe->descTag.tagIdent == TID_UNALLOCATED_SPACE_ENTRY) + { + struct UnallocatedSpaceEntry *use = + (struct UnallocatedSpaceEntry *)bh->b_data; + + use->lengthAllocDescs = cpu_to_le32(UDF_I_LENALLOC(inode)); + crclen = sizeof(struct UnallocatedSpaceEntry) + UDF_I_LENALLOC(inode) - + sizeof(tag); + use->descTag.descCRCLength = cpu_to_le16(crclen); + use->descTag.descCRC = cpu_to_le16(udf_crc((char *)use + sizeof(tag), crclen, 0)); + + use->descTag.tagChecksum = 0; + for (i=0; i<16; i++) + if (i != 4) + use->descTag.tagChecksum += ((Uint8 *)&(use->descTag))[i]; + + mark_buffer_dirty(bh); + udf_release_data(bh); + return err; + } + + if (inode->i_uid != UDF_SB(inode->i_sb)->s_uid) + fe->uid = cpu_to_le32(inode->i_uid); + + if (inode->i_gid != UDF_SB(inode->i_sb)->s_gid) + fe->gid = cpu_to_le32(inode->i_gid); + + udfperms = ((inode->i_mode & S_IRWXO) ) | + ((inode->i_mode & S_IRWXG) << 2) | + ((inode->i_mode & S_IRWXU) << 4); + + udfperms |= (le32_to_cpu(fe->permissions) & + (PERM_O_DELETE | PERM_O_CHATTR | + PERM_G_DELETE | PERM_G_CHATTR | + PERM_U_DELETE | PERM_U_CHATTR)); + fe->permissions = cpu_to_le32(udfperms); + + if (S_ISDIR(inode->i_mode)) + fe->fileLinkCount = cpu_to_le16(inode->i_nlink - 1); + else + fe->fileLinkCount = cpu_to_le16(inode->i_nlink); + + fe->informationLength = cpu_to_le64(inode->i_size); + + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + { + EntityID *eid; + struct buffer_head *tbh = NULL; + struct DeviceSpecificationExtendedAttr *dsea = + (struct DeviceSpecificationExtendedAttr *) + udf_get_extendedattr(inode, 12, 1, &tbh); + + if (!dsea) + { + dsea = (struct DeviceSpecificationExtendedAttr *) + udf_add_extendedattr(inode, + sizeof(struct DeviceSpecificationExtendedAttr) + + sizeof(EntityID), 12, 0x3, &tbh); + dsea->attrType = 12; + dsea->attrSubtype = 1; + dsea->attrLength = sizeof(struct DeviceSpecificationExtendedAttr) + + sizeof(EntityID); + dsea->impUseLength = sizeof(EntityID); + } + eid = (EntityID *)dsea->impUse; + memset(eid, 0, sizeof(EntityID)); + strcpy(eid->ident, UDF_ID_DEVELOPER); + eid->identSuffix[0] = UDF_OS_CLASS_UNIX; + eid->identSuffix[1] = UDF_OS_ID_LINUX; + dsea->majorDeviceIdent = kdev_t_to_nr(inode->i_rdev) >> 8; + dsea->minorDeviceIdent = kdev_t_to_nr(inode->i_rdev) & 0xFF; + mark_buffer_dirty_inode(tbh, inode); + udf_release_data(tbh); + } + + if (UDF_I_EXTENDED_FE(inode) == 0) 
+ { + fe->logicalBlocksRecorded = cpu_to_le64( + (inode->i_blocks + (1 << (inode->i_sb->s_blocksize_bits - 9)) - 1) >> + (inode->i_sb->s_blocksize_bits - 9)); + + if (udf_time_to_stamp(&cpu_time, inode->i_atime, 0)) + fe->accessTime = cpu_to_lets(cpu_time); + if (udf_time_to_stamp(&cpu_time, inode->i_mtime, UDF_I_UMTIME(inode))) + fe->modificationTime = cpu_to_lets(cpu_time); + if (udf_time_to_stamp(&cpu_time, inode->i_ctime, UDF_I_UCTIME(inode))) + fe->attrTime = cpu_to_lets(cpu_time); + memset(&(fe->impIdent), 0, sizeof(EntityID)); + strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER); + fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; + fe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; + fe->uniqueID = cpu_to_le64(UDF_I_UNIQUE(inode)); + fe->lengthExtendedAttr = cpu_to_le32(UDF_I_LENEATTR(inode)); + fe->lengthAllocDescs = cpu_to_le32(UDF_I_LENALLOC(inode)); + fe->descTag.tagIdent = le16_to_cpu(TID_FILE_ENTRY); + crclen = sizeof(struct FileEntry); + } + else + { + efe->objectSize = cpu_to_le64(inode->i_size); + efe->logicalBlocksRecorded = cpu_to_le64( + (inode->i_blocks + (1 << (inode->i_sb->s_blocksize_bits - 9)) - 1) >> + (inode->i_sb->s_blocksize_bits - 9)); + + if (UDF_I_CRTIME(inode) >= inode->i_atime) + { + UDF_I_CRTIME(inode) = inode->i_atime; + UDF_I_UCRTIME(inode) = 0; + } + if (UDF_I_CRTIME(inode) > inode->i_mtime || + (UDF_I_CRTIME(inode) == inode->i_mtime && + UDF_I_UCRTIME(inode) > UDF_I_UMTIME(inode))) + { + UDF_I_CRTIME(inode) = inode->i_mtime; + UDF_I_UCRTIME(inode) = UDF_I_UMTIME(inode); + } + if (UDF_I_CRTIME(inode) > inode->i_ctime || + (UDF_I_CRTIME(inode) == inode->i_ctime && + UDF_I_UCRTIME(inode) > UDF_I_UCTIME(inode))) + { + UDF_I_CRTIME(inode) = inode->i_ctime; + UDF_I_UCRTIME(inode) = UDF_I_UCTIME(inode); + } + + if (udf_time_to_stamp(&cpu_time, inode->i_atime, 0)) + efe->accessTime = cpu_to_lets(cpu_time); + if (udf_time_to_stamp(&cpu_time, inode->i_mtime, UDF_I_UMTIME(inode))) + efe->modificationTime = cpu_to_lets(cpu_time); + if (udf_time_to_stamp(&cpu_time, UDF_I_CRTIME(inode), UDF_I_UCRTIME(inode))) + efe->createTime = cpu_to_lets(cpu_time); + if (udf_time_to_stamp(&cpu_time, inode->i_ctime, UDF_I_UCTIME(inode))) + efe->attrTime = cpu_to_lets(cpu_time); + + memset(&(efe->impIdent), 0, sizeof(EntityID)); + strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER); + efe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; + efe->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; + efe->uniqueID = cpu_to_le64(UDF_I_UNIQUE(inode)); + efe->lengthExtendedAttr = cpu_to_le32(UDF_I_LENEATTR(inode)); + efe->lengthAllocDescs = cpu_to_le32(UDF_I_LENALLOC(inode)); + efe->descTag.tagIdent = le16_to_cpu(TID_EXTENDED_FILE_ENTRY); + crclen = sizeof(struct ExtendedFileEntry); + } + if (UDF_I_STRAT4096(inode)) + { + fe->icbTag.strategyType = cpu_to_le16(4096); + fe->icbTag.strategyParameter = cpu_to_le16(1); + fe->icbTag.numEntries = cpu_to_le16(2); + } + else + { + fe->icbTag.strategyType = cpu_to_le16(4); + fe->icbTag.numEntries = cpu_to_le16(1); + } + + if (S_ISDIR(inode->i_mode)) + fe->icbTag.fileType = FILE_TYPE_DIRECTORY; + else if (S_ISREG(inode->i_mode)) + fe->icbTag.fileType = FILE_TYPE_REGULAR; + else if (S_ISLNK(inode->i_mode)) + fe->icbTag.fileType = FILE_TYPE_SYMLINK; + else if (S_ISBLK(inode->i_mode)) + fe->icbTag.fileType = FILE_TYPE_BLOCK; + else if (S_ISCHR(inode->i_mode)) + fe->icbTag.fileType = FILE_TYPE_CHAR; + else if (S_ISFIFO(inode->i_mode)) + fe->icbTag.fileType = FILE_TYPE_FIFO; + + icbflags = UDF_I_ALLOCTYPE(inode) | + ((inode->i_mode & S_ISUID) ? 
ICB_FLAG_SETUID : 0) | + ((inode->i_mode & S_ISGID) ? ICB_FLAG_SETGID : 0) | + ((inode->i_mode & S_ISVTX) ? ICB_FLAG_STICKY : 0) | + (le16_to_cpu(fe->icbTag.flags) & + ~(ICB_FLAG_ALLOC_MASK | ICB_FLAG_SETUID | + ICB_FLAG_SETGID | ICB_FLAG_STICKY)); + + fe->icbTag.flags = cpu_to_le16(icbflags); + fe->descTag.descVersion = cpu_to_le16(2); + fe->descTag.tagSerialNum = cpu_to_le16(UDF_SB_SERIALNUM(inode->i_sb)); + fe->descTag.tagLocation = cpu_to_le32(UDF_I_LOCATION(inode).logicalBlockNum); + crclen += UDF_I_LENEATTR(inode) + UDF_I_LENALLOC(inode) - sizeof(tag); + fe->descTag.descCRCLength = cpu_to_le16(crclen); + fe->descTag.descCRC = cpu_to_le16(udf_crc((char *)fe + sizeof(tag), crclen, 0)); + + fe->descTag.tagChecksum = 0; + for (i=0; i<16; i++) + if (i != 4) + fe->descTag.tagChecksum += ((Uint8 *)&(fe->descTag))[i]; + + /* write the data blocks */ + mark_buffer_dirty(bh); + if (do_sync) + { + ll_rw_block(WRITE, 1, &bh); + wait_on_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) + { + printk("IO error syncing udf inode [%s:%08lx]\n", + bdevname(inode->i_dev), inode->i_ino); + err = -EIO; + } + } + udf_release_data(bh); + return err; +} + +/* + * udf_iget + * + * PURPOSE + * Get an inode. + * + * DESCRIPTION + * This routine replaces iget() and read_inode(). + * + * HISTORY + * October 3, 1997 - Andrew E. Mileski + * Written, tested, and released. + * + * 12/19/98 dgb Added semaphore and changed to be a wrapper of iget + */ +struct inode * +udf_iget(struct super_block *sb, lb_addr ino) +{ + struct inode *inode; + unsigned long block; + + block = udf_get_lb_pblock(sb, ino, 0); + + /* Get the inode */ + + inode = iget(sb, block); + /* calls udf_read_inode() ! */ + + if (!inode) + { + printk(KERN_ERR "udf: iget() failed\n"); + return NULL; + } + else if (is_bad_inode(inode)) + { + iput(inode); + return NULL; + } + else if (UDF_I_LOCATION(inode).logicalBlockNum == 0xFFFFFFFF && + UDF_I_LOCATION(inode).partitionReferenceNum == 0xFFFF) + { + memcpy(&UDF_I_LOCATION(inode), &ino, sizeof(lb_addr)); + __udf_read_inode(inode); + if (is_bad_inode(inode)) + { + iput(inode); + return NULL; + } + } + + if ( ino.logicalBlockNum >= UDF_SB_PARTLEN(sb, ino.partitionReferenceNum) ) + { + udf_debug("block=%d, partition=%d out of range\n", + ino.logicalBlockNum, ino.partitionReferenceNum); + make_bad_inode(inode); + iput(inode); + return NULL; + } + + return inode; +} + +Sint8 udf_add_aext(struct inode *inode, lb_addr *bloc, int *extoffset, + lb_addr eloc, Uint32 elen, struct buffer_head **bh, int inc) +{ + int adsize; + short_ad *sad = NULL; + long_ad *lad = NULL; + struct AllocExtDesc *aed; + int ret; + + if (!(*bh)) + { + if (!(*bh = udf_tread(inode->i_sb, + udf_get_lb_pblock(inode->i_sb, *bloc, 0), + inode->i_sb->s_blocksize))) + { + udf_debug("reading block %d failed!\n", + udf_get_lb_pblock(inode->i_sb, *bloc, 0)); + return -1; + } + } + + if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_SHORT) + adsize = sizeof(short_ad); + else if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_LONG) + adsize = sizeof(long_ad); + else + return -1; + + if (*extoffset + (2 * adsize) > inode->i_sb->s_blocksize) + { + char *sptr, *dptr; + struct buffer_head *nbh; + int err, loffset; + lb_addr obloc = *bloc; + + if (!(bloc->logicalBlockNum = udf_new_block(inode->i_sb, inode, + obloc.partitionReferenceNum, obloc.logicalBlockNum, &err))) + { + return -1; + } + if (!(nbh = udf_tgetblk(inode->i_sb, udf_get_lb_pblock(inode->i_sb, + *bloc, 0), inode->i_sb->s_blocksize))) + { + return -1; + } + lock_buffer(nbh); + memset(nbh->b_data, 0x00, 
inode->i_sb->s_blocksize); + mark_buffer_uptodate(nbh, 1); + unlock_buffer(nbh); + mark_buffer_dirty_inode(nbh, inode); + + aed = (struct AllocExtDesc *)(nbh->b_data); + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT)) + aed->previousAllocExtLocation = cpu_to_le32(obloc.logicalBlockNum); + if (*extoffset + adsize > inode->i_sb->s_blocksize) + { + loffset = *extoffset; + aed->lengthAllocDescs = cpu_to_le32(adsize); + sptr = (*bh)->b_data + *extoffset - adsize; + dptr = nbh->b_data + sizeof(struct AllocExtDesc); + memcpy(dptr, sptr, adsize); + *extoffset = sizeof(struct AllocExtDesc) + adsize; + } + else + { + loffset = *extoffset + adsize; + aed->lengthAllocDescs = cpu_to_le32(0); + sptr = (*bh)->b_data + *extoffset; + *extoffset = sizeof(struct AllocExtDesc); + + if (memcmp(&UDF_I_LOCATION(inode), &obloc, sizeof(lb_addr))) + { + aed = (struct AllocExtDesc *)(*bh)->b_data; + aed->lengthAllocDescs = + cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); + } + else + { + UDF_I_LENALLOC(inode) += adsize; + mark_inode_dirty(inode); + } + } + udf_new_tag(nbh->b_data, TID_ALLOC_EXTENT_DESC, 2, 1, + bloc->logicalBlockNum, sizeof(tag)); + switch (UDF_I_ALLOCTYPE(inode)) + { + case ICB_FLAG_AD_SHORT: + { + sad = (short_ad *)sptr; + sad->extLength = cpu_to_le32( + EXTENT_NEXT_EXTENT_ALLOCDECS << 30 | + inode->i_sb->s_blocksize); + sad->extPosition = cpu_to_le32(bloc->logicalBlockNum); + break; + } + case ICB_FLAG_AD_LONG: + { + lad = (long_ad *)sptr; + lad->extLength = cpu_to_le32( + EXTENT_NEXT_EXTENT_ALLOCDECS << 30 | + inode->i_sb->s_blocksize); + lad->extLocation = cpu_to_lelb(*bloc); + memset(lad->impUse, 0x00, sizeof(lad->impUse)); + break; + } + } + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) + udf_update_tag((*bh)->b_data, loffset); + else + udf_update_tag((*bh)->b_data, sizeof(struct AllocExtDesc)); + mark_buffer_dirty_inode(*bh, inode); + udf_release_data(*bh); + *bh = nbh; + } + + ret = udf_write_aext(inode, *bloc, extoffset, eloc, elen, *bh, inc); + + if (!memcmp(&UDF_I_LOCATION(inode), bloc, sizeof(lb_addr))) + { + UDF_I_LENALLOC(inode) += adsize; + mark_inode_dirty(inode); + } + else + { + aed = (struct AllocExtDesc *)(*bh)->b_data; + aed->lengthAllocDescs = + cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) + udf_update_tag((*bh)->b_data, *extoffset + (inc ? 
0 : adsize)); + else + udf_update_tag((*bh)->b_data, sizeof(struct AllocExtDesc)); + mark_buffer_dirty_inode(*bh, inode); + } + + return ret; +} + +Sint8 udf_write_aext(struct inode *inode, lb_addr bloc, int *extoffset, + lb_addr eloc, Uint32 elen, struct buffer_head *bh, int inc) +{ + int adsize; + short_ad *sad = NULL; + long_ad *lad = NULL; + + if (!(bh)) + { + if (!(bh = udf_tread(inode->i_sb, + udf_get_lb_pblock(inode->i_sb, bloc, 0), + inode->i_sb->s_blocksize))) + { + udf_debug("reading block %d failed!\n", + udf_get_lb_pblock(inode->i_sb, bloc, 0)); + return -1; + } + } + else + atomic_inc(&bh->b_count); + + if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_SHORT) + adsize = sizeof(short_ad); + else if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_LONG) + adsize = sizeof(long_ad); + else + return -1; + + switch (UDF_I_ALLOCTYPE(inode)) + { + case ICB_FLAG_AD_SHORT: + { + sad = (short_ad *)((bh)->b_data + *extoffset); + sad->extLength = cpu_to_le32(elen); + sad->extPosition = cpu_to_le32(eloc.logicalBlockNum); + break; + } + case ICB_FLAG_AD_LONG: + { + lad = (long_ad *)((bh)->b_data + *extoffset); + lad->extLength = cpu_to_le32(elen); + lad->extLocation = cpu_to_lelb(eloc); + memset(lad->impUse, 0x00, sizeof(lad->impUse)); + break; + } + } + + if (memcmp(&UDF_I_LOCATION(inode), &bloc, sizeof(lb_addr))) + { + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) + { + struct AllocExtDesc *aed = (struct AllocExtDesc *)(bh)->b_data; + udf_update_tag((bh)->b_data, + le32_to_cpu(aed->lengthAllocDescs) + sizeof(struct AllocExtDesc)); + } + mark_buffer_dirty_inode(bh, inode); + } + else + { + mark_inode_dirty(inode); + mark_buffer_dirty(bh); + } + + if (inc) + *extoffset += adsize; + udf_release_data(bh); + return (elen >> 30); +} + +Sint8 udf_next_aext(struct inode *inode, lb_addr *bloc, int *extoffset, + lb_addr *eloc, Uint32 *elen, struct buffer_head **bh, int inc) +{ + Uint16 tagIdent; + int pos, alen; + Sint8 etype; + + if (!(*bh)) + { + if (!(*bh = udf_tread(inode->i_sb, + udf_get_lb_pblock(inode->i_sb, *bloc, 0), + inode->i_sb->s_blocksize))) + { + udf_debug("reading block %d failed!\n", + udf_get_lb_pblock(inode->i_sb, *bloc, 0)); + return -1; + } + } + + tagIdent = ((tag *)(*bh)->b_data)->tagIdent; + + if (!memcmp(&UDF_I_LOCATION(inode), bloc, sizeof(lb_addr))) + { + if (tagIdent == TID_FILE_ENTRY || tagIdent == TID_EXTENDED_FILE_ENTRY || + UDF_I_NEW_INODE(inode)) + { + pos = udf_file_entry_alloc_offset(inode); + alen = UDF_I_LENALLOC(inode) + pos; + } + else if (tagIdent == TID_UNALLOCATED_SPACE_ENTRY) + { + pos = sizeof(struct UnallocatedSpaceEntry); + alen = UDF_I_LENALLOC(inode) + pos; + } + else + return -1; + } + else if (tagIdent == TID_ALLOC_EXTENT_DESC) + { + struct AllocExtDesc *aed = (struct AllocExtDesc *)(*bh)->b_data; + + pos = sizeof(struct AllocExtDesc); + alen = le32_to_cpu(aed->lengthAllocDescs) + pos; + } + else + return -1; + + if (!(*extoffset)) + *extoffset = pos; + + switch (UDF_I_ALLOCTYPE(inode)) + { + case ICB_FLAG_AD_SHORT: + { + short_ad *sad; + + if (!(sad = udf_get_fileshortad((*bh)->b_data, alen, extoffset, inc))) + return -1; + + if ((etype = le32_to_cpu(sad->extLength) >> 30) == EXTENT_NEXT_EXTENT_ALLOCDECS) + { + bloc->logicalBlockNum = le32_to_cpu(sad->extPosition); + *extoffset = 0; + udf_release_data(*bh); + *bh = NULL; + return udf_next_aext(inode, bloc, extoffset, eloc, elen, bh, inc); + } + else + { + eloc->logicalBlockNum = le32_to_cpu(sad->extPosition); + eloc->partitionReferenceNum = 
UDF_I_LOCATION(inode).partitionReferenceNum; + *elen = le32_to_cpu(sad->extLength) & UDF_EXTENT_LENGTH_MASK; + } + break; + } + case ICB_FLAG_AD_LONG: + { + long_ad *lad; + + if (!(lad = udf_get_filelongad((*bh)->b_data, alen, extoffset, inc))) + return -1; + + if ((etype = le32_to_cpu(lad->extLength) >> 30) == EXTENT_NEXT_EXTENT_ALLOCDECS) + { + *bloc = lelb_to_cpu(lad->extLocation); + *extoffset = 0; + udf_release_data(*bh); + *bh = NULL; + return udf_next_aext(inode, bloc, extoffset, eloc, elen, bh, inc); + } + else + { + *eloc = lelb_to_cpu(lad->extLocation); + *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK; + } + break; + } + case ICB_FLAG_AD_IN_ICB: + { + if (UDF_I_LENALLOC(inode) == 0) + return -1; + etype = EXTENT_RECORDED_ALLOCATED; + *eloc = UDF_I_LOCATION(inode); + *elen = UDF_I_LENALLOC(inode); + break; + } + default: + { + udf_debug("alloc_type = %d unsupported\n", UDF_I_ALLOCTYPE(inode)); + return -1; + } + } + if (*elen) + return etype; + + udf_debug("Empty Extent, inode=%ld, alloctype=%d, eloc=%d, elen=%d, etype=%d, extoffset=%d\n", + inode->i_ino, UDF_I_ALLOCTYPE(inode), eloc->logicalBlockNum, *elen, etype, *extoffset); + if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_SHORT) + *extoffset -= sizeof(short_ad); + else if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_LONG) + *extoffset -= sizeof(long_ad); + return -1; +} + +Sint8 udf_current_aext(struct inode *inode, lb_addr *bloc, int *extoffset, + lb_addr *eloc, Uint32 *elen, struct buffer_head **bh, int inc) +{ + int pos, alen; + Sint8 etype; + + if (!(*bh)) + { + if (!(*bh = udf_tread(inode->i_sb, + udf_get_lb_pblock(inode->i_sb, *bloc, 0), + inode->i_sb->s_blocksize))) + { + udf_debug("reading block %d failed!\n", + udf_get_lb_pblock(inode->i_sb, *bloc, 0)); + return -1; + } + } + + if (!memcmp(&UDF_I_LOCATION(inode), bloc, sizeof(lb_addr))) + { + if (!(UDF_I_EXTENDED_FE(inode))) + pos = sizeof(struct FileEntry) + UDF_I_LENEATTR(inode); + else + pos = sizeof(struct ExtendedFileEntry) + UDF_I_LENEATTR(inode); + alen = UDF_I_LENALLOC(inode) + pos; + } + else + { + struct AllocExtDesc *aed = (struct AllocExtDesc *)(*bh)->b_data; + + pos = sizeof(struct AllocExtDesc); + alen = le32_to_cpu(aed->lengthAllocDescs) + pos; + } + + if (!(*extoffset)) + *extoffset = pos; + + switch (UDF_I_ALLOCTYPE(inode)) + { + case ICB_FLAG_AD_SHORT: + { + short_ad *sad; + + if (!(sad = udf_get_fileshortad((*bh)->b_data, alen, extoffset, inc))) + return -1; + + etype = le32_to_cpu(sad->extLength) >> 30; + eloc->logicalBlockNum = le32_to_cpu(sad->extPosition); + eloc->partitionReferenceNum = UDF_I_LOCATION(inode).partitionReferenceNum; + *elen = le32_to_cpu(sad->extLength) & UDF_EXTENT_LENGTH_MASK; + break; + } + case ICB_FLAG_AD_LONG: + { + long_ad *lad; + + if (!(lad = udf_get_filelongad((*bh)->b_data, alen, extoffset, inc))) + return -1; + + etype = le32_to_cpu(lad->extLength) >> 30; + *eloc = lelb_to_cpu(lad->extLocation); + *elen = le32_to_cpu(lad->extLength) & UDF_EXTENT_LENGTH_MASK; + break; + } + default: + { + udf_debug("alloc_type = %d unsupported\n", UDF_I_ALLOCTYPE(inode)); + return -1; + } + } + if (*elen) + return etype; + + udf_debug("Empty Extent!\n"); + if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_SHORT) + *extoffset -= sizeof(short_ad); + else if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_LONG) + *extoffset -= sizeof(long_ad); + return -1; +} + +Sint8 udf_insert_aext(struct inode *inode, lb_addr bloc, int extoffset, + lb_addr neloc, Uint32 nelen, struct buffer_head *bh) +{ + lb_addr oeloc; + Uint32 oelen; + Sint8 etype; + + if (!bh) + 
{ + if (!(bh = udf_tread(inode->i_sb, + udf_get_lb_pblock(inode->i_sb, bloc, 0), + inode->i_sb->s_blocksize))) + { + udf_debug("reading block %d failed!\n", + udf_get_lb_pblock(inode->i_sb, bloc, 0)); + return -1; + } + } + else + atomic_inc(&bh->b_count); + + while ((etype = udf_next_aext(inode, &bloc, &extoffset, &oeloc, &oelen, &bh, 0)) != -1) + { + udf_write_aext(inode, bloc, &extoffset, neloc, nelen, bh, 1); + + neloc = oeloc; + nelen = (etype << 30) | oelen; + } + udf_add_aext(inode, &bloc, &extoffset, neloc, nelen, &bh, 1); + udf_release_data(bh); + return (nelen >> 30); +} + +Sint8 udf_delete_aext(struct inode *inode, lb_addr nbloc, int nextoffset, + lb_addr eloc, Uint32 elen, struct buffer_head *nbh) +{ + struct buffer_head *obh; + lb_addr obloc; + int oextoffset, adsize; + Sint8 etype; + struct AllocExtDesc *aed; + + if (!(nbh)) + { + if (!(nbh = udf_tread(inode->i_sb, + udf_get_lb_pblock(inode->i_sb, nbloc, 0), + inode->i_sb->s_blocksize))) + { + udf_debug("reading block %d failed!\n", + udf_get_lb_pblock(inode->i_sb, nbloc, 0)); + return -1; + } + } + else + atomic_inc(&nbh->b_count); + atomic_inc(&nbh->b_count); + + if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_SHORT) + adsize = sizeof(short_ad); + else if (UDF_I_ALLOCTYPE(inode) == ICB_FLAG_AD_LONG) + adsize = sizeof(long_ad); + else + adsize = 0; + + obh = nbh; + obloc = nbloc; + oextoffset = nextoffset; + + if (udf_next_aext(inode, &nbloc, &nextoffset, &eloc, &elen, &nbh, 1) == -1) + return -1; + + while ((etype = udf_next_aext(inode, &nbloc, &nextoffset, &eloc, &elen, &nbh, 1)) != -1) + { + udf_write_aext(inode, obloc, &oextoffset, eloc, (etype << 30) | elen, obh, 1); + if (memcmp(&nbloc, &obloc, sizeof(lb_addr))) + { + obloc = nbloc; + udf_release_data(obh); + atomic_inc(&nbh->b_count); + obh = nbh; + oextoffset = nextoffset - adsize; + } + } + memset(&eloc, 0x00, sizeof(lb_addr)); + elen = 0; + + if (memcmp(&nbloc, &obloc, sizeof(lb_addr))) + { + udf_free_blocks(inode->i_sb, inode, nbloc, 0, 1); + udf_write_aext(inode, obloc, &oextoffset, eloc, elen, obh, 1); + udf_write_aext(inode, obloc, &oextoffset, eloc, elen, obh, 1); + if (!memcmp(&UDF_I_LOCATION(inode), &obloc, sizeof(lb_addr))) + { + UDF_I_LENALLOC(inode) -= (adsize * 2); + mark_inode_dirty(inode); + } + else + { + aed = (struct AllocExtDesc *)(obh)->b_data; + aed->lengthAllocDescs = + cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - (2*adsize)); + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) + udf_update_tag((obh)->b_data, oextoffset - (2*adsize)); + else + udf_update_tag((obh)->b_data, sizeof(struct AllocExtDesc)); + mark_buffer_dirty_inode(obh, inode); + } + } + else + { + udf_write_aext(inode, obloc, &oextoffset, eloc, elen, obh, 1); + if (!memcmp(&UDF_I_LOCATION(inode), &obloc, sizeof(lb_addr))) + { + UDF_I_LENALLOC(inode) -= adsize; + mark_inode_dirty(inode); + } + else + { + aed = (struct AllocExtDesc *)(obh)->b_data; + aed->lengthAllocDescs = + cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) - adsize); + if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || UDF_SB_UDFREV(inode->i_sb) >= 0x0201) + udf_update_tag((obh)->b_data, oextoffset - adsize); + else + udf_update_tag((obh)->b_data, sizeof(struct AllocExtDesc)); + mark_buffer_dirty_inode(obh, inode); + } + } + + udf_release_data(nbh); + udf_release_data(obh); + return (elen >> 30); +} + +Sint8 inode_bmap(struct inode *inode, int block, lb_addr *bloc, Uint32 *extoffset, + lb_addr *eloc, Uint32 *elen, Uint32 *offset, struct buffer_head **bh) +{ + Uint64 lbcount = 
0, bcount = (Uint64)block << inode->i_sb->s_blocksize_bits; + Sint8 etype; + + if (block < 0) + { + printk(KERN_ERR "udf: inode_bmap: block < 0\n"); + return -1; + } + if (!inode) + { + printk(KERN_ERR "udf: inode_bmap: NULL inode\n"); + return -1; + } + + *extoffset = 0; + *elen = 0; + *bloc = UDF_I_LOCATION(inode); + + do + { + if ((etype = udf_next_aext(inode, bloc, extoffset, eloc, elen, bh, 1)) == -1) + { + *offset = bcount - lbcount; + UDF_I_LENEXTENTS(inode) = lbcount; + return -1; + } + lbcount += *elen; + } while (lbcount <= bcount); + + *offset = bcount + *elen - lbcount; + + return etype; +} + +long udf_block_map(struct inode *inode, long block) +{ + lb_addr eloc, bloc; + Uint32 offset, extoffset, elen; + struct buffer_head *bh = NULL; + int ret; + + lock_kernel(); + + if (inode_bmap(inode, block, &bloc, &extoffset, &eloc, &elen, &offset, &bh) == EXTENT_RECORDED_ALLOCATED) + ret = udf_get_lb_pblock(inode->i_sb, eloc, offset >> inode->i_sb->s_blocksize_bits); + else + ret = 0; + + unlock_kernel(); + + if (bh) + udf_release_data(bh); + + if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_VARCONV)) + return udf_fixed_to_variable(ret); + else + return ret; +} diff -urN linux-2.4.16-reiserfspatches-immutable/fs/ufs/truncate.c~ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ufs/truncate.c~ --- linux-2.4.16-reiserfspatches-immutable/fs/ufs/truncate.c~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/fs/ufs/truncate.c~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,469 @@ +/* + * linux/fs/ufs/truncate.c + * + * Copyright (C) 1998 + * Daniel Pirkl + * Charles University, Faculty of Mathematics and Physics + * + * from + * + * linux/fs/ext2/truncate.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/truncate.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +/* + * Real random numbers for secure rm added 94/02/18 + * Idea from Pierre del Perugia + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "swab.h" +#include "util.h" + +#undef UFS_TRUNCATE_DEBUG + +#ifdef UFS_TRUNCATE_DEBUG +#define UFSD(x) printk("(%s, %d), %s: ", __FILE__, __LINE__, __FUNCTION__); printk x; +#else +#define UFSD(x) +#endif + +/* + * Secure deletion currently doesn't work. It interacts very badly + * with buffers shared with memory mappings, and for that reason + * can't be done in the truncate() routines. It should instead be + * done separately in "release()" before calling the truncate routines + * that will release the actual file blocks. 
+ * + * Linus + */ + +#define DIRECT_BLOCK ((inode->i_size + uspi->s_bsize - 1) >> uspi->s_bshift) +#define DIRECT_FRAGMENT ((inode->i_size + uspi->s_fsize - 1) >> uspi->s_fshift) + +#define DATA_BUFFER_USED(bh) \ + (atomic_read(&bh->b_count)>1 || buffer_locked(bh)) + +static int ufs_trunc_direct (struct inode * inode) +{ + struct super_block * sb; + struct ufs_sb_private_info * uspi; + struct buffer_head * bh; + u32 * p; + unsigned frag1, frag2, frag3, frag4, block1, block2; + unsigned frag_to_free, free_count; + unsigned i, j, tmp; + int retry; + + UFSD(("ENTER\n")) + + sb = inode->i_sb; + uspi = sb->u.ufs_sb.s_uspi; + + frag_to_free = 0; + free_count = 0; + retry = 0; + + frag1 = DIRECT_FRAGMENT; + frag4 = min_t(u32, UFS_NDIR_FRAGMENT, inode->u.ufs_i.i_lastfrag); + frag2 = ((frag1 & uspi->s_fpbmask) ? ((frag1 | uspi->s_fpbmask) + 1) : frag1); + frag3 = frag4 & ~uspi->s_fpbmask; + block1 = block2 = 0; + if (frag2 > frag3) { + frag2 = frag4; + frag3 = frag4 = 0; + } + else if (frag2 < frag3) { + block1 = ufs_fragstoblks (frag2); + block2 = ufs_fragstoblks (frag3); + } + + UFSD(("frag1 %u, frag2 %u, block1 %u, block2 %u, frag3 %u, frag4 %u\n", frag1, frag2, block1, block2, frag3, frag4)) + + if (frag1 >= frag2) + goto next1; + + /* + * Free first free fragments + */ + p = inode->u.ufs_i.i_u1.i_data + ufs_fragstoblks (frag1); + tmp = fs32_to_cpu(sb, *p); + if (!tmp ) + ufs_panic (sb, "ufs_trunc_direct", "internal error"); + frag1 = ufs_fragnum (frag1); + frag2 = ufs_fragnum (frag2); + for (j = frag1; j < frag2; j++) { + bh = get_hash_table (sb->s_dev, tmp + j, uspi->s_fsize); + if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *p)) { + retry = 1; + brelse (bh); + goto next1; + } + bforget (bh); + } + inode->i_blocks -= (frag2-frag1) << uspi->s_nspfshift; + mark_inode_dirty(inode); + ufs_free_fragments (inode, tmp + frag1, frag2 - frag1); + frag_to_free = tmp + frag1; + +next1: + /* + * Free whole blocks + */ + for (i = block1 ; i < block2; i++) { + p = inode->u.ufs_i.i_u1.i_data + i; + tmp = fs32_to_cpu(sb, *p); + if (!tmp) + continue; + for (j = 0; j < uspi->s_fpb; j++) { + bh = get_hash_table (sb->s_dev, tmp + j, uspi->s_fsize); + if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *p)) { + retry = 1; + brelse (bh); + goto next2; + } + bforget (bh); + } + *p = 0; + inode->i_blocks -= uspi->s_nspb; + mark_inode_dirty(inode); + if (free_count == 0) { + frag_to_free = tmp; + free_count = uspi->s_fpb; + } else if (free_count > 0 && frag_to_free == tmp - free_count) + free_count += uspi->s_fpb; + else { + ufs_free_blocks (inode, frag_to_free, free_count); + frag_to_free = tmp; + free_count = uspi->s_fpb; + } +next2:; + } + + if (free_count > 0) + ufs_free_blocks (inode, frag_to_free, free_count); + + if (frag3 >= frag4) + goto next3; + + /* + * Free last free fragments + */ + p = inode->u.ufs_i.i_u1.i_data + ufs_fragstoblks (frag3); + tmp = fs32_to_cpu(sb, *p); + if (!tmp ) + ufs_panic(sb, "ufs_truncate_direct", "internal error"); + frag4 = ufs_fragnum (frag4); + for (j = 0; j < frag4; j++) { + bh = get_hash_table (sb->s_dev, tmp + j, uspi->s_fsize); + if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *p)) { + retry = 1; + brelse (bh); + goto next1; + } + bforget (bh); + } + *p = 0; + inode->i_blocks -= frag4 << uspi->s_nspfshift; + mark_inode_dirty(inode); + ufs_free_fragments (inode, tmp, frag4); + next3: + + UFSD(("EXIT\n")) + return retry; +} + + +static int ufs_trunc_indirect (struct inode * inode, unsigned offset, u32 * p) +{ + struct super_block * sb; + struct 
ufs_sb_private_info * uspi; + struct ufs_buffer_head * ind_ubh; + struct buffer_head * bh; + u32 * ind; + unsigned indirect_block, i, j, tmp; + unsigned frag_to_free, free_count; + int retry; + + UFSD(("ENTER\n")) + + sb = inode->i_sb; + uspi = sb->u.ufs_sb.s_uspi; + + frag_to_free = 0; + free_count = 0; + retry = 0; + + tmp = fs32_to_cpu(sb, *p); + if (!tmp) + return 0; + ind_ubh = ubh_bread (sb->s_dev, tmp, uspi->s_bsize); + if (tmp != fs32_to_cpu(sb, *p)) { + ubh_brelse (ind_ubh); + return 1; + } + if (!ind_ubh) { + *p = 0; + return 0; + } + + indirect_block = (DIRECT_BLOCK > offset) ? (DIRECT_BLOCK - offset) : 0; + for (i = indirect_block; i < uspi->s_apb; i++) { + ind = ubh_get_addr32 (ind_ubh, i); + tmp = fs32_to_cpu(sb, *ind); + if (!tmp) + continue; + for (j = 0; j < uspi->s_fpb; j++) { + bh = get_hash_table (sb->s_dev, tmp + j, uspi->s_fsize); + if ((bh && DATA_BUFFER_USED(bh)) || tmp != fs32_to_cpu(sb, *ind)) { + retry = 1; + brelse (bh); + goto next; + } + bforget (bh); + } + *ind = 0; + ubh_mark_buffer_dirty(ind_ubh); + if (free_count == 0) { + frag_to_free = tmp; + free_count = uspi->s_fpb; + } else if (free_count > 0 && frag_to_free == tmp - free_count) + free_count += uspi->s_fpb; + else { + ufs_free_blocks (inode, frag_to_free, free_count); + frag_to_free = tmp; + free_count = uspi->s_fpb; + } + inode->i_blocks -= uspi->s_nspb; + mark_inode_dirty(inode); +next:; + } + + if (free_count > 0) { + ufs_free_blocks (inode, frag_to_free, free_count); + } + for (i = 0; i < uspi->s_apb; i++) + if (*ubh_get_addr32(ind_ubh,i)) + break; + if (i >= uspi->s_apb) { + if (ubh_max_bcount(ind_ubh) != 1) { + retry = 1; + } + else { + tmp = fs32_to_cpu(sb, *p); + *p = 0; + inode->i_blocks -= uspi->s_nspb; + mark_inode_dirty(inode); + ufs_free_blocks (inode, tmp, uspi->s_fpb); + ubh_bforget(ind_ubh); + ind_ubh = NULL; + } + } + if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { + ubh_ll_rw_block (WRITE, 1, &ind_ubh); + ubh_wait_on_buffer (ind_ubh); + } + ubh_brelse (ind_ubh); + + UFSD(("EXIT\n")) + + return retry; +} + +static int ufs_trunc_dindirect (struct inode * inode, unsigned offset, u32 * p) +{ + struct super_block * sb; + struct ufs_sb_private_info * uspi; + struct ufs_buffer_head * dind_bh; + unsigned i, tmp, dindirect_block; + u32 * dind; + int retry = 0; + + UFSD(("ENTER\n")) + + sb = inode->i_sb; + uspi = sb->u.ufs_sb.s_uspi; + + dindirect_block = (DIRECT_BLOCK > offset) + ? 
((DIRECT_BLOCK - offset) >> uspi->s_apbshift) : 0; + retry = 0; + + tmp = fs32_to_cpu(sb, *p); + if (!tmp) + return 0; + dind_bh = ubh_bread (inode->i_dev, tmp, uspi->s_bsize); + if (tmp != fs32_to_cpu(sb, *p)) { + ubh_brelse (dind_bh); + return 1; + } + if (!dind_bh) { + *p = 0; + return 0; + } + + for (i = dindirect_block ; i < uspi->s_apb ; i++) { + dind = ubh_get_addr32 (dind_bh, i); + tmp = fs32_to_cpu(sb, *dind); + if (!tmp) + continue; + retry |= ufs_trunc_indirect (inode, offset + (i << uspi->s_apbshift), dind); + ubh_mark_buffer_dirty(dind_bh); + } + + for (i = 0; i < uspi->s_apb; i++) + if (*ubh_get_addr32 (dind_bh, i)) + break; + if (i >= uspi->s_apb) { + if (ubh_max_bcount(dind_bh) != 1) + retry = 1; + else { + tmp = fs32_to_cpu(sb, *p); + *p = 0; + inode->i_blocks -= uspi->s_nspb; + mark_inode_dirty(inode); + ufs_free_blocks (inode, tmp, uspi->s_fpb); + ubh_bforget(dind_bh); + dind_bh = NULL; + } + } + if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { + ubh_ll_rw_block (WRITE, 1, &dind_bh); + ubh_wait_on_buffer (dind_bh); + } + ubh_brelse (dind_bh); + + UFSD(("EXIT\n")) + + return retry; +} + +static int ufs_trunc_tindirect (struct inode * inode) +{ + struct super_block * sb; + struct ufs_sb_private_info * uspi; + struct ufs_buffer_head * tind_bh; + unsigned tindirect_block, tmp, i; + u32 * tind, * p; + int retry; + + UFSD(("ENTER\n")) + + sb = inode->i_sb; + uspi = sb->u.ufs_sb.s_uspi; + retry = 0; + + tindirect_block = (DIRECT_BLOCK > (UFS_NDADDR + uspi->s_apb + uspi->s_2apb)) + ? ((DIRECT_BLOCK - UFS_NDADDR - uspi->s_apb - uspi->s_2apb) >> uspi->s_2apbshift) : 0; + p = inode->u.ufs_i.i_u1.i_data + UFS_TIND_BLOCK; + if (!(tmp = fs32_to_cpu(sb, *p))) + return 0; + tind_bh = ubh_bread (sb->s_dev, tmp, uspi->s_bsize); + if (tmp != fs32_to_cpu(sb, *p)) { + ubh_brelse (tind_bh); + return 1; + } + if (!tind_bh) { + *p = 0; + return 0; + } + + for (i = tindirect_block ; i < uspi->s_apb ; i++) { + tind = ubh_get_addr32 (tind_bh, i); + retry |= ufs_trunc_dindirect(inode, UFS_NDADDR + + uspi->s_apb + ((i + 1) << uspi->s_2apbshift), tind); + ubh_mark_buffer_dirty(tind_bh); + } + for (i = 0; i < uspi->s_apb; i++) + if (*ubh_get_addr32 (tind_bh, i)) + break; + if (i >= uspi->s_apb) { + if (ubh_max_bcount(tind_bh) != 1) + retry = 1; + else { + tmp = fs32_to_cpu(sb, *p); + *p = 0; + inode->i_blocks -= uspi->s_nspb; + mark_inode_dirty(inode); + ufs_free_blocks (inode, tmp, uspi->s_fpb); + ubh_bforget(tind_bh); + tind_bh = NULL; + } + } + if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { + ubh_ll_rw_block (WRITE, 1, &tind_bh); + ubh_wait_on_buffer (tind_bh); + } + ubh_brelse (tind_bh); + + UFSD(("EXIT\n")) + return retry; +} + +void ufs_truncate (struct inode * inode) +{ + struct super_block * sb; + struct ufs_sb_private_info * uspi; + struct buffer_head * bh; + unsigned offset; + int err, retry; + + UFSD(("ENTER\n")) + sb = inode->i_sb; + uspi = sb->u.ufs_sb.s_uspi; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE_FILE(inode)) + return; + while (1) { + retry = ufs_trunc_direct(inode); + retry |= ufs_trunc_indirect (inode, UFS_IND_BLOCK, + (u32 *) &inode->u.ufs_i.i_u1.i_data[UFS_IND_BLOCK]); + retry |= ufs_trunc_dindirect (inode, UFS_IND_BLOCK + uspi->s_apb, + (u32 *) &inode->u.ufs_i.i_u1.i_data[UFS_DIND_BLOCK]); + retry |= ufs_trunc_tindirect (inode); + if (!retry) + break; + if (IS_SYNC(inode) && (inode->i_state & I_DIRTY)) + ufs_sync_inode (inode); + run_task_queue(&tq_disk); + 
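 + /* some data buffer in the truncated range was still in use or + locked, so this pass could not free every block: yield the + CPU and retry until a pass completes without contention */ 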
current->policy |= SCHED_YIELD; + schedule (); + + + } + offset = inode->i_size & uspi->s_fshift; + if (offset) { + bh = ufs_bread (inode, inode->i_size >> uspi->s_fshift, 0, &err); + if (bh) { + memset (bh->b_data + offset, 0, uspi->s_fsize - offset); + mark_buffer_dirty (bh); + brelse (bh); + } + } + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->u.ufs_i.i_lastfrag = DIRECT_FRAGMENT; + mark_inode_dirty(inode); + UFSD(("EXIT\n")) +} diff -urN linux-2.4.16-reiserfspatches-immutable/include/asm-i386/unistd.h linux-2.4.16-reiserfspatches-immutable-ctx4/include/asm-i386/unistd.h --- linux-2.4.16-reiserfspatches-immutable/include/asm-i386/unistd.h Mon Dec 10 13:12:23 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/include/asm-i386/unistd.h Mon Dec 10 15:01:49 2001 @@ -230,6 +230,8 @@ #define __NR_security 223 /* syscall for security modules */ #define __NR_gettid 224 #define __NR_readahead 225 +#define __NR_new_s_context 226 +#define __NR_set_ipv4root 227 /* user-visible error numbers are in the range -1 - -124: see */ diff -urN linux-2.4.16-reiserfspatches-immutable/include/linux/capability.h linux-2.4.16-reiserfspatches-immutable-ctx4/include/linux/capability.h --- linux-2.4.16-reiserfspatches-immutable/include/linux/capability.h Mon Dec 10 13:12:49 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/include/linux/capability.h Mon Dec 10 15:01:49 2001 @@ -231,6 +231,7 @@ /* Allow enabling/disabling tagged queuing on SCSI controllers and sending arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ +/* Allow the selection of a security context */ #define CAP_SYS_ADMIN 21 @@ -278,6 +279,10 @@ /* Allow taking of leases on files */ #define CAP_LEASE 28 + +/* Allow opening special device file */ + +#define CAP_OPENDEV 29 #ifdef __KERNEL__ /* diff -urN linux-2.4.16-reiserfspatches-immutable/include/linux/ext2_fs.h~ linux-2.4.16-reiserfspatches-immutable-ctx4/include/linux/ext2_fs.h~ --- linux-2.4.16-reiserfspatches-immutable/include/linux/ext2_fs.h~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/include/linux/ext2_fs.h~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,633 @@ +/* + * linux/include/linux/ext2_fs.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT2_FS_H +#define _LINUX_EXT2_FS_H + +#include + +/* + * The second extended filesystem constants/structures + */ + +/* + * Define EXT2FS_DEBUG to produce debug messages + */ +#undef EXT2FS_DEBUG + +/* + * Define EXT2_PREALLOCATE to preallocate data blocks for expanding files + */ +#define EXT2_PREALLOCATE +#define EXT2_DEFAULT_PREALLOC_BLOCKS 8 + +/* + * The second extended file system version + */ +#define EXT2FS_DATE "95/08/09" +#define EXT2FS_VERSION "0.5b" + +/* + * Debug code + */ +#ifdef EXT2FS_DEBUG +# define ext2_debug(f, a...) { \ + printk ("EXT2-fs DEBUG (%s, %d): %s:", \ + __FILE__, __LINE__, __FUNCTION__); \ + printk (f, ## a); \ + } +#else +# define ext2_debug(f, a...) 
/**/ +#endif + +/* + * Special inode numbers + */ +#define EXT2_BAD_INO 1 /* Bad blocks inode */ +#define EXT2_ROOT_INO 2 /* Root inode */ +#define EXT2_ACL_IDX_INO 3 /* ACL inode */ +#define EXT2_ACL_DATA_INO 4 /* ACL inode */ +#define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ + +/* First non-reserved inode for old ext2 filesystems */ +#define EXT2_GOOD_OLD_FIRST_INO 11 + +/* + * The second extended file system magic number + */ +#define EXT2_SUPER_MAGIC 0xEF53 + +/* + * Maximal count of links to a file + */ +#define EXT2_LINK_MAX 32000 + +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT2_MIN_BLOCK_SIZE 1024 +#define EXT2_MAX_BLOCK_SIZE 4096 +#define EXT2_MIN_BLOCK_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT2_BLOCK_SIZE(s) ((s)->s_blocksize) +#else +# define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->s_log_block_size) +#endif +#define EXT2_ACLE_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_acl_entry)) +#define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (__u32)) +#ifdef __KERNEL__ +# define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +#else +# define EXT2_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) +#endif +#ifdef __KERNEL__ +#define EXT2_ADDR_PER_BLOCK_BITS(s) ((s)->u.ext2_sb.s_addr_per_block_bits) +#define EXT2_INODE_SIZE(s) ((s)->u.ext2_sb.s_inode_size) +#define EXT2_FIRST_INO(s) ((s)->u.ext2_sb.s_first_ino) +#else +#define EXT2_INODE_SIZE(s) (((s)->s_rev_level == EXT2_GOOD_OLD_REV) ? \ + EXT2_GOOD_OLD_INODE_SIZE : \ + (s)->s_inode_size) +#define EXT2_FIRST_INO(s) (((s)->s_rev_level == EXT2_GOOD_OLD_REV) ? \ + EXT2_GOOD_OLD_FIRST_INO : \ + (s)->s_first_ino) +#endif + +/* + * Macro-instructions used to manage fragments + */ +#define EXT2_MIN_FRAG_SIZE 1024 +#define EXT2_MAX_FRAG_SIZE 4096 +#define EXT2_MIN_FRAG_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT2_FRAG_SIZE(s) ((s)->u.ext2_sb.s_frag_size) +# define EXT2_FRAGS_PER_BLOCK(s) ((s)->u.ext2_sb.s_frags_per_block) +#else +# define EXT2_FRAG_SIZE(s) (EXT2_MIN_FRAG_SIZE << (s)->s_log_frag_size) +# define EXT2_FRAGS_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / EXT2_FRAG_SIZE(s)) +#endif + +/* + * ACL structures + */ +struct ext2_acl_header /* Header of Access Control Lists */ +{ + __u32 aclh_size; + __u32 aclh_file_count; + __u32 aclh_acle_count; + __u32 aclh_first_acle; +}; + +struct ext2_acl_entry /* Access Control List Entry */ +{ + __u32 acle_size; + __u16 acle_perms; /* Access permissions */ + __u16 acle_type; /* Type of entry */ + __u16 acle_tag; /* User or group identity */ + __u16 acle_pad1; + __u32 acle_next; /* Pointer on next entry for the */ + /* same inode or on next free entry */ +}; + +/* + * Structure of a blocks group descriptor + */ +struct ext2_group_desc +{ + __u32 bg_block_bitmap; /* Blocks bitmap block */ + __u32 bg_inode_bitmap; /* Inodes bitmap block */ + __u32 bg_inode_table; /* Inodes table block */ + __u16 bg_free_blocks_count; /* Free blocks count */ + __u16 bg_free_inodes_count; /* Free inodes count */ + __u16 bg_used_dirs_count; /* Directories count */ + __u16 bg_pad; + __u32 bg_reserved[3]; +}; + +/* + * Macro-instructions used to manage group descriptors + */ +#ifdef __KERNEL__ +# define EXT2_BLOCKS_PER_GROUP(s) ((s)->u.ext2_sb.s_blocks_per_group) +# define EXT2_DESC_PER_BLOCK(s) ((s)->u.ext2_sb.s_desc_per_block) +# define EXT2_INODES_PER_GROUP(s) ((s)->u.ext2_sb.s_inodes_per_group) +# define EXT2_DESC_PER_BLOCK_BITS(s) ((s)->u.ext2_sb.s_desc_per_block_bits) +#else +# define 
EXT2_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) +# define EXT2_DESC_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_group_desc)) +# define EXT2_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) +#endif + +/* + * Constants relative to the data blocks + */ +#define EXT2_NDIR_BLOCKS 12 +#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS +#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1) +#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1) +#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1) + +/* + * Inode flags + */ +#define EXT2_SECRM_FL 0x00000001 /* Secure deletion */ +#define EXT2_UNRM_FL 0x00000002 /* Undelete */ +#define EXT2_COMPR_FL 0x00000004 /* Compress file */ +#define EXT2_SYNC_FL 0x00000008 /* Synchronous updates */ +#define EXT2_IMMUTABLE_FILE_FL 0x00000010 /* Immutable file */ +#define EXT2_APPEND_FL 0x00000020 /* writes to file may only append */ +#define EXT2_NODUMP_FL 0x00000040 /* do not dump file */ +#define EXT2_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define EXT2_DIRTY_FL 0x00000100 +#define EXT2_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define EXT2_NOCOMP_FL 0x00000400 /* Don't compress */ +#define EXT2_ECOMPR_FL 0x00000800 /* Compression error */ +/* End compression flags --- maybe not all used */ +#define EXT2_BTREE_FL 0x00001000 /* btree format dir */ +#define EXT2_IMMUTABLE_LINK_FL 0x00008000 /* Immutable link */ +#define EXT2_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ + +#define EXT2_FL_USER_VISIBLE 0x00009FFF /* User visible flags */ +#define EXT2_FL_USER_MODIFIABLE 0x000080FF /* User modifiable flags */ + +/* + * ioctl commands + */ +#define EXT2_IOC_GETFLAGS _IOR('f', 1, long) +#define EXT2_IOC_SETFLAGS _IOW('f', 2, long) +#define EXT2_IOC_GETVERSION _IOR('v', 1, long) +#define EXT2_IOC_SETVERSION _IOW('v', 2, long) + +/* + * Structure of an inode on the disk + */ +struct ext2_inode { + __u16 i_mode; /* File mode */ + __u16 i_uid; /* Low 16 bits of Owner Uid */ + __u32 i_size; /* Size in bytes */ + __u32 i_atime; /* Access time */ + __u32 i_ctime; /* Creation time */ + __u32 i_mtime; /* Modification time */ + __u32 i_dtime; /* Deletion Time */ + __u16 i_gid; /* Low 16 bits of Group Id */ + __u16 i_links_count; /* Links count */ + __u32 i_blocks; /* Blocks count */ + __u32 i_flags; /* File flags */ + union { + struct { + __u32 l_i_reserved1; + } linux1; + struct { + __u32 h_i_translator; + } hurd1; + struct { + __u32 m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ + __u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */ + __u32 i_generation; /* File version (for NFS) */ + __u32 i_file_acl; /* File ACL */ + __u32 i_dir_acl; /* Directory ACL */ + __u32 i_faddr; /* Fragment address */ + union { + struct { + __u8 l_i_frag; /* Fragment number */ + __u8 l_i_fsize; /* Fragment size */ + __u16 i_pad1; + __u16 l_i_uid_high; /* these 2 fields */ + __u16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ + __u8 h_i_fsize; /* Fragment size */ + __u16 h_i_mode_high; + __u16 h_i_uid_high; + __u16 h_i_gid_high; + __u32 h_i_author; + } hurd2; + struct { + __u8 m_i_frag; /* Fragment number */ + __u8 m_i_fsize; /* Fragment size */ + __u16 m_pad1; + __u32 m_i_reserved2[2]; + } masix2; + } osd2; /* OS dependent 2 */ +}; + +#define i_size_high i_dir_acl + +#if defined(__KERNEL__) || defined(__linux__) +#define i_reserved1 osd1.linux1.l_i_reserved1 +#define i_frag osd2.linux2.l_i_frag +#define i_fsize osd2.linux2.l_i_fsize +#define i_uid_low i_uid 
+#define i_gid_low i_gid +#define i_uid_high osd2.linux2.l_i_uid_high +#define i_gid_high osd2.linux2.l_i_gid_high +#define i_reserved2 osd2.linux2.l_i_reserved2 +#endif + +#ifdef __hurd__ +#define i_translator osd1.hurd1.h_i_translator +#define i_frag osd2.hurd2.h_i_frag; +#define i_fsize osd2.hurd2.h_i_fsize; +#define i_uid_high osd2.hurd2.h_i_uid_high +#define i_gid_high osd2.hurd2.h_i_gid_high +#define i_author osd2.hurd2.h_i_author +#endif + +#ifdef __masix__ +#define i_reserved1 osd1.masix1.m_i_reserved1 +#define i_frag osd2.masix2.m_i_frag +#define i_fsize osd2.masix2.m_i_fsize +#define i_reserved2 osd2.masix2.m_i_reserved2 +#endif + +/* + * File system states + */ +#define EXT2_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT2_ERROR_FS 0x0002 /* Errors detected */ + +/* + * Mount flags + */ +#define EXT2_MOUNT_CHECK 0x0001 /* Do mount-time checks */ +#define EXT2_MOUNT_GRPID 0x0004 /* Create files with directory's group */ +#define EXT2_MOUNT_DEBUG 0x0008 /* Some debugging messages */ +#define EXT2_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ +#define EXT2_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ +#define EXT2_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ +#define EXT2_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ +#define EXT2_MOUNT_NO_UID32 0x0200 /* Disable 32-bit UIDs */ + +#define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt +#define set_opt(o, opt) o |= EXT2_MOUNT_##opt +#define test_opt(sb, opt) ((sb)->u.ext2_sb.s_mount_opt & \ + EXT2_MOUNT_##opt) +/* + * Maximal mount counts between two filesystem checks + */ +#define EXT2_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ +#define EXT2_DFL_CHECKINTERVAL 0 /* Don't use interval check */ + +/* + * Behaviour when detecting errors + */ +#define EXT2_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT2_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT2_ERRORS_PANIC 3 /* Panic */ +#define EXT2_ERRORS_DEFAULT EXT2_ERRORS_CONTINUE + +/* + * Structure of the super block + */ +struct ext2_super_block { + __u32 s_inodes_count; /* Inodes count */ + __u32 s_blocks_count; /* Blocks count */ + __u32 s_r_blocks_count; /* Reserved blocks count */ + __u32 s_free_blocks_count; /* Free blocks count */ + __u32 s_free_inodes_count; /* Free inodes count */ + __u32 s_first_data_block; /* First Data Block */ + __u32 s_log_block_size; /* Block size */ + __s32 s_log_frag_size; /* Fragment size */ + __u32 s_blocks_per_group; /* # Blocks per group */ + __u32 s_frags_per_group; /* # Fragments per group */ + __u32 s_inodes_per_group; /* # Inodes per group */ + __u32 s_mtime; /* Mount time */ + __u32 s_wtime; /* Write time */ + __u16 s_mnt_count; /* Mount count */ + __s16 s_max_mnt_count; /* Maximal mount count */ + __u16 s_magic; /* Magic signature */ + __u16 s_state; /* File system state */ + __u16 s_errors; /* Behaviour when detecting errors */ + __u16 s_minor_rev_level; /* minor revision level */ + __u32 s_lastcheck; /* time of last check */ + __u32 s_checkinterval; /* max. time between checks */ + __u32 s_creator_os; /* OS */ + __u32 s_rev_level; /* Revision level */ + __u16 s_def_resuid; /* Default uid for reserved blocks */ + __u16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT2_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. 
+ * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __u32 s_first_ino; /* First non-reserved inode */ + __u16 s_inode_size; /* size of inode structure */ + __u16 s_block_group_nr; /* block group # of this superblock */ + __u32 s_feature_compat; /* compatible feature set */ + __u32 s_feature_incompat; /* incompatible feature set */ + __u32 s_feature_ro_compat; /* readonly-compatible feature set */ + __u8 s_uuid[16]; /* 128-bit uuid for volume */ + char s_volume_name[16]; /* volume name */ + char s_last_mounted[64]; /* directory where last mounted */ + __u32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT2_COMPAT_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __u16 s_padding1; + __u32 s_reserved[204]; /* Padding to the end of the block */ +}; + +#ifdef __KERNEL__ +#define EXT2_SB(sb) (&((sb)->u.ext2_sb)) +#else +/* Assume that user mode programs are passing in an ext2fs superblock, not + * a kernel struct super_block. This will allow us to call the feature-test + * macros from user land. */ +#define EXT2_SB(sb) (sb) +#endif + +/* + * Codes for operating systems + */ +#define EXT2_OS_LINUX 0 +#define EXT2_OS_HURD 1 +#define EXT2_OS_MASIX 2 +#define EXT2_OS_FREEBSD 3 +#define EXT2_OS_LITES 4 + +/* + * Revision levels + */ +#define EXT2_GOOD_OLD_REV 0 /* The good old (original) format */ +#define EXT2_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ + +#define EXT2_CURRENT_REV EXT2_GOOD_OLD_REV +#define EXT2_MAX_SUPP_REV EXT2_DYNAMIC_REV + +#define EXT2_GOOD_OLD_INODE_SIZE 128 + +/* + * Feature set definitions + */ + +#define EXT2_HAS_COMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) +#define EXT2_HAS_RO_COMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) +#define EXT2_HAS_INCOMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) +#define EXT2_SET_COMPAT_FEATURE(sb,mask) \ + EXT2_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) +#define EXT2_SET_RO_COMPAT_FEATURE(sb,mask) \ + EXT2_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) +#define EXT2_SET_INCOMPAT_FEATURE(sb,mask) \ + EXT2_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) +#define EXT2_CLEAR_COMPAT_FEATURE(sb,mask) \ + EXT2_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) +#define EXT2_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ + EXT2_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) +#define EXT2_CLEAR_INCOMPAT_FEATURE(sb,mask) \ + EXT2_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) + +#define EXT2_FEATURE_COMPAT_DIR_PREALLOC 0x0001 +#define EXT2_FEATURE_COMPAT_IMAGIC_INODES 0x0002 +#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004 +#define EXT2_FEATURE_COMPAT_EXT_ATTR 0x0008 +#define EXT2_FEATURE_COMPAT_RESIZE_INO 0x0010 +#define EXT2_FEATURE_COMPAT_DIR_INDEX 0x0020 +#define EXT2_FEATURE_COMPAT_ANY 0xffffffff + +#define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 +#define EXT2_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 +#define EXT2_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT2_FEATURE_RO_COMPAT_ANY 0xffffffff + +#define EXT2_FEATURE_INCOMPAT_COMPRESSION 0x0001 +#define EXT2_FEATURE_INCOMPAT_FILETYPE 0x0002 +#define 
EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 +#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 +#define EXT2_FEATURE_INCOMPAT_ANY 0xffffffff + +#define EXT2_FEATURE_COMPAT_SUPP 0 +#define EXT2_FEATURE_INCOMPAT_SUPP EXT2_FEATURE_INCOMPAT_FILETYPE +#define EXT2_FEATURE_RO_COMPAT_SUPP (EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT2_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT2_FEATURE_RO_COMPAT_BTREE_DIR) +#define EXT2_FEATURE_RO_COMPAT_UNSUPPORTED ~EXT2_FEATURE_RO_COMPAT_SUPP +#define EXT2_FEATURE_INCOMPAT_UNSUPPORTED ~EXT2_FEATURE_INCOMPAT_SUPP + +/* + * Default values for user and/or group using reserved blocks + */ +#define EXT2_DEF_RESUID 0 +#define EXT2_DEF_RESGID 0 + +/* + * Structure of a directory entry + */ +#define EXT2_NAME_LEN 255 + +struct ext2_dir_entry { + __u32 inode; /* Inode number */ + __u16 rec_len; /* Directory entry length */ + __u16 name_len; /* Name length */ + char name[EXT2_NAME_LEN]; /* File name */ +}; + +/* + * The new version of the directory entry. Since EXT2 structures are + * stored in intel byte order, and the name_len field could never be + * bigger than 255 chars, it's safe to reclaim the extra byte for the + * file_type field. + */ +struct ext2_dir_entry_2 { + __u32 inode; /* Inode number */ + __u16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; + char name[EXT2_NAME_LEN]; /* File name */ +}; + +/* + * Ext2 directory file types. Only the low 3 bits are used. The + * other bits are reserved for now. + */ +enum { + EXT2_FT_UNKNOWN, + EXT2_FT_REG_FILE, + EXT2_FT_DIR, + EXT2_FT_CHRDEV, + EXT2_FT_BLKDEV, + EXT2_FT_FIFO, + EXT2_FT_SOCK, + EXT2_FT_SYMLINK, + EXT2_FT_MAX +}; + +/* + * EXT2_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 4 + */ +#define EXT2_DIR_PAD 4 +#define EXT2_DIR_ROUND (EXT2_DIR_PAD - 1) +#define EXT2_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT2_DIR_ROUND) & \ + ~EXT2_DIR_ROUND) + +#ifdef __KERNEL__ +/* + * Function prototypes + */ + +/* + * Ok, these declarations are also in <linux/kernel.h> but none of the + * ext2 source programs needs to include it so they are duplicated here.
+ */ +# define NORET_TYPE /**/ +# define ATTRIB_NORET __attribute__((noreturn)) +# define NORET_AND noreturn, + +/* balloc.c */ +extern int ext2_bg_has_super(struct super_block *sb, int group); +extern unsigned long ext2_bg_num_gdb(struct super_block *sb, int group); +extern int ext2_new_block (struct inode *, unsigned long, + __u32 *, __u32 *, int *); +extern void ext2_free_blocks (struct inode *, unsigned long, + unsigned long); +extern unsigned long ext2_count_free_blocks (struct super_block *); +extern void ext2_check_blocks_bitmap (struct super_block *); +extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh); + +/* dir.c */ +extern int ext2_add_link (struct dentry *, struct inode *); +extern ino_t ext2_inode_by_name(struct inode *, struct dentry *); +extern int ext2_make_empty(struct inode *, struct inode *); +extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct dentry *, struct page **); +extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); +extern int ext2_empty_dir (struct inode *); +extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); +extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *); + +/* fsync.c */ +extern int ext2_sync_file (struct file *, struct dentry *, int); +extern int ext2_fsync_inode (struct inode *, int); + +/* ialloc.c */ +extern struct inode * ext2_new_inode (const struct inode *, int); +extern void ext2_free_inode (struct inode *); +extern unsigned long ext2_count_free_inodes (struct super_block *); +extern void ext2_check_inodes_bitmap (struct super_block *); +extern unsigned long ext2_count_free (struct buffer_head *, unsigned); + +/* inode.c */ +extern void ext2_read_inode (struct inode *); +extern void ext2_write_inode (struct inode *, int); +extern void ext2_put_inode (struct inode *); +extern void ext2_delete_inode (struct inode *); +extern int ext2_sync_inode (struct inode *); +extern void ext2_discard_prealloc (struct inode *); +extern void ext2_truncate (struct inode *); + +/* ioctl.c */ +extern int ext2_ioctl (struct inode *, struct file *, unsigned int, + unsigned long); + +/* super.c */ +extern void ext2_error (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern NORET_TYPE void ext2_panic (struct super_block *, const char *, + const char *, ...) + __attribute__ ((NORET_AND format (printf, 3, 4))); +extern void ext2_warning (struct super_block *, const char *, const char *, ...) 
+ __attribute__ ((format (printf, 3, 4))); +extern void ext2_update_dynamic_rev (struct super_block *sb); +extern void ext2_put_super (struct super_block *); +extern void ext2_write_super (struct super_block *); +extern int ext2_remount (struct super_block *, int *, char *); +extern struct super_block * ext2_read_super (struct super_block *,void *,int); +extern int ext2_statfs (struct super_block *, struct statfs *); + +/* + * Inodes and files operations + */ + +/* dir.c */ +extern struct file_operations ext2_dir_operations; + +/* file.c */ +extern struct inode_operations ext2_file_inode_operations; +extern struct file_operations ext2_file_operations; + +/* inode.c */ +extern struct address_space_operations ext2_aops; + +/* namei.c */ +extern struct inode_operations ext2_dir_inode_operations; + +/* symlink.c */ +extern struct inode_operations ext2_fast_symlink_inode_operations; + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_EXT2_FS_H */ diff -urN linux-2.4.16-reiserfspatches-immutable/include/linux/ext3_fs.h~ linux-2.4.16-reiserfspatches-immutable-ctx4/include/linux/ext3_fs.h~ --- linux-2.4.16-reiserfspatches-immutable/include/linux/ext3_fs.h~ Thu Jan 1 01:00:00 1970 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/include/linux/ext3_fs.h~ Mon Dec 10 14:28:03 2001 @@ -0,0 +1,716 @@ +/* + * linux/include/linux/ext3_fs.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT3_FS_H +#define _LINUX_EXT3_FS_H + +#include <linux/types.h> + +/* + * The second extended filesystem constants/structures + */ + +/* + * Define EXT3FS_DEBUG to produce debug messages + */ +#undef EXT3FS_DEBUG + +/* + * Define EXT3_PREALLOCATE to preallocate data blocks for expanding files + */ +#undef EXT3_PREALLOCATE /* @@@ Fix this! */ +#define EXT3_DEFAULT_PREALLOC_BLOCKS 8 + +/* + * The second extended file system version + */ +#define EXT3FS_DATE "06 Nov 2001" +#define EXT3FS_VERSION "2.4-0.9.15" + +/* + * Debug code + */ +#ifdef EXT3FS_DEBUG +#define ext3_debug(f, a...) \ + do { \ + printk (KERN_DEBUG "EXT3-fs DEBUG (%s, %d): %s:", \ + __FILE__, __LINE__, __FUNCTION__); \ + printk (KERN_DEBUG f, ## a); \ + } while (0) +#else +#define ext3_debug(f, a...)
do {} while (0) +#endif + +/* + * Special inodes numbers + */ +#define EXT3_BAD_INO 1 /* Bad blocks inode */ +#define EXT3_ROOT_INO 2 /* Root inode */ +#define EXT3_ACL_IDX_INO 3 /* ACL inode */ +#define EXT3_ACL_DATA_INO 4 /* ACL inode */ +#define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ +#define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ +#define EXT3_JOURNAL_INO 8 /* Journal inode */ + +/* First non-reserved inode for old ext3 filesystems */ +#define EXT3_GOOD_OLD_FIRST_INO 11 + +/* + * The second extended file system magic number + */ +#define EXT3_SUPER_MAGIC 0xEF53 + +/* + * Maximal count of links to a file + */ +#define EXT3_LINK_MAX 32000 + +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT3_MIN_BLOCK_SIZE 1024 +#define EXT3_MAX_BLOCK_SIZE 4096 +#define EXT3_MIN_BLOCK_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) +#else +# define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) +#endif +#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry)) +#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) +#ifdef __KERNEL__ +# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +#else +# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) +#endif +#ifdef __KERNEL__ +#define EXT3_ADDR_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_addr_per_block_bits) +#define EXT3_INODE_SIZE(s) ((s)->u.ext3_sb.s_inode_size) +#define EXT3_FIRST_INO(s) ((s)->u.ext3_sb.s_first_ino) +#else +#define EXT3_INODE_SIZE(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? \ + EXT3_GOOD_OLD_INODE_SIZE : \ + (s)->s_inode_size) +#define EXT3_FIRST_INO(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? 
\ + EXT3_GOOD_OLD_FIRST_INO : \ + (s)->s_first_ino) +#endif + +/* + * Macro-instructions used to manage fragments + */ +#define EXT3_MIN_FRAG_SIZE 1024 +#define EXT3_MAX_FRAG_SIZE 4096 +#define EXT3_MIN_FRAG_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT3_FRAG_SIZE(s) ((s)->u.ext3_sb.s_frag_size) +# define EXT3_FRAGS_PER_BLOCK(s) ((s)->u.ext3_sb.s_frags_per_block) +#else +# define EXT3_FRAG_SIZE(s) (EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size) +# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s)) +#endif + +/* + * ACL structures + */ +struct ext3_acl_header /* Header of Access Control Lists */ +{ + __u32 aclh_size; + __u32 aclh_file_count; + __u32 aclh_acle_count; + __u32 aclh_first_acle; +}; + +struct ext3_acl_entry /* Access Control List Entry */ +{ + __u32 acle_size; + __u16 acle_perms; /* Access permissions */ + __u16 acle_type; /* Type of entry */ + __u16 acle_tag; /* User or group identity */ + __u16 acle_pad1; + __u32 acle_next; /* Pointer on next entry for the */ + /* same inode or on next free entry */ +}; + +/* + * Structure of a blocks group descriptor + */ +struct ext3_group_desc +{ + __u32 bg_block_bitmap; /* Blocks bitmap block */ + __u32 bg_inode_bitmap; /* Inodes bitmap block */ + __u32 bg_inode_table; /* Inodes table block */ + __u16 bg_free_blocks_count; /* Free blocks count */ + __u16 bg_free_inodes_count; /* Free inodes count */ + __u16 bg_used_dirs_count; /* Directories count */ + __u16 bg_pad; + __u32 bg_reserved[3]; +}; + +/* + * Macro-instructions used to manage group descriptors + */ +#ifdef __KERNEL__ +# define EXT3_BLOCKS_PER_GROUP(s) ((s)->u.ext3_sb.s_blocks_per_group) +# define EXT3_DESC_PER_BLOCK(s) ((s)->u.ext3_sb.s_desc_per_block) +# define EXT3_INODES_PER_GROUP(s) ((s)->u.ext3_sb.s_inodes_per_group) +# define EXT3_DESC_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_desc_per_block_bits) +#else +# define EXT3_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) +# define EXT3_DESC_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc)) +# define EXT3_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) +#endif + +/* + * Constants relative to the data blocks + */ +#define EXT3_NDIR_BLOCKS 12 +#define EXT3_IND_BLOCK EXT3_NDIR_BLOCKS +#define EXT3_DIND_BLOCK (EXT3_IND_BLOCK + 1) +#define EXT3_TIND_BLOCK (EXT3_DIND_BLOCK + 1) +#define EXT3_N_BLOCKS (EXT3_TIND_BLOCK + 1) + +/* + * Inode flags + */ +#define EXT3_SECRM_FL 0x00000001 /* Secure deletion */ +#define EXT3_UNRM_FL 0x00000002 /* Undelete */ +#define EXT3_COMPR_FL 0x00000004 /* Compress file */ +#define EXT3_SYNC_FL 0x00000008 /* Synchronous updates */ +#define EXT3_IMMUTABLE_FILE_FL 0x00000010 /* Immutable file */ +#define EXT3_APPEND_FL 0x00000020 /* writes to file may only append */ +#define EXT3_NODUMP_FL 0x00000040 /* do not dump file */ +#define EXT3_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... 
*/ +#define EXT3_DIRTY_FL 0x00000100 +#define EXT3_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define EXT3_NOCOMPR_FL 0x00000400 /* Don't compress */ +#define EXT3_ECOMPR_FL 0x00000800 /* Compression error */ +/* End compression flags --- maybe not all used */ +#define EXT3_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ +#define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ +#define EXT3_IMMUTABLE_LINK_FL 0x00008000 /* Immutable link */ +#define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ + +#define EXT3_FL_USER_VISIBLE 0x0000DFFF /* User visible flags */ +#define EXT3_FL_USER_MODIFIABLE 0x000080FF /* User modifiable flags */ + +/* + * Inode dynamic state flags + */ +#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ +#define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ + +/* + * ioctl commands + */ +#define EXT3_IOC_GETFLAGS _IOR('f', 1, long) +#define EXT3_IOC_SETFLAGS _IOW('f', 2, long) +#define EXT3_IOC_GETVERSION _IOR('f', 3, long) +#define EXT3_IOC_SETVERSION _IOW('f', 4, long) +#define EXT3_IOC_GETVERSION_OLD _IOR('v', 1, long) +#define EXT3_IOC_SETVERSION_OLD _IOW('v', 2, long) +#ifdef CONFIG_JBD_DEBUG +#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) +#endif + +/* + * Structure of an inode on the disk + */ +struct ext3_inode { + __u16 i_mode; /* File mode */ + __u16 i_uid; /* Low 16 bits of Owner Uid */ + __u32 i_size; /* Size in bytes */ + __u32 i_atime; /* Access time */ + __u32 i_ctime; /* Creation time */ + __u32 i_mtime; /* Modification time */ + __u32 i_dtime; /* Deletion Time */ + __u16 i_gid; /* Low 16 bits of Group Id */ + __u16 i_links_count; /* Links count */ + __u32 i_blocks; /* Blocks count */ + __u32 i_flags; /* File flags */ + union { + struct { + __u32 l_i_reserved1; + } linux1; + struct { + __u32 h_i_translator; + } hurd1; + struct { + __u32 m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ + __u32 i_block[EXT3_N_BLOCKS];/* Pointers to blocks */ + __u32 i_generation; /* File version (for NFS) */ + __u32 i_file_acl; /* File ACL */ + __u32 i_dir_acl; /* Directory ACL */ + __u32 i_faddr; /* Fragment address */ + union { + struct { + __u8 l_i_frag; /* Fragment number */ + __u8 l_i_fsize; /* Fragment size */ + __u16 i_pad1; + __u16 l_i_uid_high; /* these 2 fields */ + __u16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ + __u8 h_i_fsize; /* Fragment size */ + __u16 h_i_mode_high; + __u16 h_i_uid_high; + __u16 h_i_gid_high; + __u32 h_i_author; + } hurd2; + struct { + __u8 m_i_frag; /* Fragment number */ + __u8 m_i_fsize; /* Fragment size */ + __u16 m_pad1; + __u32 m_i_reserved2[2]; + } masix2; + } osd2; /* OS dependent 2 */ +}; + +#define i_size_high i_dir_acl + +#if defined(__KERNEL__) || defined(__linux__) +#define i_reserved1 osd1.linux1.l_i_reserved1 +#define i_frag osd2.linux2.l_i_frag +#define i_fsize osd2.linux2.l_i_fsize +#define i_uid_low i_uid +#define i_gid_low i_gid +#define i_uid_high osd2.linux2.l_i_uid_high +#define i_gid_high osd2.linux2.l_i_gid_high +#define i_reserved2 osd2.linux2.l_i_reserved2 + +#elif defined(__GNU__) + +#define i_translator osd1.hurd1.h_i_translator +#define i_frag osd2.hurd2.h_i_frag; +#define i_fsize osd2.hurd2.h_i_fsize; +#define i_uid_high osd2.hurd2.h_i_uid_high +#define i_gid_high osd2.hurd2.h_i_gid_high +#define i_author osd2.hurd2.h_i_author + +#elif defined(__masix__) + +#define i_reserved1 
osd1.masix1.m_i_reserved1 +#define i_frag osd2.masix2.m_i_frag +#define i_fsize osd2.masix2.m_i_fsize +#define i_reserved2 osd2.masix2.m_i_reserved2 + +#endif /* defined(__KERNEL__) || defined(__linux__) */ + +/* + * File system states + */ +#define EXT3_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT3_ERROR_FS 0x0002 /* Errors detected */ +#define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */ + +/* + * Mount flags + */ +#define EXT3_MOUNT_CHECK 0x0001 /* Do mount-time checks */ +#define EXT3_MOUNT_GRPID 0x0004 /* Create files with directory's group */ +#define EXT3_MOUNT_DEBUG 0x0008 /* Some debugging messages */ +#define EXT3_MOUNT_ERRORS_CONT 0x0010 /* Continue on errors */ +#define EXT3_MOUNT_ERRORS_RO 0x0020 /* Remount fs ro on errors */ +#define EXT3_MOUNT_ERRORS_PANIC 0x0040 /* Panic on errors */ +#define EXT3_MOUNT_MINIX_DF 0x0080 /* Mimics the Minix statfs */ +#define EXT3_MOUNT_NOLOAD 0x0100 /* Don't use existing journal*/ +#define EXT3_MOUNT_ABORT 0x0200 /* Fatal error detected */ +#define EXT3_MOUNT_DATA_FLAGS 0x0C00 /* Mode for data writes: */ + #define EXT3_MOUNT_JOURNAL_DATA 0x0400 /* Write data to journal */ + #define EXT3_MOUNT_ORDERED_DATA 0x0800 /* Flush data before commit */ + #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */ +#define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ +#define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ + +/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ +#ifndef _LINUX_EXT2_FS_H +#define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt +#define set_opt(o, opt) o |= EXT3_MOUNT_##opt +#define test_opt(sb, opt) ((sb)->u.ext3_sb.s_mount_opt & \ + EXT3_MOUNT_##opt) +#else +#define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD +#define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT +#endif + +#define ext3_set_bit ext2_set_bit +#define ext3_clear_bit ext2_clear_bit +#define ext3_test_bit ext2_test_bit +#define ext3_find_first_zero_bit ext2_find_first_zero_bit +#define ext3_find_next_zero_bit ext2_find_next_zero_bit + +/* + * Maximal mount counts between two filesystem checks + */ +#define EXT3_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ +#define EXT3_DFL_CHECKINTERVAL 0 /* Don't use interval check */ + +/* + * Behaviour when detecting errors + */ +#define EXT3_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT3_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT3_ERRORS_PANIC 3 /* Panic */ +#define EXT3_ERRORS_DEFAULT EXT3_ERRORS_CONTINUE + +/* + * Structure of the super block + */ +struct ext3_super_block { +/*00*/ __u32 s_inodes_count; /* Inodes count */ + __u32 s_blocks_count; /* Blocks count */ + __u32 s_r_blocks_count; /* Reserved blocks count */ + __u32 s_free_blocks_count; /* Free blocks count */ +/*10*/ __u32 s_free_inodes_count; /* Free inodes count */ + __u32 s_first_data_block; /* First Data Block */ + __u32 s_log_block_size; /* Block size */ + __s32 s_log_frag_size; /* Fragment size */ +/*20*/ __u32 s_blocks_per_group; /* # Blocks per group */ + __u32 s_frags_per_group; /* # Fragments per group */ + __u32 s_inodes_per_group; /* # Inodes per group */ + __u32 s_mtime; /* Mount time */ +/*30*/ __u32 s_wtime; /* Write time */ + __u16 s_mnt_count; /* Mount count */ + __s16 s_max_mnt_count; /* Maximal mount count */ + __u16 s_magic; /* Magic signature */ + __u16 s_state; /* File system state */ + __u16 s_errors; /* Behaviour when detecting errors */ + __u16 s_minor_rev_level; /* minor revision level */ +/*40*/ __u32 s_lastcheck; /* time of last check */ + __u32 s_checkinterval; /* max. 
time between checks */ + __u32 s_creator_os; /* OS */ + __u32 s_rev_level; /* Revision level */ +/*50*/ __u16 s_def_resuid; /* Default uid for reserved blocks */ + __u16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT3_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. + * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __u32 s_first_ino; /* First non-reserved inode */ + __u16 s_inode_size; /* size of inode structure */ + __u16 s_block_group_nr; /* block group # of this superblock */ + __u32 s_feature_compat; /* compatible feature set */ +/*60*/ __u32 s_feature_incompat; /* incompatible feature set */ + __u32 s_feature_ro_compat; /* readonly-compatible feature set */ +/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ +/*78*/ char s_volume_name[16]; /* volume name */ +/*88*/ char s_last_mounted[64]; /* directory where last mounted */ +/*C8*/ __u32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __u16 s_padding1; + /* + * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. + */ +/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ +/*E0*/ __u32 s_journal_inum; /* inode number of journal file */ + __u32 s_journal_dev; /* device number of journal file */ + __u32 s_last_orphan; /* start of list of inodes to delete */ + +/*EC*/ __u32 s_reserved[197]; /* Padding to the end of the block */ +}; + +#ifdef __KERNEL__ +#define EXT3_SB(sb) (&((sb)->u.ext3_sb)) +#define EXT3_I(inode) (&((inode)->u.ext3_i)) +#else +/* Assume that user mode programs are passing in an ext3fs superblock, not + * a kernel struct super_block. This will allow us to call the feature-test + * macros from user land. 
*/ +#define EXT3_SB(sb) (sb) +#endif + +#define NEXT_ORPHAN(inode) (inode)->u.ext3_i.i_dtime + +/* + * Codes for operating systems + */ +#define EXT3_OS_LINUX 0 +#define EXT3_OS_HURD 1 +#define EXT3_OS_MASIX 2 +#define EXT3_OS_FREEBSD 3 +#define EXT3_OS_LITES 4 + +/* + * Revision levels + */ +#define EXT3_GOOD_OLD_REV 0 /* The good old (original) format */ +#define EXT3_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ + +#define EXT3_CURRENT_REV EXT3_GOOD_OLD_REV +#define EXT3_MAX_SUPP_REV EXT3_DYNAMIC_REV + +#define EXT3_GOOD_OLD_INODE_SIZE 128 + +/* + * Feature set definitions + */ + +#define EXT3_HAS_COMPAT_FEATURE(sb,mask) \ + ( EXT3_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) +#define EXT3_HAS_RO_COMPAT_FEATURE(sb,mask) \ + ( EXT3_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) +#define EXT3_HAS_INCOMPAT_FEATURE(sb,mask) \ + ( EXT3_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) +#define EXT3_SET_COMPAT_FEATURE(sb,mask) \ + EXT3_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) +#define EXT3_SET_RO_COMPAT_FEATURE(sb,mask) \ + EXT3_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) +#define EXT3_SET_INCOMPAT_FEATURE(sb,mask) \ + EXT3_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) +#define EXT3_CLEAR_COMPAT_FEATURE(sb,mask) \ + EXT3_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) +#define EXT3_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ + EXT3_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) +#define EXT3_CLEAR_INCOMPAT_FEATURE(sb,mask) \ + EXT3_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) + +#define EXT3_FEATURE_COMPAT_DIR_PREALLOC 0x0001 +#define EXT3_FEATURE_COMPAT_IMAGIC_INODES 0x0002 +#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004 +#define EXT3_FEATURE_COMPAT_EXT_ATTR 0x0008 +#define EXT3_FEATURE_COMPAT_RESIZE_INODE 0x0010 +#define EXT3_FEATURE_COMPAT_DIR_INDEX 0x0020 + +#define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 +#define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 +#define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 + +#define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 +#define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 +#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ +#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ + +#define EXT3_FEATURE_COMPAT_SUPP 0 +#define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ + EXT3_FEATURE_INCOMPAT_RECOVER) +#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT3_FEATURE_RO_COMPAT_BTREE_DIR) + +/* + * Default values for user and/or group using reserved blocks + */ +#define EXT3_DEF_RESUID 0 +#define EXT3_DEF_RESGID 0 + +/* + * Structure of a directory entry + */ +#define EXT3_NAME_LEN 255 + +struct ext3_dir_entry { + __u32 inode; /* Inode number */ + __u16 rec_len; /* Directory entry length */ + __u16 name_len; /* Name length */ + char name[EXT3_NAME_LEN]; /* File name */ +}; + +/* + * The new version of the directory entry. Since EXT3 structures are + * stored in intel byte order, and the name_len field could never be + * bigger than 255 chars, it's safe to reclaim the extra byte for the + * file_type field. + */ +struct ext3_dir_entry_2 { + __u32 inode; /* Inode number */ + __u16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; + char name[EXT3_NAME_LEN]; /* File name */ +}; + +/* + * Ext3 directory file types. Only the low 3 bits are used. The + * other bits are reserved for now. 
+ */ +#define EXT3_FT_UNKNOWN 0 +#define EXT3_FT_REG_FILE 1 +#define EXT3_FT_DIR 2 +#define EXT3_FT_CHRDEV 3 +#define EXT3_FT_BLKDEV 4 +#define EXT3_FT_FIFO 5 +#define EXT3_FT_SOCK 6 +#define EXT3_FT_SYMLINK 7 + +#define EXT3_FT_MAX 8 + +/* + * EXT3_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 4 + */ +#define EXT3_DIR_PAD 4 +#define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) +#define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ + ~EXT3_DIR_ROUND) + +#ifdef __KERNEL__ + +/* Filesize hard limits for 64-bit file offsets */ +extern long long ext3_max_sizes[]; + +/* + * Describe an inode's exact location on disk and in memory + */ +struct ext3_iloc +{ + struct buffer_head *bh; + struct ext3_inode *raw_inode; + unsigned long block_group; +}; + +/* + * Function prototypes + */ + +/* + * Ok, these declarations are also in <linux/kernel.h> but none of the + * ext3 source programs needs to include it so they are duplicated here. + */ +# define NORET_TYPE /**/ +# define ATTRIB_NORET __attribute__((noreturn)) +# define NORET_AND noreturn, + +/* acl.c */ +extern int ext3_permission (struct inode *, int); + +/* balloc.c */ +extern int ext3_bg_has_super(struct super_block *sb, int group); +extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); +extern int ext3_new_block (handle_t *, struct inode *, unsigned long, + __u32 *, __u32 *, int *); +extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, + unsigned long); +extern unsigned long ext3_count_free_blocks (struct super_block *); +extern void ext3_check_blocks_bitmap (struct super_block *); +extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh); + +/* bitmap.c */ +extern unsigned long ext3_count_free (struct buffer_head *, unsigned); + +/* dir.c */ +extern int ext3_check_dir_entry(const char *, struct inode *, + struct ext3_dir_entry_2 *, struct buffer_head *, + unsigned long); + +/* file.c */ + +/* fsync.c */ +extern int ext3_sync_file (struct file *, struct dentry *, int); + +/* ialloc.c */ +extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int); +extern void ext3_free_inode (handle_t *, struct inode *); +extern struct inode * ext3_orphan_get (struct super_block *, ino_t); +extern unsigned long ext3_count_free_inodes (struct super_block *); +extern void ext3_check_inodes_bitmap (struct super_block *); + +/* inode.c */ + +extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); +extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); + +extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *); +extern void ext3_read_inode (struct inode *); +extern void ext3_write_inode (struct inode *, int); +extern int ext3_setattr (struct dentry *, struct iattr *); +extern void ext3_put_inode (struct inode *); +extern void ext3_delete_inode (struct inode *); +extern int ext3_sync_inode (handle_t *, struct inode *); +extern void ext3_discard_prealloc (struct inode *); +extern void ext3_dirty_inode(struct inode *); +extern int ext3_change_inode_journal_flag(struct inode *, int); + +/* ioctl.c */ +extern int ext3_ioctl (struct inode *, struct file *, unsigned int, + unsigned long); + +/* namei.c */ +extern struct inode_operations ext3_dir_inode_operations; +extern int ext3_orphan_add(handle_t *, struct inode *); +extern int ext3_orphan_del(handle_t *, struct inode *); + +/* super.c */ +extern void ext3_error (struct super_block
*, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void __ext3_std_error (struct super_block *, const char *, int); +extern void ext3_abort (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern NORET_TYPE void ext3_panic (struct super_block *, const char *, + const char *, ...) + __attribute__ ((NORET_AND format (printf, 3, 4))); +extern void ext3_warning (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext3_update_dynamic_rev (struct super_block *sb); +extern void ext3_put_super (struct super_block *); +extern void ext3_write_super (struct super_block *); +extern void ext3_write_super_lockfs (struct super_block *); +extern void ext3_unlockfs (struct super_block *); +extern int ext3_remount (struct super_block *, int *, char *); +extern struct super_block * ext3_read_super (struct super_block *,void *,int); +extern int ext3_statfs (struct super_block *, struct statfs *); + +/* truncate.c */ +extern void ext3_truncate (struct inode *); + +#define ext3_std_error(sb, errno) \ +do { \ + if ((errno)) \ + __ext3_std_error((sb), __FUNCTION__, (errno)); \ +} while (0) +extern const char *ext3_decode_error(struct super_block *sb, int errno, char nbuf[16]); + +/* + * Inodes and files operations + */ + +/* dir.c */ +extern struct file_operations ext3_dir_operations; + +/* file.c */ +extern struct inode_operations ext3_file_inode_operations; +extern struct file_operations ext3_file_operations; + +/* symlink.c */ +extern struct inode_operations ext3_fast_symlink_inode_operations; + +extern struct address_space_operations ext3_aops; + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_EXT3_FS_H */ diff -urN linux-2.4.16-reiserfspatches-immutable/include/linux/fs.h~ linux-2.4.16-reiserfspatches-immutable-ctx4/include/linux/fs.h~ --- linux-2.4.16-reiserfspatches-immutable/include/linux/fs.h~ Mon Dec 10 13:12:58 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/include/linux/fs.h~ Mon Dec 10 14:28:03 2001 @@ -127,13 +127,14 @@ /* Inode flags - they have nothing to superblock flags now */ -#define S_SYNC 1 /* Writes are synced at once */ -#define S_NOATIME 2 /* Do not update access times */ -#define S_QUOTA 4 /* Quota initialized for file */ -#define S_APPEND 8 /* Append-only file */ -#define S_IMMUTABLE 16 /* Immutable file */ -#define S_DEAD 32 /* removed, but still open directory */ -#define S_NOQUOTA 64 /* Inode is not counted to quota */ +#define S_SYNC 1 /* Writes are synced at once */ +#define S_NOATIME 2 /* Do not update access times */ +#define S_QUOTA 4 /* Quota initialized for file */ +#define S_APPEND 8 /* Append-only file */ +#define S_IMMUTABLE_FILE 16 /* Immutable file */ +#define S_DEAD 32 /* removed, but still open directory */ +#define S_NOQUOTA 64 /* Inode is not counted to quota */ +#define S_IMMUTABLE_LINK 128 /* Immutable links */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -157,7 +158,8 @@ #define IS_QUOTAINIT(inode) ((inode)->i_flags & S_QUOTA) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) -#define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE) +#define IS_IMMUTABLE_FILE(inode) ((inode)->i_flags & S_IMMUTABLE_FILE) +#define IS_IMMUTABLE_LINK(inode) ((((inode)->i_flags & S_IMMUTABLE_FILE) << 3) ^ ((inode)->i_flags & S_IMMUTABLE_LINK) ) #define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME)) #define 
IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME) @@ -356,11 +358,12 @@ /* * This is the inode attributes flag definitions */ -#define ATTR_FLAG_SYNCRONOUS 1 /* Syncronous write */ -#define ATTR_FLAG_NOATIME 2 /* Don't update atime */ -#define ATTR_FLAG_APPEND 4 /* Append-only file */ -#define ATTR_FLAG_IMMUTABLE 8 /* Immutable file */ -#define ATTR_FLAG_NODIRATIME 16 /* Don't update atime for directory */ +#define ATTR_FLAG_SYNCRONOUS 1 /* Syncronous write */ +#define ATTR_FLAG_NOATIME 2 /* Don't update atime */ +#define ATTR_FLAG_APPEND 4 /* Append-only file */ +#define ATTR_FLAG_IMMUTABLE_FILE 8 /* Immutable file */ +#define ATTR_FLAG_NODIRATIME 16 /* Don't update atime for directory */ +#define ATTR_FLAG_IMMUTABLE_LINK 32 /* Immutable link */ /* * Includes for diskquotas and mount structures. */ @@ -1381,6 +1384,7 @@ extern int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); extern int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*, unsigned long *); +extern int generic_cont_expand(struct inode *inode, loff_t size) ; extern int block_commit_write(struct page *page, unsigned from, unsigned to); extern int block_sync_page(struct page *); diff -urN linux-2.4.16-reiserfspatches-immutable/include/linux/sched.h linux-2.4.16-reiserfspatches-immutable-ctx4/include/linux/sched.h --- linux-2.4.16-reiserfspatches-immutable/include/linux/sched.h Mon Dec 10 13:12:49 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/include/linux/sched.h Mon Dec 10 15:01:51 2001 @@ -275,6 +275,30 @@ atomic_inc(&__user->__count); \ __user; }) + +/* + We may have a different domainname and nodename for each security + context. By default, a security context shares the same as its + parent, potentially the information in system_utsname +*/ +#define S_CTX_INFO_LOCK 1 /* Can't request a new s_context */ +#define S_CTX_INFO_SCHED 2 /* All processes in the s_context */ + /* contribute to the scheduler */ +#define S_CTX_INFO_NPROC 4 /* Limit number of processes in a context */ +#define S_CTX_INFO_PRIVATE 8 /* No one can join this security context */ + + +struct context_info{ + int refcount; + int s_context; + char nodename[65]; + char domainname[65]; + int flags; /* S_CTX_INFO_xxx */ + atomic_t ticks; /* Number of ticks used by all processes */ + /* in the s_context */ +}; + + extern struct user_struct root_user; #define INIT_USER (&root_user) @@ -399,6 +423,12 @@ unsigned long sas_ss_sp; size_t sas_ss_size; int (*notifier)(void *priv); +/* Fields to make a virtual server running in chroot more isolated */ + int s_context; /* Process can only deal with other processes */ + /* with the same s_context */ + __u32 cap_bset; /* Maximum capability of this process and children */ + unsigned long ipv4root; /* Process can only bind to this IP */ + struct context_info *s_info; void *notifier_data; sigset_t *notifier_mask; @@ -500,6 +530,7 @@ blocked: {{0}}, \ alloc_lock: SPIN_LOCK_UNLOCKED, \ journal_info: NULL, \ + cap_bset: CAP_INIT_EFF_SET, \ } @@ -925,6 +956,11 @@ mntput(rootmnt); return res; } + +/* Manage the reference count of the context_info pointer */ +void sys_release_s_info (struct task_struct *); +void sys_assign_s_info (struct task_struct *); +void sys_alloc_s_info (void); #endif /* __KERNEL__ */ diff -urN linux-2.4.16-reiserfspatches-immutable/include/net/route.h linux-2.4.16-reiserfspatches-immutable-ctx4/include/net/route.h --- linux-2.4.16-reiserfspatches-immutable/include/net/route.h Mon Dec 10 13:12:38 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/include/net/route.h Mon Dec
10 15:01:51 2001 @@ -159,6 +159,13 @@ static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif) { int err; + if (current->ipv4root != 0){ + if (src == 0){ + src = current->ipv4root; + }else if (current->ipv4root != src){ + return -EPERM; + } + } err = ip_route_output(rp, dst, src, tos, oif); if (err || (dst && src)) return err; diff -urN linux-2.4.16-reiserfspatches-immutable/kernel/exit.c linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/exit.c --- linux-2.4.16-reiserfspatches-immutable/kernel/exit.c Mon Dec 10 13:12:57 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/exit.c Mon Dec 10 15:01:51 2001 @@ -65,6 +65,7 @@ current->counter += p->counter; if (current->counter >= MAX_COUNTER) current->counter = MAX_COUNTER; + sys_release_s_info(p); p->pid = 0; free_task_struct(p); } else { diff -urN linux-2.4.16-reiserfspatches-immutable/kernel/fork.c linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/fork.c --- linux-2.4.16-reiserfspatches-immutable/kernel/fork.c Mon Dec 10 13:12:57 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/fork.c Mon Dec 10 15:01:51 2001 @@ -585,8 +585,14 @@ *p = *current; retval = -EAGAIN; + if (p->s_info != NULL && (p->s_info->flags & S_CTX_INFO_NPROC)!=0){ + if (p->s_info->refcount >= p->rlim[RLIMIT_NPROC].rlim_max) + goto bad_fork_free; + } if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur) goto bad_fork_free; + + sys_assign_s_info (p); atomic_inc(&p->user->__count); atomic_inc(&p->user->processes); diff -urN linux-2.4.16-reiserfspatches-immutable/kernel/sched.c linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/sched.c --- linux-2.4.16-reiserfspatches-immutable/kernel/sched.c Mon Dec 10 13:12:57 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/sched.c Mon Dec 10 15:01:51 2001 @@ -165,7 +165,13 @@ * Don't do any other calculations if the time slice is * over.. 
*/ - weight = p->counter; + if (p->s_info != NULL + && (p->s_info->flags & S_CTX_INFO_SCHED)!=0){ + weight = atomic_read (&p->s_info->ticks)/p->s_info->refcount; + weight = (weight+p->counter)>>1; + }else{ + weight = p->counter; + } if (!weight) goto out; @@ -605,8 +611,23 @@ spin_unlock_irq(&runqueue_lock); read_lock(&tasklist_lock); - for_each_task(p) + /* + Reset the s_info->ticks to the sum of all + member processes' p->counter + */ + for_each_task(p){ + if (p->s_info != NULL + && (p->s_info->flags & S_CTX_INFO_SCHED)!=0){ + atomic_set (&p->s_info->ticks,0); + } + } + for_each_task(p){ p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice); + if (p->s_info != NULL + && (p->s_info->flags & S_CTX_INFO_SCHED)!=0){ + atomic_add (p->counter,&p->s_info->ticks); + } + } read_unlock(&tasklist_lock); spin_lock_irq(&runqueue_lock); goto repeat_schedule; diff -urN linux-2.4.16-reiserfspatches-immutable/kernel/signal.c linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/signal.c --- linux-2.4.16-reiserfspatches-immutable/kernel/signal.c Mon Dec 10 13:12:57 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/signal.c Mon Dec 10 15:01:51 2001 @@ -592,7 +592,7 @@ retval = -ESRCH; read_lock(&tasklist_lock); for_each_task(p) { - if (p->pgrp == pgrp) { + if (p->pgrp == pgrp && p->s_context == current->s_context) { int err = send_sig_info(sig, info, p); if (retval) retval = err; @@ -639,7 +639,7 @@ read_lock(&tasklist_lock); p = find_task_by_pid(pid); error = -ESRCH; - if (p) + if (p && p->s_context == current->s_context) error = send_sig_info(sig, info, p); read_unlock(&tasklist_lock); return error; @@ -663,7 +663,7 @@ read_lock(&tasklist_lock); for_each_task(p) { - if (p->pid > 1 && p != current) { + if (p->pid > 1 && p != current && p->s_context == current->s_context) { int err = send_sig_info(sig, info, p); ++count; if (err != -EPERM) @@ -1256,3 +1256,100 @@ return ret ? ret : (unsigned long)old_sa.sa.sa_handler; } #endif /* !alpha && !__ia64__ && !defined(__mips__) */ + +/* + Change to a new security context and reduce the capability + bounding set (cap_bset) of the current process +*/ +asmlinkage int +sys_new_s_context(int ctx, __u32 remove_cap, int flags) +{ + #define MAX_S_CONTEXT 65535 /* Arbitrary limit */ + int ret = -EPERM; + if (ctx == -1){ + if (current->s_info == NULL + || (current->s_info->flags & S_CTX_INFO_LOCK) == 0){ + /* Ok we allocate a new context. For now, we just increase */ + /* it. Wrap around is possible, so we loop */ + static int alloc_ctx=1; + static spinlock_t alloc_ctx_lock = SPIN_LOCK_UNLOCKED; + spin_lock(&alloc_ctx_lock); + while (1){ + int found = 0; + struct task_struct *p; + alloc_ctx++; + /* The s_context 1 is special.
It sees all processes */ + if (alloc_ctx == 1){ + alloc_ctx++; + }else if (alloc_ctx > MAX_S_CONTEXT){ + // No need to grow and grow + alloc_ctx = 2; + } + /* Check if in use */ + read_lock(&tasklist_lock); + for_each_task(p) { + if (p->s_context == alloc_ctx){ + found = 1; + break; + } + } + read_unlock(&tasklist_lock); + if (!found) break; + } + current->s_context = alloc_ctx; + current->cap_bset &= (~remove_cap); + ret = alloc_ctx; + sys_alloc_s_info(); + if (current->s_info != NULL){ + current->s_info->flags |= flags; + } + spin_unlock(&alloc_ctx_lock); + } + }else if (ctx == -2){ + /* We keep the same s_context, but lower the capabilities */ + current->cap_bset &= (~remove_cap); + ret = current->s_context; + if (current->s_info != NULL){ + current->s_info->flags |= flags; + } + }else if (ctx <= 0 || ctx > MAX_S_CONTEXT){ + ret = -EINVAL; + }else if (current->s_context == 0 + && capable(CAP_SYS_ADMIN) + && (current->s_info == NULL + ||(current->s_info->flags & S_CTX_INFO_LOCK) == 0)){ + /* The root context can become any context it wants */ + int found = 0; + struct task_struct *p; + /* Check if in use so we reuse the same context_info */ + read_lock(&tasklist_lock); + ret = ctx; + for_each_task(p) { + if (p->s_context == ctx){ + found = 1; + if (p->s_info == NULL + || (p->s_info->flags & S_CTX_INFO_PRIVATE)==0){ + sys_release_s_info(current); + sys_assign_s_info (p); + current->s_info = p->s_info; + }else{ + ret = -EPERM; + } + break; + } + } + read_unlock(&tasklist_lock); + if (ret == ctx){ + current->s_context = ctx; + current->cap_bset &= (~remove_cap); + if (!found){ + sys_alloc_s_info(); + } + if (current->s_info != NULL){ + current->s_info->flags |= flags; + } + } + } + return ret; +} + diff -urN linux-2.4.16-reiserfspatches-immutable/kernel/sys.c linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/sys.c --- linux-2.4.16-reiserfspatches-immutable/kernel/sys.c Mon Dec 10 13:11:17 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/sys.c Mon Dec 10 15:01:51 2001 @@ -1015,17 +1015,80 @@ asmlinkage long sys_newuname(struct new_utsname * name) { int errno = 0; + struct new_utsname tmp,*pttmp; down_read(&uts_sem); - if (copy_to_user(name,&system_utsname,sizeof *name)) + if (current->s_info != NULL){ + tmp = system_utsname; + strcpy (tmp.nodename,current->s_info->nodename); + strcpy (tmp.domainname,current->s_info->domainname); + pttmp = &tmp; + }else{ + pttmp = &system_utsname; + } + if (copy_to_user(name,pttmp,sizeof *name)) errno = -EFAULT; up_read(&uts_sem); return errno; } +/* + Decrease the reference count on the context_info member of a task + Free the struct if the reference count reaches 0. +*/ +void sys_release_s_info (struct task_struct *p) +{ + down_write (&uts_sem); + if (p->s_info != NULL){ + p->s_info->refcount--; + if (p->s_info->refcount == 0){ + // printk ("vfree s_info %d\n",p->pid); + vfree (p->s_info); + p->s_info = NULL; + } + } + up_write (&uts_sem); +} +/* + Increase the reference count on the context_info member of a task +*/ +void sys_assign_s_info (struct task_struct *p) +{ + down_write (&uts_sem); + if (p->s_info != NULL) p->s_info->refcount++; + up_write (&uts_sem); +} + +/* + Allocate a new s_info to the current process and release + the one currently owned by the current process.
+*/ +void sys_alloc_s_info() +{ + struct context_info *s_info = vmalloc(sizeof(struct context_info)); + // printk ("new s_info %d\n",current->pid); + s_info->s_context = current->s_context; + s_info->refcount = 1; + atomic_set (&s_info->ticks,current->counter); + s_info->flags = 0; + down_read (&uts_sem); + if (current->s_info != NULL){ + strcpy (s_info->nodename,current->s_info->nodename); + strcpy (s_info->domainname,current->s_info->domainname); + }else{ + strcpy (s_info->nodename,system_utsname.nodename); + strcpy (s_info->domainname,system_utsname.domainname); + } + up_read (&uts_sem); + sys_release_s_info (current); + current->s_info = s_info; +} + + asmlinkage long sys_sethostname(char *name, int len) { int errno; + char *nodename; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1033,8 +1096,10 @@ return -EINVAL; down_write(&uts_sem); errno = -EFAULT; - if (!copy_from_user(system_utsname.nodename, name, len)) { - system_utsname.nodename[len] = 0; + nodename = system_utsname.nodename; + if (current->s_info) nodename = current->s_info->nodename; + if (!copy_from_user(nodename, name, len)) { + nodename[len] = 0; errno = 0; } up_write(&uts_sem); @@ -1044,15 +1109,18 @@ asmlinkage long sys_gethostname(char *name, int len) { int i, errno; + char *nodename; if (len < 0) return -EINVAL; down_read(&uts_sem); - i = 1 + strlen(system_utsname.nodename); + nodename = system_utsname.nodename; + if (current->s_info != NULL) nodename = current->s_info->nodename; + i = 1 + strlen(nodename); if (i > len) i = len; errno = 0; - if (copy_to_user(name, system_utsname.nodename, i)) + if (copy_to_user(name, nodename, i)) errno = -EFAULT; up_read(&uts_sem); return errno; @@ -1065,6 +1133,7 @@ asmlinkage long sys_setdomainname(char *name, int len) { int errno; + char *domainname; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -1072,10 +1141,12 @@ return -EINVAL; down_write(&uts_sem); + domainname = system_utsname.domainname; + if (current->s_info) domainname = current->s_info->domainname; errno = -EFAULT; - if (!copy_from_user(system_utsname.domainname, name, len)) { + if (!copy_from_user(domainname, name, len)) { errno = 0; - system_utsname.domainname[len] = 0; + domainname[len] = 0; } up_write(&uts_sem); return errno; } diff -urN linux-2.4.16-reiserfspatches-immutable/kernel/sysctl.c linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/sysctl.c --- linux-2.4.16-reiserfspatches-immutable/kernel/sysctl.c Mon Dec 10 13:12:58 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/sysctl.c Mon Dec 10 15:01:51 2001 @@ -378,6 +378,7 @@ static int test_perm(int mode, int op) { + if (!capable(CAP_SYS_ADMIN)) mode &= ~(0222); if (!current->euid) mode >>= 6; else if (in_egroup_p(0)) @@ -792,7 +793,18 @@ void *buffer, size_t *lenp) { int r; + ctl_table tmp; + /* HACK for per s_context hostname and domainname */ + if (current->s_info != NULL){ + tmp = *table; + table = &tmp; + if (table->data == (void*)&system_utsname.nodename){ + tmp.data = &current->s_info->nodename; + }else if (table->data == (void*)&system_utsname.domainname){ + tmp.data = &current->s_info->domainname; + } + } if (!write) { down_read(&uts_sem); r=proc_dostring(table,0,filp,buffer,lenp); diff -urN linux-2.4.16-reiserfspatches-immutable/kernel/timer.c linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/timer.c --- linux-2.4.16-reiserfspatches-immutable/kernel/timer.c Mon Dec 10 13:12:01 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/kernel/timer.c Mon Dec 10 15:01:51 2001 @@ -583,6 +583,11 @@ update_one_process(p, user_tick, system, cpu); if (p->pid) { +
if (p->s_info != NULL + && (p->s_info->flags & S_CTX_INFO_SCHED)!=0){ + // atomic_sub (ticks*p->s_info->refcount, &p->s_info->ticks); + atomic_dec (&p->s_info->ticks); + } if (--p->counter <= 0) { p->counter = 0; p->need_resched = 1; diff -urN linux-2.4.16-reiserfspatches-immutable/net/ipv4/af_inet.c linux-2.4.16-reiserfspatches-immutable-ctx4/net/ipv4/af_inet.c --- linux-2.4.16-reiserfspatches-immutable/net/ipv4/af_inet.c Mon Dec 10 13:12:38 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/net/ipv4/af_inet.c Mon Dec 10 15:01:51 2001 @@ -477,6 +477,7 @@ unsigned short snum; int chk_addr_ret; int err; + __u32 s_addr; /* If the socket has its own bind function then use it. (RAW) */ if(sk->prot->bind) @@ -485,7 +486,17 @@ if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; - chk_addr_ret = inet_addr_type(addr->sin_addr.s_addr); + s_addr = addr->sin_addr.s_addr; + if (current->ipv4root != 0){ + // printk ("ipv4root0 %08lx %08x\n",current->ipv4root,s_addr); + if (s_addr == 0){ + s_addr = current->ipv4root; + }else if (s_addr != current->ipv4root){ + return -EADDRNOTAVAIL; + } + } + chk_addr_ret = inet_addr_type(s_addr); + // printk ("ipv4root %08lx %08x %d\n",current->ipv4root,s_addr,chk_addr_ret); /* Not specified by any standard per-se, however it breaks too * many applications when removed. It is unfortunate since @@ -496,7 +507,7 @@ */ if (sysctl_ip_nonlocal_bind == 0 && sk->protinfo.af_inet.freebind == 0 && - addr->sin_addr.s_addr != INADDR_ANY && + s_addr != INADDR_ANY && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST) @@ -521,7 +532,7 @@ (sk->num != 0)) goto out; - sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr; + sk->rcv_saddr = sk->saddr = s_addr; if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST) sk->saddr = 0; /* Use device */ diff -urN linux-2.4.16-reiserfspatches-immutable/net/socket.c linux-2.4.16-reiserfspatches-immutable-ctx4/net/socket.c --- linux-2.4.16-reiserfspatches-immutable/net/socket.c Mon Dec 10 13:12:28 2001 +++ linux-2.4.16-reiserfspatches-immutable-ctx4/net/socket.c Mon Dec 10 15:01:51 2001 @@ -1765,3 +1765,15 @@ len = 0; return len; } + +asmlinkage int sys_set_ipv4root (unsigned long ip) +{ + int ret = -EPERM; + if (current->ipv4root == 0 + || capable(CAP_SYS_ADMIN)){ + ret = 0; + current->ipv4root = ip; + } + return ret; +} +
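
Note on the IS_IMMUTABLE_LINK macro in the fs.h hunk above (an editor's illustration, not part of the patch). S_IMMUTABLE_FILE is bit value 16 and S_IMMUTABLE_LINK is 128, so shifting the file bit left by 3 lands it on the link bit, and the XOR yields "exactly one of the two flags set". As I read it, this is a self-contained restatement in C, with a hypothetical function name:

#define S_IMMUTABLE_FILE 16	/* values from the fs.h hunk */
#define S_IMMUTABLE_LINK 128

/* Nonzero when exactly one of the two immutable bits is set: marking a
 * file immutable also pins its links, while setting S_IMMUTABLE_LINK on
 * top of that cancels it, leaving the data immutable but the directory
 * entry removable. */
static int is_immutable_link(unsigned int i_flags)
{
	return ((i_flags & S_IMMUTABLE_FILE) << 3) ^ (i_flags & S_IMMUTABLE_LINK);
}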
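The S_CTX_INFO_SCHED changes in the kernel/sched.c and kernel/timer.c hunks amount to a simple fair-share rule: at each recalculation, s_info->ticks is reset to the sum of the members' counters, and a member's goodness weight becomes the average of its own counter and the per-member mean of its context. A standalone restatement of that arithmetic (hypothetical helper names, not in the patch; refcount is always at least 1 in the kernel):

/* Same weighting as the kernel/sched.c hunk above.  ctx_ticks stands in
 * for s_info->ticks, ctx_members for s_info->refcount. */
static int ctx_weight(int own_counter, int ctx_ticks, int ctx_members)
{
	int per_member = ctx_ticks / ctx_members; /* context's mean share */
	return (per_member + own_counter) >> 1;   /* halfway to that mean */
}

So a context running many busy processes drags each member toward a shared mean: with s_info->ticks == 60 spread over 6 members, a process holding counter == 30 is weighted at (10 + 30) / 2 = 20 instead of 30.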
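Finally, the two syscalls added by this patch can be driven from user space with plain syscall stubs. The sketch below is an editor's illustration, not part of the patch: it assumes an i386 kernel built with this patch, reuses the numbers 226/227 from the asm-i386/unistd.h hunk, and picks an arbitrary example address.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <arpa/inet.h>

#ifndef __NR_new_s_context
#define __NR_new_s_context 226	/* from the unistd.h hunk above */
#define __NR_set_ipv4root 227
#endif

int main(void)
{
	/* ctx == -1: ask the kernel to allocate a fresh security context.
	   remove_cap == 0: keep the current cap_bset.
	   flags == 0: no S_CTX_INFO_* restriction on the new context. */
	int ctx = syscall(__NR_new_s_context, -1, 0, 0);
	if (ctx < 0) {
		perror("new_s_context");
		return 1;
	}
	printf("now running in security context %d\n", ctx);

	/* Pin all further IPv4 binds and connects to one local address. */
	if (syscall(__NR_set_ipv4root, inet_addr("192.168.1.10")) < 0) {
		perror("set_ipv4root");
		return 1;
	}
	return 0;
}

After these calls, children inherit s_context and ipv4root across fork (task_struct is copied wholesale in kernel/fork.c), kill() only reaches processes in the same context per the kernel/signal.c hunks, and bind() to any other local address fails with EADDRNOTAVAIL per the net/ipv4/af_inet.c hunk.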