/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * libocfs2ne.c
 *
 * Shared routines for the ocfs2 tunefs utility
 *
 * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */

#define _LARGEFILE64_SOURCE
#define _GNU_SOURCE /* for getopt_long and O_DIRECT */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <signal.h>
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <getopt.h>
#include <assert.h>

#include "ocfs2/ocfs2.h"
#include "ocfs2/bitops.h"

#include "libocfs2ne.h"

#define WHOAMI "tunefs.ocfs2"


/*
 * Keeps track of how ocfs2ne sees the filesystem.  This structure is
 * filled in by the master ocfs2_filesys (the first caller to
 * tunefs_open()).  Every other ocfs2_filesys refers to it.
 *
 * It is allocated zeroed by tunefs_set_state(), which then sets both
 * fds to -1 and records the master and its cluster count.
 */
struct tunefs_filesystem_state {
	/* The master ocfs2_filesys (first tunefs_open()) */
	ocfs2_filesys	*ts_master;

	/*
	 * When a single-node (local) filesystem is opened, we prevent
	 * concurrent mount(2) by opening the device O_EXCL.  This is the
	 * fd we used.  The value is -1 for cluster-aware filesystems.
	 */
	int		ts_local_fd;

	/*
	 * Already-mounted filesystems can only do online operations.
	 * This is the fd we send ioctl(2)s to.  If the filesystem isn't
	 * in use, this is -1.
	 */
	int		ts_online_fd;

	/*
	 * Do we have the cluster locked?  This can be zero if we're a
	 * local filesystem.  If it is non-zero, ts_master->fs_dlm_ctxt
	 * must be valid.
	 */
	int		ts_cluster_locked;

	/* Non-zero if we've ever mucked with the allocator */
	int		ts_allocation;

	/*
	 * Number of clusters in the filesystem.  If changed by a
	 * resized filesystem, it is tracked here and used at final
	 * close.  Starts out as the master's fs_clusters.
	 */
	uint32_t	ts_fs_clusters;

	/*
	 * Size of the largest journal seen in tunefs_journal_check().
	 * tunefs_set_journal_size() falls back to this when it is asked
	 * for a size that rounds to zero clusters.
	 */
	uint32_t	ts_journal_clusters;

	/*
	 * Journal feature bits found during tunefs_journal_check().
	 * Handed to ocfs2_make_journal() when journals are resized.
	 */
	ocfs2_fs_options	ts_journal_features;
};

/*
 * Per-ocfs2_filesys bookkeeping.  It is reached through
 * fs->fs_private (see to_private()).
 */
struct tunefs_private {
	/* Linkage on the file-scope fs_list */
	struct list_head		tp_list;

	/* The ocfs2_filesys this private data belongs to */
	ocfs2_filesys			*tp_fs;

	/* All tunefs_privates point to the master state. */
	struct tunefs_filesystem_state	*tp_state;

	/* Flags passed to tunefs_open() for this ocfs2_filesys */
	int				tp_open_flags;
};

/*
 * List of all ocfs2_filesys objects opened by tunefs_open().  The
 * entries are struct tunefs_private, linked through tp_list.
 */
static LIST_HEAD(fs_list);

/*
 * Refcount for calls to tunefs_[un]block_signals().  The signal mask
 * is only touched on the 0 -> 1 and 1 -> 0 transitions.
 */
static unsigned int blocked_signals_count;

/* For DEBUG_EXE programs */
static const char *usage_string;


/*
 * Code to manage the fs_private state.
 */

/* Fetch the tunefs bookkeeping stored in fs->fs_private. */
static inline struct tunefs_private *to_private(ocfs2_filesys *fs)
{
	struct tunefs_private *priv = fs->fs_private;

	return priv;
}

/*
 * Return the shared filesystem state, or NULL if no ocfs2_filesys has
 * been registered on fs_list yet.  The state is read from the entry
 * at the tail of the list (fs_list.prev).
 */
static struct tunefs_filesystem_state *tunefs_get_master_state(void)
{
	struct tunefs_private *tail;

	if (list_empty(&fs_list))
		return NULL;

	tail = list_entry(fs_list.prev, struct tunefs_private, tp_list);
	return tail->tp_state;
}

/* Return the shared filesystem state that fs's private data points to. */
static struct tunefs_filesystem_state *tunefs_get_state(ocfs2_filesys *fs)
{
	return to_private(fs)->tp_state;
}

/*
 * Attach the shared filesystem state to fs.  If a master state already
 * exists it is reused; otherwise a new one is allocated with fs
 * recorded as the master.  On allocation failure tp_state is left NULL
 * and the error is returned.
 */
static errcode_t tunefs_set_state(ocfs2_filesys *fs)
{
	errcode_t err;
	struct tunefs_private *priv = to_private(fs);
	struct tunefs_filesystem_state *state = tunefs_get_master_state();

	/* Someone already created the master state; just share it */
	if (state) {
		priv->tp_state = state;
		return 0;
	}

	err = ocfs2_malloc0(sizeof(struct tunefs_filesystem_state),
			    &state);
	if (err) {
		priv->tp_state = NULL;
		return err;
	}

	/* We are the first opener, so we become the master */
	state->ts_master = fs;
	state->ts_fs_clusters = fs->fs_clusters;
	state->ts_local_fd = -1;
	state->ts_online_fd = -1;

	priv->tp_state = state;
	return 0;
}


/*
 * Functions for use by operations.
 */

/*
 * Apply 'how' (SIG_BLOCK to block, SIG_UNBLOCK to unblock) to every
 * signal except SIGTRAP and SIGSEGV.
 */
static void block_signals(int how)
{
	sigset_t mask;

	/* Start from "everything", then carve out the two exceptions */
	sigfillset(&mask);
	sigdelset(&mask, SIGTRAP);
	sigdelset(&mask, SIGSEGV);

	sigprocmask(how, &mask, NULL);
}

/*
 * Block signals.  Calls nest: only the outermost call changes the
 * signal mask, and every call bumps the refcount.
 */
void tunefs_block_signals(void)
{
	if (blocked_signals_count == 0)
		block_signals(SIG_BLOCK);

	++blocked_signals_count;
}

/*
 * Drop one reference taken by tunefs_block_signals().  The signal mask
 * is restored when the last reference goes away.  An unbalanced call
 * is reported and otherwise ignored.
 */
void tunefs_unblock_signals(void)
{
	if (blocked_signals_count == 0) {
		errorf("Trying to unblock signals, but signals were not "
		       "blocked\n");
		return;
	}

	blocked_signals_count--;
	if (blocked_signals_count == 0)
		block_signals(SIG_UNBLOCK);
}

/*
 * Take the DLM lock 'lockid' at 'level' using the master's DLM
 * context.  Local (single-node) filesystems have no DLM, so this is a
 * successful no-op for them.
 */
errcode_t tunefs_dlm_lock(ocfs2_filesys *fs, const char *lockid,
			  int flags, enum o2dlm_lock_level level)
{
	struct tunefs_filesystem_state *state = tunefs_get_state(fs);
	errcode_t err = 0;

	if (!ocfs2_mount_local(fs))
		err = o2dlm_lock(state->ts_master->fs_dlm_ctxt, lockid,
				 flags, level);

	return err;
}

/*
 * Release the DLM lock 'lockid' through the master's DLM context.
 * Local (single-node) filesystems have no DLM, so this is a successful
 * no-op for them.
 */
errcode_t tunefs_dlm_unlock(ocfs2_filesys *fs, char *lockid)
{
	struct tunefs_filesystem_state *state = tunefs_get_state(fs);
	errcode_t err = 0;

	if (!ocfs2_mount_local(fs))
		err = o2dlm_unlock(state->ts_master->fs_dlm_ctxt, lockid);

	return err;
}

/*
 * Send ioctl 'op' with 'arg' to the online (mounted) filesystem.
 *
 * Returns 0 on success, TUNEFS_ET_INTERNAL_FAILURE if there is no
 * online fd or the kernel reported EBADF/EFAULT,
 * TUNEFS_ET_ONLINE_NOT_SUPPORTED for ENOTTY, and
 * TUNEFS_ET_ONLINE_FAILED for any other errno.
 */
errcode_t tunefs_online_ioctl(ocfs2_filesys *fs, int op, void *arg)
{
	struct tunefs_filesystem_state *state = tunefs_get_state(fs);

	if (state->ts_online_fd < 0)
		return TUNEFS_ET_INTERNAL_FAILURE;

	if (!ioctl(state->ts_online_fd, op, arg))
		return 0;

	/* Translate errno into a tunefs error code */
	if ((errno == EBADF) || (errno == EFAULT))
		return TUNEFS_ET_INTERNAL_FAILURE;
	if (errno == ENOTTY)
		return TUNEFS_ET_ONLINE_NOT_SUPPORTED;

	return TUNEFS_ET_ONLINE_FAILED;
}

/*
 * Parse a size argument such as "4096", "0x1000", "64K" or "2g".
 *
 * The number is parsed by strtoull() with base auto-detection.  Only
 * the first character after the number is examined:
 *
 *   (end of string), b, B  - no scaling
 *   k, K                   - multiply by 1024
 *   m, M                   - multiply by 1024^2
 *   g, G                   - multiply by 1024^3
 *   t, T                   - multiply by 1024^4
 *   p, P                   - multiply by 1024^5
 *
 * Anything following that suffix character is ignored.
 *
 * On success the result is stored in *res and 0 is returned.
 * TUNEFS_ET_INVALID_NUMBER is returned if no digits were found, the
 * parsed value is UINT64_MAX, the suffix is unknown, or the scaled
 * value does not fit in a uint64_t.  *res is untouched on error.
 */
errcode_t tunefs_get_number(char *arg, uint64_t *res)
{
	char *ptr = NULL;
	uint64_t num;
	unsigned int shift = 0;	/* log2 of the suffix multiplier */

	num = strtoull(arg, &ptr, 0);

	if ((ptr == arg) || (num == UINT64_MAX))
		return TUNEFS_ET_INVALID_NUMBER;

	switch (*ptr) {
	case '\0':
		break;

	case 'p':
	case 'P':
		shift += 10;
		/* FALL THROUGH */

	case 't':
	case 'T':
		shift += 10;
		/* FALL THROUGH */

	case 'g':
	case 'G':
		shift += 10;
		/* FALL THROUGH */

	case 'm':
	case 'M':
		shift += 10;
		/* FALL THROUGH */

	case 'k':
	case 'K':
		shift += 10;
		/* FALL THROUGH */

	case 'b':
	case 'B':
		break;

	default:
		return TUNEFS_ET_INVALID_NUMBER;
	}

	/*
	 * Refuse values that would wrap when scaled, rather than
	 * silently handing back a much smaller number.
	 */
	if (num > (UINT64_MAX >> shift))
		return TUNEFS_ET_INVALID_NUMBER;

	*res = num << shift;

	return 0;
}

/*
 * Mark an operation as in progress in the superblock and write the
 * primary superblock out.
 *
 * RESIZE has its own incompat bit.  Every other operation sets the
 * generic TUNEFS_INPROG incompat bit and records 'flag' in
 * s_tunefs_flag.
 */
errcode_t tunefs_set_in_progress(ocfs2_filesys *fs, int flag)
{
	if (flag != OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG) {
		OCFS2_RAW_SB(fs->fs_super)->s_tunefs_flag |= flag;
		OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat |=
			OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG;
	} else {
		/* RESIZE is a special case for historical reasons */
		OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat |=
			OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG;
	}

	return ocfs2_write_primary_super(fs);
}

/*
 * Clear an operation's in-progress marker in the superblock and write
 * the primary superblock out.
 *
 * RESIZE has its own incompat bit.  For every other operation 'flag'
 * is removed from s_tunefs_flag, and the generic TUNEFS_INPROG incompat
 * bit is dropped once no tunefs flags remain.
 */
errcode_t tunefs_clear_in_progress(ocfs2_filesys *fs, int flag)
{
	if (flag != OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG) {
		OCFS2_RAW_SB(fs->fs_super)->s_tunefs_flag &= ~flag;

		/* Last operation done?  Then drop the generic marker */
		if (!OCFS2_RAW_SB(fs->fs_super)->s_tunefs_flag)
			OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat &=
				~OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG;
	} else {
		/* RESIZE is a special case for historical reasons */
		OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat &=
			~OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG;
	}

	return ocfs2_write_primary_super(fs);
}

/*
 * Resize the journal of every slot to new_size bytes.
 *
 * new_size is converted to clusters.  If that yields zero, the journal
 * size recorded in the shared state (ts_journal_clusters) is used
 * instead; it is a programming error for both to be zero.
 *
 * Journals that already have the requested number of clusters are left
 * alone.  The first failure stops the walk and is returned; journals
 * processed before it stay resized.
 *
 * Returns 0 on success, TUNEFS_ET_NO_MEMORY if the progress display
 * cannot be created, or the error from the failing libocfs2 call.
 */
errcode_t tunefs_set_journal_size(ocfs2_filesys *fs, uint64_t new_size)
{
	errcode_t ret = 0;
	char jrnl_file[OCFS2_MAX_FILENAME_LEN];
	uint64_t blkno;
	int i;
	int max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
	uint32_t num_clusters;
	char *buf = NULL;
	struct ocfs2_dinode *di;
	struct tunefs_filesystem_state *state = tunefs_get_state(fs);
	struct tools_progress *prog = NULL;

	num_clusters =
		ocfs2_clusters_in_blocks(fs,
					 ocfs2_blocks_in_bytes(fs,
							       new_size));

	/* If no size was passed in, use the size we found at open() */
	if (!num_clusters)
		num_clusters = state->ts_journal_clusters;

	/*
	 * This can't come from a NOCLUSTER operation, so we'd better
	 * have a size in ts_journal_clusters
	 */
	assert(num_clusters);

	ret = ocfs2_malloc_block(fs->fs_io, &buf);
	if (ret) {
		verbosef(VL_LIB,
			 "%s while allocating inode buffer for journal "
			 "resize\n",
			 error_message(ret));
		return ret;
	}

	prog = tools_progress_start("Setting journal size", "jsize",
				    max_slots);
	if (!prog) {
		ret = TUNEFS_ET_NO_MEMORY;
		verbosef(VL_LIB,
			 "%s while initializing progress display for "
			 "journal resize\n",
			 error_message(ret));
		/* Go through bail so the inode buffer is released */
		goto bail;
	}

	for (i = 0; i < max_slots; ++i) {
		ocfs2_sprintf_system_inode_name(jrnl_file,
						OCFS2_MAX_FILENAME_LEN,
						JOURNAL_SYSTEM_INODE, i);
		ret = ocfs2_lookup_system_inode(fs, JOURNAL_SYSTEM_INODE, i,
						&blkno);
		if (ret) {
			verbosef(VL_LIB,
				 "%s while looking up \"%s\" during "
				 "journal resize\n",
				 error_message(ret),
				 jrnl_file);
			goto bail;
		}

		ret = ocfs2_read_inode(fs, blkno, buf);
		if (ret) {
			verbosef(VL_LIB,
				 "%s while reading journal inode "
				 "%"PRIu64" for resizing\n",
				 error_message(ret), blkno);
			goto bail;
		}

		/* Already the right size: count it as done and move on */
		di = (struct ocfs2_dinode *)buf;
		if (num_clusters == di->i_clusters) {
			tools_progress_step(prog, 1);
			continue;
		}

		verbosef(VL_LIB,
			 "Resizing journal \"%s\" to %"PRIu32" clusters\n",
			 jrnl_file, num_clusters);
		ret = ocfs2_make_journal(fs, blkno, num_clusters,
					 &state->ts_journal_features);
		if (ret) {
			verbosef(VL_LIB,
				 "%s while resizing \"%s\" at block "
				 "%"PRIu64" to %"PRIu32" clusters\n",
				 error_message(ret), jrnl_file, blkno,
				 num_clusters);
			goto bail;
		}
		verbosef(VL_LIB, "Successfully resized journal \"%s\"\n",
			 jrnl_file);
		tools_progress_step(prog, 1);
	}

bail:
	/* prog is NULL if we got here because it could not be created */
	if (prog)
		tools_progress_stop(prog);
	if (buf)
		ocfs2_free(&buf);

	return ret;
}

/*
 * Write zeros over num_clusters clusters starting at block start_blk,
 * bypassing the I/O cache.
 *
 * The whole range is zeroed with one write when a buffer that large
 * can be allocated.  On OCFS2_ET_NO_MEMORY the buffer shrinks to a
 * single cluster and the range is written one cluster at a time.
 */
errcode_t tunefs_empty_clusters(ocfs2_filesys *fs, uint64_t start_blk,
				uint32_t num_clusters)
{
	errcode_t ret;
	char *zeros = NULL;
	uint64_t cluster_blocks = ocfs2_clusters_to_blocks(fs, 1);
	uint64_t remaining = ocfs2_clusters_to_blocks(fs, num_clusters);
	uint64_t chunk = remaining;

	ret = ocfs2_malloc_blocks(fs->fs_io, chunk, &zeros);
	if (ret == OCFS2_ET_NO_MEMORY) {
		/* Too big for one buffer; go one cluster at a time */
		chunk = cluster_blocks;
		ret = ocfs2_malloc_blocks(fs->fs_io, chunk, &zeros);
	}

	if (!ret) {
		memset(zeros, 0, chunk * fs->fs_blocksize);

		for (; remaining;
		     remaining -= chunk, start_blk += chunk) {
			ret = io_write_block_nocache(fs->fs_io, start_blk,
						     chunk, zeros);
			if (ret)
				break;
		}
	}

	if (zeros)
		ocfs2_free(&zeros);

	return ret;
}

/*
 * Report the number of free clusters, computed as i_total - i_used of
 * the global bitmap inode.  'clusters' may be NULL, in which case the
 * inode is still read but nothing is stored.
 */
errcode_t tunefs_get_free_clusters(ocfs2_filesys *fs, uint32_t *clusters)
{
	errcode_t ret;
	uint64_t bm_blkno;
	char *buf = NULL;
	struct ocfs2_dinode *bm_di = NULL;

	ret = ocfs2_malloc_block(fs->fs_io, &buf);
	if (!ret)
		ret = ocfs2_lookup_system_inode(fs,
						GLOBAL_BITMAP_SYSTEM_INODE,
						0, &bm_blkno);
	if (!ret)
		ret = ocfs2_read_inode(fs, bm_blkno, buf);

	if (!ret && clusters) {
		bm_di = (struct ocfs2_dinode *)buf;
		*clusters = bm_di->id1.bitmap1.i_total -
			bm_di->id1.bitmap1.i_used;
	}

	if (buf)
		ocfs2_free(&buf);
	return ret;
}

/*
 * Decide whether a raw inode block belongs to this filesystem and is
 * in use.
 *
 * The signature is checked first; a block that passes is byte-swapped
 * to CPU order in place.  Returns OCFS2_ET_BAD_INODE_MAGIC for a wrong
 * signature, OCFS2_ET_INODE_NOT_VALID for a generation mismatch or a
 * missing OCFS2_VALID_FL, and 0 otherwise.
 */
static errcode_t tunefs_validate_inode(ocfs2_filesys *fs,
				       struct ocfs2_dinode *di)
{
	int bad_magic = memcmp(di->i_signature, OCFS2_INODE_SIGNATURE,
			       strlen(OCFS2_INODE_SIGNATURE));

	if (bad_magic)
		return OCFS2_ET_BAD_INODE_MAGIC;

	ocfs2_swap_inode_to_cpu(fs, di);

	if ((di->i_fs_generation != fs->fs_super->i_fs_generation) ||
	    !(di->i_flags & OCFS2_VALID_FL))
		return OCFS2_ET_INODE_NOT_VALID;

	return 0;
}

/*
 * Call func(fs, di, user_data) for every valid inode found by an
 * inode scan.
 *
 * Blocks that fail tunefs_validate_inode() are skipped silently.  The
 * scan stops at the end of the filesystem (block number 0), on a scan
 * error, or as soon as func returns non-zero; that value is returned.
 * func may be NULL, in which case the inodes are only scanned and
 * validated.
 */
errcode_t tunefs_foreach_inode(ocfs2_filesys *fs,
			       errcode_t (*func)(ocfs2_filesys *fs,
						 struct ocfs2_dinode *di,
						 void *user_data),
			       void *user_data)
{
	errcode_t ret;
	uint64_t blkno;
	char *buf;
	struct ocfs2_dinode *di;
	ocfs2_inode_scan *scan;

	ret = ocfs2_malloc_block(fs->fs_io, &buf);
	if (ret) {
		verbosef(VL_LIB,
			 "%s while allocating a buffer for inode scanning\n",
			 error_message(ret));
		return ret;
	}

	/* The scan fills buf; di is just a typed view of it */
	di = (struct ocfs2_dinode *)buf;

	ret = ocfs2_open_inode_scan(fs, &scan);
	if (ret) {
		verbosef(VL_LIB,
			 "%s while opening inode scan\n",
			 error_message(ret));
		ocfs2_free(&buf);
		return ret;
	}

	while (1) {
		ret = ocfs2_get_next_inode(scan, &blkno, buf);
		if (ret) {
			verbosef(VL_LIB, "%s while getting next inode\n",
				 error_message(ret));
			break;
		}

		/* Block 0 marks the end of the scan */
		if (!blkno)
			break;

		/* Not one of ours, or not in use: skip it */
		if (tunefs_validate_inode(fs, di))
			continue;

		if (!func)
			continue;

		ret = func(fs, di, user_data);
		if (ret)
			break;
	}

	ocfs2_close_inode_scan(scan);
	ocfs2_free(&buf);

	return ret;
}

/* A dirblock we have to add a trailer to */
struct tunefs_trailer_dirblock {
	/* Linkage on the trailer context's d_dirblocks list */
	struct list_head db_list;

	/* Block number this dirblock was read from */
	uint64_t db_blkno;

	/* One block's worth of directory data, as read from db_blkno */
	char *db_buf;

	/*
	 * These require a little explanation.  They point to
	 * ocfs2_dir_entry structures inside db_buf.
	 *
	 * db_last is the entry we're going to *keep*.  If the last
	 * entry in the dirblock has enough extra rec_len to allow the
	 * trailer, db_last points to it.  We will shorten its rec_len
	 * and insert the trailer.
	 *
	 * However, if the last entry in the dirblock cannot be
	 * truncated, db_last points to the entry before that - the
	 * last entry we're keeping in this dirblock.
	 *
	 * Examples:
	 *
	 * - The last entry in the dirblock has a name_len of 1 and a
	 *   rec_len of 128.  We can easily change the rec_len to 64 and
	 *   insert the trailer.  db_last points to this entry.
	 *
	 * - The last entry in the dirblock has a name_len of 1 and a
	 *   rec_len of 48.  The previous entry has a name_len of 1 and a
	 *   rec_len of 32.  We have to move the last entry out.  The
	 *   second-to-last entry can have its rec_len truncated to 16, so
	 *   we put it in db_last.
	 */
	struct ocfs2_dir_entry *db_last;
};

/*
 * Free a trailer context and everything it owns.
 *
 * The context is unlinked from whatever list d_list is on, every
 * queued dirblock (and its buffer) is freed, and the buffer of new
 * directory blocks allocated by tunefs_install_dir_trailer() is
 * released if there is one.  tc must not be used after this call.
 */
void tunefs_trailer_context_free(struct tunefs_trailer_context *tc)
{
	struct tunefs_trailer_dirblock *db;
	struct list_head *n, *pos;

	if (!list_empty(&tc->d_list))
		list_del(&tc->d_list);

	list_for_each_safe(pos, n, &tc->d_dirblocks) {
		db = list_entry(pos, struct tunefs_trailer_dirblock, db_list);
		list_del(&db->db_list);
		ocfs2_free(&db->db_buf);
		ocfs2_free(&db);
	}

	/*
	 * d_new_blocks is only set once an install has allocated the
	 * extra directory blocks.  Nothing else releases it, so it must
	 * go with the context.
	 */
	if (tc->d_new_blocks)
		ocfs2_free(&tc->d_new_blocks);

	ocfs2_free(&tc);
}

/*
 * Account for one more dirent of rec_len bytes that has to be moved
 * into newly added directory blocks.
 *
 * The new blocks need room for a trailer too, so the running byte
 * count is padded out to the next block whenever this dirent would
 * not fit in front of the trailer.  d_blocks_needed is recomputed from
 * the byte count every time.
 */
static void add_bytes_needed(ocfs2_filesys *fs,
			     struct tunefs_trailer_context *tc,
			     unsigned int rec_len)
{
	unsigned int trailer_off = ocfs2_dir_trailer_blk_off(fs);
	unsigned int in_block = tc->d_bytes_needed % fs->fs_blocksize;

	/*
	 * A block must always keep space for one minimal dirent ahead
	 * of the trailer, hence the OCFS2_DIR_REC_LEN(1) slack.  If we
	 * would eat into that, start this dirent in a fresh block.
	 */
	if ((in_block + rec_len) > (trailer_off - OCFS2_DIR_REC_LEN(1))) {
		tc->d_bytes_needed += fs->fs_blocksize - in_block;
	}

	tc->d_bytes_needed += rec_len;
	tc->d_blocks_needed = ocfs2_blocks_in_bytes(fs,
						    tc->d_bytes_needed);
}

/*
 * Walk the dirents of one directory block (db->db_buf) and work out
 * what has to change to make room for a trailer.
 *
 * Two things are recorded:
 *  - db->db_last: the last dirent that will stay in this block.
 *  - tc->d_bytes_needed / d_blocks_needed (via add_bytes_needed()):
 *    space for every live dirent that overlaps the trailer area.
 *
 * Returns 0, or OCFS2_ET_DIR_CORRUPTED if a dirent fails the sanity
 * checks.  db_last is set from the entries walked so far even then.
 */
static errcode_t walk_dirblock(ocfs2_filesys *fs,
			       struct tunefs_trailer_context *tc,
			       struct tunefs_trailer_dirblock *db)
{
	errcode_t ret = 0;
	struct ocfs2_dir_entry *dirent, *prev = NULL;
	unsigned int real_rec_len;
	unsigned int offset = 0;
	/* Byte offset within a block at which the trailer starts */
	unsigned int toff = ocfs2_dir_trailer_blk_off(fs);

	while (offset < fs->fs_blocksize) {
		dirent = (struct ocfs2_dir_entry *) (db->db_buf + offset);
		/*
		 * Sanity-check the entry before trusting rec_len to
		 * advance: it must stay inside the block, be at least
		 * 8 bytes, be a multiple of 4, and be able to hold the
		 * name.
		 */
		if (((offset + dirent->rec_len) > fs->fs_blocksize) ||
		    (dirent->rec_len < 8) ||
		    ((dirent->rec_len % 4) != 0) ||
		    (((dirent->name_len & 0xFF)+8) > dirent->rec_len)) {
			ret = OCFS2_ET_DIR_CORRUPTED;
			break;
		}

		/*
		 * rec_len may include slack.  What matters is the space
		 * the entry really needs: its name for a live entry, or
		 * the minimum dirent size for an unused one.
		 */
		real_rec_len = dirent->inode ?
			OCFS2_DIR_REC_LEN(dirent->name_len) :
			OCFS2_DIR_REC_LEN(1);
		/* Ends before the trailer: this entry can stay as it is */
		if ((offset + real_rec_len) <= toff)
			goto next;

		/*
		 * The first time through, we store off the last dirent
		 * before the trailer.
		 *
		 * NOTE(review): if the very first dirent in the block
		 * already overlaps the trailer, prev is still NULL here
		 * and db_last ends up being set by the fallback after
		 * the loop - confirm that case cannot occur.
		 */
		if (!db->db_last)
			db->db_last = prev;

		/* Only live dirents need to be moved */
		if (dirent->inode) {
			verbosef(VL_DEBUG,
				 "Will move dirent %.*s out of "
				 "directory block %"PRIu64" to make way "
				 "for the trailer\n",
				 dirent->name_len, dirent->name,
				 db->db_blkno);
			add_bytes_needed(fs, tc, real_rec_len);
		}

next:
		prev = dirent;
		offset += dirent->rec_len;
	}

	/* There were no dirents across the boundary */
	if (!db->db_last)
		db->db_last = prev;

	return ret;
}

/*
 * Block-iteration callback used by tunefs_prepare_dir_trailer().
 *
 * Reads the directory block at blkno, analyses it with walk_dirblock()
 * and queues it on tc->d_dirblocks.  On any failure the partially
 * built dirblock is freed, the error is stored in tc->d_err, and
 * OCFS2_BLOCK_ABORT is returned to stop the iteration.
 */
static int dirblock_scan_iterate(ocfs2_filesys *fs, uint64_t blkno,
				 uint64_t bcount, uint16_t ext_flags,
				 void *priv_data)
{
	errcode_t err;
	struct tunefs_trailer_context *tc = priv_data;
	struct tunefs_trailer_dirblock *db = NULL;

	err = ocfs2_malloc0(sizeof(struct tunefs_trailer_dirblock), &db);
	if (!err)
		err = ocfs2_malloc_block(fs->fs_io, &db->db_buf);

	if (!err) {
		db->db_blkno = blkno;

		verbosef(VL_DEBUG,
			 "Reading dinode %"PRIu64" dirblock %"PRIu64" at block "
			 "%"PRIu64"\n",
			 tc->d_di->i_blkno, bcount, blkno);
		err = ocfs2_read_dir_block(fs, tc->d_di, blkno, db->db_buf);
	}

	if (!err)
		err = walk_dirblock(fs, tc, db);

	if (!err) {
		/* The context owns the dirblock from here on */
		list_add_tail(&db->db_list, &tc->d_dirblocks);
		return 0;
	}

	/* Something failed: undo whatever we managed to allocate */
	if (db) {
		if (db->db_buf)
			ocfs2_free(&db->db_buf);
		ocfs2_free(&db);
	}

	tc->d_err = err;
	return OCFS2_BLOCK_ABORT;
}

/*
 * Scan directory 'di' and build the context needed to install
 * trailers in its blocks.
 *
 * On success the new context is stored in *tc_ret; the caller releases
 * it with tunefs_trailer_context_free().  If the directory already has
 * trailers, 0 is returned and *tc_ret is not written.  On failure the
 * partial context is freed and *tc_ret is not written.
 */
errcode_t tunefs_prepare_dir_trailer(ocfs2_filesys *fs,
				     struct ocfs2_dinode *di,
				     struct tunefs_trailer_context **tc_ret)
{
	errcode_t ret;
	struct tunefs_trailer_context *ctxt = NULL;

	/* Nothing to prepare if the trailers are already there */
	if (ocfs2_dir_has_trailer(fs, di))
		return 0;

	ret = ocfs2_malloc0(sizeof(struct tunefs_trailer_context), &ctxt);
	if (ret)
		goto fail;

	INIT_LIST_HEAD(&ctxt->d_list);
	INIT_LIST_HEAD(&ctxt->d_dirblocks);
	ctxt->d_di = di;
	ctxt->d_blkno = di->i_blkno;

	/*
	 * The iterator's own status comes first; the callback reports
	 * its failures through d_err.
	 */
	ret = ocfs2_block_iterate_inode(fs, ctxt->d_di, 0,
					dirblock_scan_iterate, ctxt);
	if (!ret)
		ret = ctxt->d_err;
	if (ret)
		goto fail;

	*tc_ret = ctxt;
	return 0;

fail:
	if (ctxt)
		tunefs_trailer_context_free(ctxt);

	return ret;
}

/*
 * Make sure directory 'di' has at least blocks_needed allocated blocks
 * beyond its current i_size, extending the allocation if necessary.
 * After a successful extension the inode is re-read into 'di'.
 *
 * We are hand-coding the directory expansion because we're going to
 * build the new directory blocks ourselves.  We can't just use
 * ocfs2_expand_dir() and ocfs2_link(), because we're moving around
 * entries.
 */
static errcode_t expand_dir_if_needed(ocfs2_filesys *fs,
				      struct ocfs2_dinode *di,
				      uint64_t blocks_needed)
{
	errcode_t ret;
	uint32_t extra_clusters;
	/* This relies on the fact that i_size of a directory is a
	 * multiple of blocksize */
	uint64_t in_use = ocfs2_blocks_in_bytes(fs, di->i_size);
	uint64_t allocated = ocfs2_clusters_to_blocks(fs, di->i_clusters);

	/* Enough slack in the existing allocation already */
	if ((in_use + blocks_needed) <= allocated)
		return 0;

	extra_clusters =
		ocfs2_clusters_in_blocks(fs,
					 (in_use + blocks_needed) -
					 allocated);
	ret = ocfs2_extend_allocation(fs, di->i_blkno, extra_clusters);
	if (ret)
		return ret;

	/* Pick up changes to the inode */
	return ocfs2_read_inode(fs, di->i_blkno, (char *)di);
}

/*
 * Copy one live dirent out of an old directory block into the buffer
 * of new directory blocks (tc->d_new_blocks).
 *
 * tc->d_cur_block is the new block being filled and tc->d_next_dirent
 * is the free placeholder entry inside it whose rec_len covers the
 * space still available.  The dirent is installed at the placeholder
 * and a new placeholder is created right behind it with whatever
 * rec_len is left over.
 */
static void shift_dirent(ocfs2_filesys *fs,
			 struct tunefs_trailer_context *tc,
			 struct ocfs2_dir_entry *dirent)
{
	/* Using the real rec_len */
	unsigned int rec_len = OCFS2_DIR_REC_LEN(dirent->name_len);
	unsigned int offset, remain;

	/*
	 * If the current byte offset would put us into a trailer, push
	 * it out to the start of the next block.  Remember, dirents have
	 * to be at least 16 bytes, which is why we check against the
	 * smallest rec_len.
	 */
	if (rec_len > (tc->d_next_dirent->rec_len - OCFS2_DIR_REC_LEN(1))) {
		tc->d_cur_block += fs->fs_blocksize;
		tc->d_next_dirent = (struct ocfs2_dir_entry *)tc->d_cur_block;
	}

	/* We must still be inside the blocks that were sized for us */
	assert(ocfs2_blocks_in_bytes(fs,
				     tc->d_cur_block - tc->d_new_blocks) <
	       tc->d_blocks_needed);

	/*
	 * Work out the leftover space before the memcpy() below
	 * overwrites the placeholder (including its rec_len).
	 */
	offset = (char *)(tc->d_next_dirent) - tc->d_cur_block;
	remain = tc->d_next_dirent->rec_len - rec_len;

	memcpy(tc->d_cur_block + offset, dirent, rec_len);
	/* The copy carried the old rec_len; trim it to the real size */
	tc->d_next_dirent->rec_len = rec_len;

	verbosef(VL_DEBUG,
		 "Installed dirent %.*s at offset %u of new block "
		 "%"PRIu64", rec_len %u\n",
		 tc->d_next_dirent->name_len, tc->d_next_dirent->name,
		 offset,
		 ocfs2_blocks_in_bytes(fs, tc->d_cur_block - tc->d_new_blocks),
		 rec_len);


	/* Set up the next placeholder right after the installed dirent */
	offset += rec_len;
	tc->d_next_dirent =
		(struct ocfs2_dir_entry *)(tc->d_cur_block + offset);
	tc->d_next_dirent->rec_len = remain;

	verbosef(VL_DEBUG,
		 "New block %"PRIu64" has its last dirent at %u, with %u "
		 "bytes left\n",
		 ocfs2_blocks_in_bytes(fs, tc->d_cur_block - tc->d_new_blocks),
		 offset, remain);
}

/*
 * Rewrite one existing directory block in memory so that it ends in a
 * trailer.
 *
 * Every live dirent after db->db_last is copied into the new blocks
 * with shift_dirent().  db_last then has its rec_len stretched to end
 * exactly where the trailer starts, and the trailer is initialized in
 * db->db_buf.  Nothing is written to disk here.
 *
 * Returns 0, or OCFS2_ET_DIR_CORRUPTED if a dirent after db_last fails
 * the sanity checks.  Note that the trailer is installed in the buffer
 * even in that case.
 */
static errcode_t fixup_dirblock(ocfs2_filesys *fs,
				struct tunefs_trailer_context *tc,
				struct tunefs_trailer_dirblock *db)
{
	errcode_t ret = 0;
	struct ocfs2_dir_entry *dirent;
	unsigned int real_rec_len;
	unsigned int offset;
	/* Byte offset within a block at which the trailer starts */
	unsigned int toff = ocfs2_dir_trailer_blk_off(fs);

	/*
	 * db_last is the last dirent we're *keeping*.  So we need to
	 * move out every valid dirent *after* db_last.
	 *
	 * tunefs_prepare_dir_trailer() should have calculated this
	 * correctly.
	 */
	offset = ((char *)db->db_last) - db->db_buf;
	offset += db->db_last->rec_len;
	while (offset < fs->fs_blocksize) {
		dirent = (struct ocfs2_dir_entry *) (db->db_buf + offset);
		/* Same sanity checks as the scan pass in walk_dirblock() */
		if (((offset + dirent->rec_len) > fs->fs_blocksize) ||
		    (dirent->rec_len < 8) ||
		    ((dirent->rec_len % 4) != 0) ||
		    (((dirent->name_len & 0xFF)+8) > dirent->rec_len)) {
			ret = OCFS2_ET_DIR_CORRUPTED;
			break;
		}

		real_rec_len = dirent->inode ?
			OCFS2_DIR_REC_LEN(dirent->name_len) :
			OCFS2_DIR_REC_LEN(1);

		/* Everything after db_last must overlap the trailer */
		assert((offset + real_rec_len) > toff);

		/* Only live dirents need to be moved */
		if (dirent->inode) {
			verbosef(VL_DEBUG,
				 "Moving dirent %.*s out of directory "
				 "block %"PRIu64" to make way for the "
				 "trailer\n",
				 dirent->name_len, dirent->name,
				 db->db_blkno);
			shift_dirent(fs, tc, dirent);
		}

		offset += dirent->rec_len;
	}

	/*
	 * Now that we've moved any dirents out of the way, we need to
	 * fix up db_last and install the trailer.
	 */
	offset = ((char *)db->db_last) - db->db_buf;
	verbosef(VL_DEBUG,
		 "Last valid dirent of directory block %"PRIu64" "
		 "(\"%.*s\") is %u bytes in.  Setting rec_len to %u and "
		 "installing the trailer\n",
		 db->db_blkno, db->db_last->name_len, db->db_last->name,
		 offset, toff - offset);
	/* db_last now spans everything up to the start of the trailer */
	db->db_last->rec_len = toff - offset;
	ocfs2_init_dir_trailer(fs, tc->d_di, db->db_blkno, db->db_buf);

	return ret;
}

/*
 * Apply fixup_dirblock() to every dirblock queued on the context, in
 * list order.  Stops at and returns the first error.
 */
static errcode_t run_dirblocks(ocfs2_filesys *fs,
			       struct tunefs_trailer_context *tc)
{
	errcode_t err;
	struct list_head *cur;

	list_for_each(cur, &tc->d_dirblocks) {
		err = fixup_dirblock(fs, tc,
				     list_entry(cur,
						struct tunefs_trailer_dirblock,
						db_list));
		if (err)
			return err;
	}

	return 0;
}

/*
 * Write every dirblock queued on the context back to its original
 * block number, in list order.  Stops at and returns the first error.
 */
static errcode_t write_dirblocks(ocfs2_filesys *fs,
				 struct tunefs_trailer_context *tc)
{
	errcode_t err;
	struct list_head *cur;
	struct tunefs_trailer_dirblock *blk;

	list_for_each(cur, &tc->d_dirblocks) {
		blk = list_entry(cur, struct tunefs_trailer_dirblock,
				 db_list);
		err = ocfs2_write_dir_block(fs, tc->d_di, blk->db_blkno,
					    blk->db_buf);
		if (!err)
			continue;

		verbosef(VL_DEBUG,
			 "Error writing dirblock %"PRIu64"\n",
			 blk->db_blkno);
		return err;
	}

	return 0;
}

/*
 * Initialize the in-memory buffer of new directory blocks
 * (tc->d_new_blocks).
 *
 * Each of the d_blocks_needed blocks is zeroed, given a single empty
 * dirent whose rec_len reaches up to the trailer, and a trailer that
 * records the physical block number the block will be written to.  The
 * physical blocks are the ones mapped directly after the directory's
 * current i_size.
 *
 * Returns 0, or the error from reading the inode or mapping a block.
 */
static errcode_t init_new_dirblocks(ocfs2_filesys *fs,
				    struct tunefs_trailer_context *tc)
{
	int i;
	errcode_t ret;
	uint64_t blkno;
	/* First logical block past the directory's current contents */
	uint64_t orig_block = ocfs2_blocks_in_bytes(fs, tc->d_di->i_size);
	ocfs2_cached_inode *cinode = NULL;
	char *blockptr;
	struct ocfs2_dir_entry *first;

	ret = ocfs2_read_cached_inode(fs, tc->d_blkno, &cinode);
	if (ret)
		goto out;
	/* Our copy of the inode must match what is on disk */
	assert(!memcmp(tc->d_di, cinode->ci_inode, fs->fs_blocksize));

	for (i = 0; i < tc->d_blocks_needed; i++) {
		ret = ocfs2_extent_map_get_blocks(cinode, orig_block + i,
						  1, &blkno, NULL, NULL);
		if (ret)
			goto out;
		blockptr = tc->d_new_blocks + (i * fs->fs_blocksize);
		memset(blockptr, 0, fs->fs_blocksize);
		first = (struct ocfs2_dir_entry *)blockptr;
		first->rec_len = ocfs2_dir_trailer_blk_off(fs);
		ocfs2_init_dir_trailer(fs, tc->d_di, blkno, blockptr);
	}

out:
	/* The cached inode was only needed for the block lookups */
	if (cinode)
		ocfs2_free_cached_inode(fs, cinode);

	return ret;
}

/*
 * Write the buffer of new directory blocks (tc->d_new_blocks) to disk.
 *
 * The i'th new block goes to the physical block mapped at the i'th
 * logical block past the directory's current i_size.  Stops at and
 * returns the first error.
 */
static errcode_t write_new_dirblocks(ocfs2_filesys *fs,
				     struct tunefs_trailer_context *tc)
{
	int i;
	errcode_t ret;
	uint64_t blkno;
	/* First logical block past the directory's current contents */
	uint64_t orig_block = ocfs2_blocks_in_bytes(fs, tc->d_di->i_size);
	ocfs2_cached_inode *cinode = NULL;
	char *blockptr;

	ret = ocfs2_read_cached_inode(fs, tc->d_blkno, &cinode);
	if (ret)
		goto out;
	/* Our copy of the inode must match what is on disk */
	assert(!memcmp(tc->d_di, cinode->ci_inode, fs->fs_blocksize));

	for (i = 0; i < tc->d_blocks_needed; i++) {
		ret = ocfs2_extent_map_get_blocks(cinode, orig_block + i,
						  1, &blkno, NULL, NULL);
		if (ret)
			goto out;
		blockptr = tc->d_new_blocks + (i * fs->fs_blocksize);
		ret = ocfs2_write_dir_block(fs, tc->d_di, blkno, blockptr);
		if (ret) {
			verbosef(VL_DEBUG,
				 "Error writing dirblock %"PRIu64"\n",
				 blkno);
			goto out;
		}
	}

out:
	/* The cached inode was only needed for the block lookups */
	if (cinode)
		ocfs2_free_cached_inode(fs, cinode);

	return ret;
}

/*
 * Install trailers in every block of directory 'di'.
 *
 * Inline-data directories and directories that already have trailers
 * are left alone.  'tc' may be a context previously built by
 * tunefs_prepare_dir_trailer() for this very 'di'
 * (OCFS2_ET_INVALID_ARGUMENT otherwise), or NULL to have one built and
 * freed internally.
 */
errcode_t tunefs_install_dir_trailer(ocfs2_filesys *fs,
					struct ocfs2_dinode *di,
					struct tunefs_trailer_context *tc)
{
	errcode_t ret = 0;
	struct tunefs_trailer_context *private_tc = NULL;

	/* Nothing to do for these directories */
	if (di->i_dyn_features & OCFS2_INLINE_DATA_FL)
		return 0;
	if (ocfs2_dir_has_trailer(fs, di))
		return 0;

	/* No context handed in?  Build one that we own and free. */
	if (!tc) {
		ret = tunefs_prepare_dir_trailer(fs, di, &private_tc);
		if (ret)
			goto done;
		tc = private_tc;
	}

	if (tc->d_di != di) {
		ret = OCFS2_ET_INVALID_ARGUMENT;
		goto done;
	}

	/* Some dirents have to move: set up the blocks they move to */
	if (tc->d_blocks_needed) {
		ret = ocfs2_malloc_blocks(fs->fs_io, tc->d_blocks_needed,
					  &tc->d_new_blocks);
		if (!ret) {
			tc->d_cur_block = tc->d_new_blocks;
			ret = expand_dir_if_needed(fs, di,
						   tc->d_blocks_needed);
		}
		if (!ret)
			ret = init_new_dirblocks(fs, tc);
		if (ret)
			goto done;

		tc->d_next_dirent = (struct ocfs2_dir_entry *)tc->d_cur_block;
		verbosef(VL_DEBUG, "t_next_dirent has rec_len of %u\n",
			 tc->d_next_dirent->rec_len);
	}

	/* Rearrange everything in memory first */
	ret = run_dirblocks(fs, tc);
	if (ret)
		goto done;

	/*
	 * We write in a specific order.  We write any new dirblocks first
	 * so that they are on disk.  Then we write the new i_size in the
	 * inode.  If we crash at this point, the directory has duplicate
	 * entries but no lost entries.  fsck can clean it up.  Finally, we
	 * write the modified dirblocks with trailers.
	 */
	if (tc->d_blocks_needed) {
		ret = write_new_dirblocks(fs, tc);
		if (ret)
			goto done;

		di->i_size += ocfs2_blocks_to_bytes(fs, tc->d_blocks_needed);
		ret = ocfs2_write_inode(fs, di->i_blkno, (char *)di);
		if (ret)
			goto done;
	}

	ret = write_dirblocks(fs, tc);

done:
	if (private_tc)
		tunefs_trailer_context_free(private_tc);
	return ret;
}

/*
 * Starting, opening, closing, and exiting.
 */

/*
 * Close every ocfs2_filesys on fs_list.  The safe list iterator is
 * used, so the entry being closed may be removed from the list during
 * the walk.
 */
static void tunefs_close_all(void)
{
	struct list_head *cur, *next;

	list_for_each_safe(cur, next, &fs_list) {
		struct tunefs_private *priv =
			list_entry(cur, struct tunefs_private, tp_list);

		tunefs_close(priv->tp_fs);
	}
}

/*
 * Signal handler installed by setup_signals().
 *
 * SIGTERM, SIGINT and SIGHUP close all filesystems and exit(1).
 * SIGQUIT aborts without closing anything.  The first SIGSEGV tries to
 * close the filesystems and exit; a second one aborts.  Any other
 * signal is reported and ignored.
 */
static void handle_signal(int caught_sig)
{
	static int segv_already = 0;
	int abortp = 0;

	switch (caught_sig) {
	case SIGQUIT:
	case SIGTERM:
	case SIGINT:
	case SIGHUP:
		/* Only SIGQUIT skips the clean shutdown */
		abortp = (caught_sig == SIGQUIT);
		errorf("Caught signal %d, exiting\n", caught_sig);
		break;

	case SIGSEGV:
		errorf("Segmentation fault, exiting\n");
		if (segv_already) {
			/* We faulted again while trying to clean up */
			errorf("Segmentation fault loop detected\n");
			abortp = 1;
		} else {
			segv_already = 1;
		}
		break;

	default:
		errorf("Caught signal %d, ignoring\n", caught_sig);
		return;
	}

	if (abortp)
		abort();

	tunefs_close_all();

	exit(1);
}

/*
 * Install handle_signal() for SIGTERM, SIGINT, SIGHUP, SIGQUIT and
 * SIGSEGV, and ignore SIGPIPE so that writes fail with EPIPE instead.
 *
 * Returns 0 if every sigaction() call succeeded, non-zero otherwise.
 */
static int setup_signals(void)
{
	int rc = 0;
	struct sigaction act;

	act.sa_sigaction = NULL;
	sigemptyset(&act.sa_mask);
	act.sa_handler = handle_signal;
	/*
	 * sa_flags must always hold a defined value.  Without this, it
	 * would be passed to sigaction() uninitialized on systems that
	 * do not define SA_INTERRUPT.
	 */
	act.sa_flags = 0;
#ifdef SA_INTERRUPT
	act.sa_flags = SA_INTERRUPT;
#endif

	rc += sigaction(SIGTERM, &act, NULL);
	rc += sigaction(SIGINT, &act, NULL);
	rc += sigaction(SIGHUP, &act, NULL);
	rc += sigaction(SIGQUIT, &act, NULL);
	rc += sigaction(SIGSEGV, &act, NULL);
	act.sa_handler = SIG_IGN;
	rc += sigaction(SIGPIPE, &act, NULL);  /* Get EPIPE instead */

	return rc;
}

/*
 * One-time process setup: register the error tables, record the
 * program name from argv0, make stdout and stderr unbuffered, and
 * install the signal handlers.  Exits with status 1 if the signal
 * handlers cannot be installed.
 */
void tunefs_init(const char *argv0)
{
	initialize_o2ne_error_table();
	initialize_ocfs_error_table();
	initialize_o2dl_error_table();
	initialize_o2cb_error_table();

	tools_setup_argv0(argv0);

	setbuf(stdout, NULL);
	setbuf(stderr, NULL);

	if (!setup_signals())
		return;

	errorf("%s\n", error_message(TUNEFS_ET_SIGNALS_FAILED));
	exit(1);
}

/*
 * Single-node filesystems need to prevent mount(8) from happening
 * while tunefs.ocfs2 is running.  bd_claim does this for us when we
 * open O_EXCL.
 *
 * Returns 0 once the device is held (or was already held).  If the
 * device is busy, TUNEFS_ET_PERFORM_ONLINE is returned when it is
 * mounted read-write, is not swap, and the caller passed
 * TUNEFS_FLAG_ONLINE; otherwise TUNEFS_ET_DEVICE_BUSY.  A missing
 * device yields OCFS2_ET_NAMED_DEVICE_NOT_FOUND, and any other open
 * failure OCFS2_ET_IO.
 */
static errcode_t tunefs_lock_local(ocfs2_filesys *fs, int flags)
{
	errcode_t err;
	int mount_flags;
	int fd;
	struct tunefs_filesystem_state *state = tunefs_get_state(fs);

	/* Already holding the device */
	if (state->ts_local_fd > -1)
		return 0;

	fd = open64(fs->fs_devname, O_RDWR | O_EXCL);
	if (fd >= 0) {
		state->ts_local_fd = fd;
		return 0;
	}

	if (errno == ENOENT)
		return OCFS2_ET_NAMED_DEVICE_NOT_FOUND;
	if (errno != EBUSY)
		return OCFS2_ET_IO;

	/* bd_claim has a hold, let's see if it's ocfs2 */
	err = ocfs2_check_if_mounted(fs->fs_devname, &mount_flags);
	if (err)
		return err;

	/* Online operation needs a read-write, non-swap mount */
	if ((mount_flags & OCFS2_MF_MOUNTED) &&
	    !(mount_flags & OCFS2_MF_READONLY) &&
	    !(mount_flags & OCFS2_MF_SWAP) &&
	    (flags & TUNEFS_FLAG_ONLINE))
		return TUNEFS_ET_PERFORM_ONLINE;

	return TUNEFS_ET_DEVICE_BUSY;
}

/*
 * Release the O_EXCL hold on the device, if we have one.  Must only be
 * called on the master ocfs2_filesys.
 */
static void tunefs_unlock_local(ocfs2_filesys *fs)
{
	struct tunefs_filesystem_state *state = tunefs_get_state(fs);

	assert(state->ts_master == fs);

	if (state->ts_local_fd < 0)
		return;

	close(state->ts_local_fd);  /* Don't care about errors */
	state->ts_local_fd = -1;
}

/*
 * Release the cluster lock (if held) and shut down the DLM (if it is
 * running).  Must only be called on the master ocfs2_filesys.
 *
 * The DLM is shut down even if releasing the cluster lock failed.  The
 * first error encountered is the one returned.
 */
static errcode_t tunefs_unlock_cluster(ocfs2_filesys *fs)
{
	errcode_t err = 0;
	errcode_t shutdown_err;
	struct tunefs_filesystem_state *state = tunefs_get_state(fs);
	struct tools_progress *prog;

	/*
	 * We continue even with no progress, because we're unlocking
	 * and probably exiting.
	 */
	prog = fs->fs_dlm_ctxt ?
		tools_progress_start("Unlocking filesystem", "unlocking",
				     2) :
		NULL;

	assert(state->ts_master == fs);

	/* Step one: drop the cluster lock */
	if (state->ts_cluster_locked) {
		assert(fs->fs_dlm_ctxt);

		tunefs_block_signals();
		err = ocfs2_release_cluster(fs);
		tunefs_unblock_signals();
		state->ts_cluster_locked = 0;
	}
	if (prog)
		tools_progress_step(prog, 1);

	/* Step two: we shut down the dlm regardless of err */
	if (fs->fs_dlm_ctxt) {
		shutdown_err = ocfs2_shutdown_dlm(fs, WHOAMI);
		if (!err)
			err = shutdown_err;
	}
	if (prog) {
		tools_progress_step(prog, 1);
		tools_progress_stop(prog);
	}

	return err;
}

/*
 * We only unlock if we're closing the master filesystem.  We unlock
 * both local and cluster locks, because we may have started as a local
 * filesystem, then switched to a cluster filesystem in the middle.
 *
 * Returns 0 for a non-master ocfs2_filesys, otherwise the result of
 * the cluster unlock.
 */
static errcode_t tunefs_unlock_filesystem(ocfs2_filesys *fs)
{
	struct tunefs_filesystem_state *state = tunefs_get_state(fs);

	if (state->ts_master != fs)
		return 0;

	tunefs_unlock_local(fs);
	return tunefs_unlock_cluster(fs);
}

/*
 * Take the cluster lock through the master ocfs2_filesys.
 *
 * Returns 0 if the cluster is (or already was) locked.  Special
 * return values:
 *   TUNEFS_ET_CLUSTER_SKIPPED    - TUNEFS_FLAG_SKIPCLUSTER was passed,
 *                                  nothing was attempted.
 *   TUNEFS_ET_INVALID_STACK_NAME - TUNEFS_FLAG_NOCLUSTER was passed and
 *                                  the DLM reported a stack mismatch.
 *   TUNEFS_ET_PERFORM_ONLINE     - the trylock failed and
 *                                  TUNEFS_FLAG_ONLINE was passed.
 * Any other error is a real failure.
 */
static errcode_t tunefs_lock_cluster(ocfs2_filesys *fs, int flags)
{
	errcode_t err = 0;
	struct tunefs_filesystem_state *state = tunefs_get_state(fs);
	ocfs2_filesys *master_fs = state->ts_master;
	struct tools_progress *prog = NULL;

	if (state->ts_cluster_locked)
		goto out;

	if (flags & TUNEFS_FLAG_SKIPCLUSTER) {
		err = TUNEFS_ET_CLUSTER_SKIPPED;
		goto out;
	}

	prog = tools_progress_start("Locking filesystem", "locking", 2);
	if (!prog) {
		err = TUNEFS_ET_NO_MEMORY;
		goto out;
	}

	if (!master_fs->fs_dlm_ctxt) {
		err = o2cb_init();
		if (err)
			goto out;

		err = ocfs2_initialize_dlm(master_fs, WHOAMI);
		if (flags & TUNEFS_FLAG_NOCLUSTER) {
			if (err == O2CB_ET_INVALID_STACK_NAME) {
				/*
				 * We expected this - why else ask for
				 * TUNEFS_FLAG_NOCLUSTER?
				 *
				 * Note that this is distinct from the O2CB
				 * error, as that is a real error when
				 * TUNEFS_FLAG_NOCLUSTER is not specified.
				 */
				err = TUNEFS_ET_INVALID_STACK_NAME;
			}
			/*
			 * Success means do nothing, any other error
			 * propagates up.
			 */
			goto out;
		} else if (err)
			goto out;
	}

	tools_progress_step(prog, 1);

	tunefs_block_signals();
	err = ocfs2_lock_down_cluster(master_fs);
	tunefs_unblock_signals();
	if (!err) {
		state->ts_cluster_locked = 1;
	} else if ((err == O2DLM_ET_TRYLOCK_FAILED) &&
		   (flags & TUNEFS_FLAG_ONLINE)) {
		err = TUNEFS_ET_PERFORM_ONLINE;
	} else {
		/*
		 * The DLM context was set up on, and the lock attempted
		 * through, the master filesys.  Shut it down there too;
		 * fs may be a child that has no DLM context of its own.
		 */
		ocfs2_shutdown_dlm(master_fs, WHOAMI);
	}

	tools_progress_step(prog, 1);

out:
	if (prog)
		tools_progress_stop(prog);

	return err;
}

/*
 * Lock the filesystem as seen through *this* ocfs2_filesys.  The shared
 * state comes from the master, but the on-disk filesystem may have
 * changed since the master was opened (it could have been switched to
 * LOCAL, for example), so the decision between a local and a cluster
 * lock is made from the current ocfs2_filesys.
 *
 * The underlying lock functions consult the state themselves and only
 * act when needed.  If they do not currently hold the lock they always
 * retry it, since the filesystem may have been unmounted right after
 * our latest online operation.
 */
static errcode_t tunefs_lock_filesystem(ocfs2_filesys *fs, int flags)
{
	return ocfs2_mount_local(fs) ? tunefs_lock_local(fs, flags)
				     : tunefs_lock_cluster(fs, flags);
}

/*
 * Count the clear bits in the first bg_bits bits of a group
 * descriptor's bitmap by walking it one run of clear bits at a time.
 */
static int tunefs_count_free_bits(struct ocfs2_group_desc *gd)
{
	int nbits = gd->bg_bits;
	int run_start;
	int run_end = 0;
	int free_bits = 0;

	while (run_end < nbits) {
		/* Find where the next run of clear bits begins ... */
		run_start = ocfs2_find_next_bit_clear(gd->bg_bitmap, nbits,
						      run_end);
		if (run_start >= nbits)
			break;

		/* ... and where it ends */
		run_end = ocfs2_find_next_bit_set(gd->bg_bitmap, nbits,
						  run_start);
		free_bits += run_end - run_start;
	}

	return free_bits;
}

/*
 * Walk one chain of a chain allocator and sanity-check every group
 * descriptor on it.
 *
 * fs:    filesystem to read the descriptors from
 * di:    the chain allocator inode
 * chain: index into the allocator's cl_recs[]
 *
 * For each descriptor we verify that it points back at this allocator
 * and this chain, that bg_bits fits in the bitmap, that the counted
 * free bits match bg_free_bits_count, and that the free count is below
 * bg_bits.  After the walk the summed totals must match the chain
 * record.  Returns 0 if everything agrees, OCFS2_ET_CORRUPT_CHAIN on a
 * mismatch, or the error from allocating/reading a descriptor.
 */
static errcode_t tunefs_validate_chain_group(ocfs2_filesys *fs,
					     struct ocfs2_dinode *di,
					     int chain)
{
	errcode_t ret = 0;
	uint64_t blkno;
	char *buf = NULL;
	struct ocfs2_group_desc *gd;
	struct ocfs2_chain_list *cl;
	struct ocfs2_chain_rec *cr;
	uint32_t total = 0;
	uint32_t free = 0;
	uint16_t bits;

	ret = ocfs2_malloc_block(fs->fs_io, &buf);
	if (ret) {
		verbosef(VL_LIB,
			 "%s while allocating a buffer for chain group "
			 "validation\n",
			 error_message(ret));
		goto bail;
	}

	cl = &(di->id2.i_chain);
	cr = &(cl->cl_recs[chain]);
	blkno = cr->c_blkno;

	while (blkno) {
		ret = ocfs2_read_group_desc(fs, blkno, buf);
		if (ret) {
			verbosef(VL_LIB,
				 "%s while reading chain group descriptor "
				 "at block %"PRIu64"\n",
				 error_message(ret), blkno);
			goto bail;
		}

		gd = (struct ocfs2_group_desc *)buf;

		if (gd->bg_parent_dinode != di->i_blkno) {
			ret = OCFS2_ET_CORRUPT_CHAIN;
			verbosef(VL_LIB,
				 "Chain allocator at block %"PRIu64" is "
				 "corrupt.  It contains group descriptor "
				 "at %"PRIu64", but that descriptor says "
				 "it belongs to allocator %"PRIu64"\n",
				 (uint64_t)di->i_blkno, blkno,
				 (uint64_t)gd->bg_parent_dinode);
			goto bail;
		}

		if (gd->bg_chain != chain) {
			ret = OCFS2_ET_CORRUPT_CHAIN;
			verbosef(VL_LIB,
				 "Chain allocator at block %"PRIu64" is "
				 "corrupt.  Group descriptor at %"PRIu64" "
				 "was found on chain %u, but it says it "
				 "belongs to chain %u\n",
				 (uint64_t)di->i_blkno, blkno,
				 chain, gd->bg_chain);
			goto bail;
		}

		/*
		 * Check the bitmap capacity before scanning the bitmap.
		 * tunefs_count_free_bits() trusts bg_bits, so a corrupt
		 * value must be rejected first or the scan would run off
		 * the end of the bitmap.
		 */
		if (gd->bg_bits > gd->bg_size * 8) {
			ret = OCFS2_ET_CORRUPT_CHAIN;
			verbosef(VL_LIB,
				 "Chain allocator at block %"PRIu64" is "
				 "corrupt.  Group descriptor at %"PRIu64" "
				 "can only hold %u bits, but it claims to "
				 "have %u\n",
				 (uint64_t)di->i_blkno, (uint64_t)blkno,
				 gd->bg_size * 8, gd->bg_bits);
			goto bail;
		}

		bits = tunefs_count_free_bits(gd);
		if (bits != gd->bg_free_bits_count) {
			ret = OCFS2_ET_CORRUPT_CHAIN;
			verbosef(VL_LIB,
				 "Chain allocator at block %"PRIu64" is "
				 "corrupt.  Group descriptor at %"PRIu64" "
				 "has %u free bits but says it has %u\n",
				 (uint64_t)di->i_blkno, (uint64_t)blkno,
				 bits, gd->bg_free_bits_count);
			goto bail;
		}

		if (gd->bg_free_bits_count >= gd->bg_bits) {
			ret = OCFS2_ET_CORRUPT_CHAIN;
			verbosef(VL_LIB,
				 "Chain allocator at block %"PRIu64" is "
				 "corrupt.  Group descriptor at %"PRIu64" "
				 "claims to have more free bits than "
				 "total bits\n",
				 (uint64_t)di->i_blkno, (uint64_t)blkno);
			goto bail;
		}

		/* Accumulate for the comparison against the chain record */
		total += gd->bg_bits;
		free += gd->bg_free_bits_count;
		blkno = gd->bg_next_group;
	}

	if (cr->c_total != total) {
		ret = OCFS2_ET_CORRUPT_CHAIN;
		verbosef(VL_LIB,
			 "Chain allocator at block %"PRIu64" is corrupt. "
			 "It contains %u total bits, but it says it has "
			 "%u\n",
			 (uint64_t)di->i_blkno, total, cr->c_total);
		goto bail;

	}

	if (cr->c_free != free) {
		ret = OCFS2_ET_CORRUPT_CHAIN;
		verbosef(VL_LIB,
			 "Chain allocator at block %"PRIu64" is corrupt. "
			 "It contains %u free bits, but it says it has "
			 "%u\n",
			 (uint64_t)di->i_blkno, free, cr->c_free);
		goto bail;
	}

bail:
	if (buf)
		ocfs2_free(&buf);

	return ret;
}

/*
 * Read the global bitmap inode and validate every in-use chain of its
 * chain list with tunefs_validate_chain_group().  Stops at and returns
 * the first error encountered; returns 0 if all chains check out.
 */
static errcode_t tunefs_global_bitmap_check(ocfs2_filesys *fs)
{
	errcode_t err = 0;
	uint64_t bitmap_blkno = 0;
	char *inode_buf = NULL;
	struct ocfs2_dinode *bitmap_di;
	int chain;

	verbosef(VL_LIB, "Verifying the global allocator\n");

	err = ocfs2_malloc_block(fs->fs_io, &inode_buf);
	if (err) {
		verbosef(VL_LIB,
			 "%s while allocating an inode buffer to validate "
			 "the global bitmap\n",
			 error_message(err));
		goto done;
	}

	err = ocfs2_lookup_system_inode(fs, GLOBAL_BITMAP_SYSTEM_INODE, 0,
					&bitmap_blkno);
	if (err) {
		verbosef(VL_LIB,
			 "%s while looking up the global bitmap inode\n",
			 error_message(err));
		goto done;
	}

	err = ocfs2_read_inode(fs, bitmap_blkno, inode_buf);
	if (err) {
		verbosef(VL_LIB,
			 "%s while reading the global bitmap inode at "
			 "block %"PRIu64"",
			 error_message(err), bitmap_blkno);
		goto done;
	}

	bitmap_di = (struct ocfs2_dinode *)inode_buf;

	/* Only the records below cl_next_free_rec are in use */
	for (chain = 0;
	     chain < bitmap_di->id2.i_chain.cl_next_free_rec;
	     chain++) {
		err = tunefs_validate_chain_group(fs, bitmap_di, chain);
		if (err)
			break;
	}

done:
	if (inode_buf)
		ocfs2_free(&inode_buf);
	return err;
}

/*
 * Open-time allocator check.  Only filesystems opened with
 * TUNEFS_FLAG_ALLOCATION are verified; for those we also record in the
 * shared state that an allocating operation has been opened.
 */
static errcode_t tunefs_open_bitmap_check(ocfs2_filesys *fs)
{
	struct tunefs_private *priv = to_private(fs);
	struct tunefs_filesystem_state *st = tunefs_get_state(fs);

	if (priv->tp_open_flags & TUNEFS_FLAG_ALLOCATION) {
		st->ts_allocation = 1;
		return tunefs_global_bitmap_check(fs);
	}

	return 0;
}

/*
 * Record the filesystem's current cluster count in the shared state.
 * Only operations opened with TUNEFS_FLAG_ALLOCATION may do this; any
 * other caller is logged and ignored.
 */
void tunefs_update_fs_clusters(ocfs2_filesys *fs)
{
	struct tunefs_private *priv = to_private(fs);
	struct tunefs_filesystem_state *st = tunefs_get_state(fs);

	if (priv->tp_open_flags & TUNEFS_FLAG_ALLOCATION) {
		st->ts_fs_clusters = fs->fs_clusters;
		return;
	}

	verbosef(VL_LIB,
		 "Operation that claimed it would do no allocation "
		 "just attempted to update the filesystem size\n");
}

/*
 * Close-time allocator check.  Runs only on the master filesystem and
 * only if some operation was opened with allocation enabled; otherwise
 * returns 0 without doing anything.
 */
static errcode_t tunefs_close_bitmap_check(ocfs2_filesys *fs)
{
	errcode_t result;
	uint32_t saved_clusters;
	struct tunefs_filesystem_state *st = tunefs_get_state(fs);

	if (!st->ts_allocation || (st->ts_master != fs))
		return 0;

	/*
	 * A resize will have reported its new size through
	 * tunefs_update_fs_clusters().  The bitmap check must see that
	 * size, so install it for the duration of the call and put the
	 * old value back afterwards.
	 */
	saved_clusters = fs->fs_clusters;

	fs->fs_clusters = st->ts_fs_clusters;
	fs->fs_blocks = ocfs2_clusters_to_blocks(fs, fs->fs_clusters);

	result = tunefs_global_bitmap_check(fs);

	fs->fs_clusters = saved_clusters;
	fs->fs_blocks = ocfs2_clusters_to_blocks(fs, fs->fs_clusters);

	return result;
}

/*
 * Make sure no slot has a dirty journal, and collect journal
 * information into the shared state along the way: the largest journal
 * size in clusters (ts_journal_clusters) and the union of all journal
 * superblock feature bits (ts_journal_features).
 *
 * A non-zero ts_journal_clusters doubles as the "already checked"
 * marker, in which case this returns 0 immediately.
 *
 * Returns 0 if all journals are clean, TUNEFS_ET_JOURNAL_DIRTY if a
 * dirty journal is found, or the error from a failed lookup/read.
 */
static errcode_t tunefs_journal_check(ocfs2_filesys *fs)
{
	errcode_t ret;
	char *jsb_buf = NULL;
	ocfs2_cached_inode *ci = NULL;
	uint64_t blkno, contig;
	journal_superblock_t *jsb;
	int i, dirty = 0;
	uint16_t max_slots = OCFS2_RAW_SB(fs->fs_super)->s_max_slots;
	struct tunefs_private *tp = to_private(fs);
	struct tunefs_filesystem_state *state = tunefs_get_state(fs);

	/* We only need to check the journal once */
	if (state->ts_journal_clusters)
		return 0;

	verbosef(VL_LIB, "Checking for dirty journals\n");

	ret = ocfs2_malloc_block(fs->fs_io, &jsb_buf);
	if (ret) {
		verbosef(VL_LIB,
			"%s while allocating a block during journal "
			"check\n",
			error_message(ret));
		goto bail;
	}

	for (i = 0; i < max_slots; ++i) {
		ret = ocfs2_lookup_system_inode(fs, JOURNAL_SYSTEM_INODE, i,
						&blkno);
		if (ret) {
			verbosef(VL_LIB,
				 "%s while looking up journal inode for "
				 "slot %u during journal check\n",
				 error_message(ret), i);
			goto bail;
		}

		ret = ocfs2_read_cached_inode(fs, blkno, &ci);
		if (ret) {
			verbosef(VL_LIB,
				 "%s while reading inode %"PRIu64" during "
				 " journal check",
				 error_message(ret), blkno);
			goto bail;
		}

		state->ts_journal_clusters =
			ocfs2_max(state->ts_journal_clusters,
				  ci->ci_inode->i_clusters);

		dirty = (ci->ci_inode->id1.journal1.ij_flags &
			 OCFS2_JOURNAL_DIRTY_FL);
		if (dirty) {
			ret = TUNEFS_ET_JOURNAL_DIRTY;
			verbosef(VL_LIB,
				 "Node slot %d's journal is dirty. Run "
				 "fsck.ocfs2 to replay all dirty journals.",
				 i);
			break;
		}

		/* blkno is reused: it now becomes the journal's first block */
		ret = ocfs2_extent_map_get_blocks(ci, 0, 1, &blkno, &contig, NULL);
		if (!ret)
			ret = ocfs2_read_journal_superblock(fs, blkno,
							    jsb_buf);
		if (ret) {
			verbosef(VL_LIB,
				 "%s while reading journal superblock "
				 "for inode %"PRIu64" during journal "
				 "check",
				 error_message(ret), ci->ci_blkno);
			goto bail;
		}

		jsb = (journal_superblock_t *)jsb_buf;
		state->ts_journal_features.opt_compat |=
			jsb->s_feature_compat;
		state->ts_journal_features.opt_ro_compat |=
			jsb->s_feature_ro_compat;
		state->ts_journal_features.opt_incompat |=
			jsb->s_feature_incompat;

		/*
		 * Every slot gets its own cached inode.  Release this one
		 * before moving on, otherwise all but the last would leak.
		 * The early exits above leave ci set so that the cleanup
		 * at bail frees it.
		 */
		ocfs2_free_cached_inode(fs, ci);
		ci = NULL;
	}

	/*
	 * If anything follows a NOCLUSTER operation, it will have
	 * closed and reopened the filesystem.  It must recheck the
	 * journals.
	 */
	if (tp->tp_open_flags & TUNEFS_FLAG_NOCLUSTER)
		state->ts_journal_clusters = 0;

bail:
	if (ci)
		ocfs2_free_cached_inode(fs, ci);
	if (jsb_buf)
		ocfs2_free(&jsb_buf);

	return ret;
}

/*
 * Open the mount point of an in-use filesystem and remember the
 * descriptor in ts_online_fd.  Does nothing if a descriptor is already
 * held.  Returns TUNEFS_ET_NOT_MOUNTED unless the device is mounted
 * read-write and is not in use as swap.
 */
static errcode_t tunefs_open_online_descriptor(ocfs2_filesys *fs)
{
	errcode_t err;
	int fd;
	int mount_flags = 0;
	char mountpoint[PATH_MAX];
	struct tunefs_filesystem_state *st = tunefs_get_state(fs);

	if (st->ts_online_fd > -1)
		return 0;

	memset(mountpoint, 0, sizeof(mountpoint));

	err = ocfs2_check_mount_point(fs->fs_devname, &mount_flags,
				      mountpoint, sizeof(mountpoint));
	if (err)
		return err;

	if (!(mount_flags & OCFS2_MF_MOUNTED) ||
	    (mount_flags & (OCFS2_MF_READONLY | OCFS2_MF_SWAP)))
		return TUNEFS_ET_NOT_MOUNTED;

	fd = open64(mountpoint, O_RDONLY);
	if (fd >= 0) {
		st->ts_online_fd = fd;
		return 0;
	}

	/* Translate the open failure into one of our error codes */
	switch (errno) {
		case EBUSY:
			return TUNEFS_ET_DEVICE_BUSY;

		case ENOENT:
			return TUNEFS_ET_NOT_MOUNTED;

		default:
			return OCFS2_ET_IO;
	}
}

/*
 * Close the online descriptor.  Only the master filesystem does this,
 * and only if a descriptor is actually open.
 */
static void tunefs_close_online_descriptor(ocfs2_filesys *fs)
{
	struct tunefs_filesystem_state *st = tunefs_get_state(fs);

	if (st->ts_master != fs)
		return;
	if (st->ts_online_fd < 0)
		return;

	/* A failing close(2) leaves us nothing useful to do */
	close(st->ts_online_fd);
	st->ts_online_fd = -1;
}

/*
 * Set up the I/O cache for this ocfs2_filesys.  A child filesys shares
 * the master's cache; the master allocates it, sized according to
 * TUNEFS_FLAG_LARGECACHE.
 *
 * If io_init_cache fails, we will go do the work without the
 * io_cache, so there is no check for failure here.
 */
static void tunefs_init_cache(ocfs2_filesys *fs)
{
	errcode_t err;
	struct tunefs_private *tp = to_private(fs);
	struct tunefs_filesystem_state *state = tunefs_get_state(fs);
	uint64_t blocks_wanted;
	int scale_down;

	/*
	 * We have one I/O cache for all ocfs2_filesys structures.  This
	 * guarantees a consistent view of the disk.  The master filesys
	 * allocates it, child filesyses just use it.
	 */
	if (state->ts_master != fs) {
		io_share_cache(state->ts_master->fs_io, fs->fs_io);
		return;
	}

	/*
	 * Operations needing a large cache really want enough to
	 * hold the whole filesystem in memory.  The rest of the
	 * operations don't need much at all.  A cache big enough to
	 * hold a chain allocator group should be enough.  Our largest
	 * chain allocator is 4MB, so let's do 8MB and allow for
	 * incidental blocks.
	 */
	if (tp->tp_open_flags & TUNEFS_FLAG_LARGECACHE)
		blocks_wanted = fs->fs_blocks;
	else
		blocks_wanted = ocfs2_blocks_in_bytes(fs, 8 * 1024 * 1024);

	/*
	 * We don't want to exhaust memory, so we start with twice our
	 * actual need.  When we find out how much we can get, we actually
	 * get half that.
	 */
	blocks_wanted <<= 1;
	scale_down = 1;

	/*
	 * Each pass throws away the previous attempt and tries the
	 * current size.  Every pass, successful or not, ends by halving
	 * blocks_wanted unless we break out.  The first success clears
	 * scale_down, so the next success (at a smaller size) is the one
	 * we keep.  If blocks_wanted reaches zero the loop simply ends.
	 */
	while (blocks_wanted > 0) {
		io_destroy_cache(fs->fs_io);
		verbosef(VL_LIB,
			 "Asking for %"PRIu64" blocks of I/O cache\n",
			 blocks_wanted);
		err = io_init_cache(fs->fs_io, blocks_wanted);
		if (!err) {
			/*
			 * We want to pin our cache; there's no point in
			 * having a large cache if half of it is in swap.
			 * However, some callers may not be privileged
			 * enough, so once we get down to a small enough
			 * number (512 blocks), we'll stop caring.
			 */
			err = io_mlock_cache(fs->fs_io);
			if (err && (blocks_wanted <= 512))
				err = 0;
		}
		if (!err) {
			verbosef(VL_LIB, "Got %"PRIu64" blocks\n",
				 blocks_wanted);
			/* If we've already scaled down, we're done. */
			if (!scale_down)
				break;
			scale_down = 0;
		}

		blocks_wanted >>= 1;
	}
}

/*
 * Allocate the tunefs private data for a freshly opened ocfs2_filesys,
 * attach the shared filesystem state to it, and put it on fs_list.  On
 * failure nothing is left attached to fs.
 */
static errcode_t tunefs_add_fs(ocfs2_filesys *fs, int flags)
{
	struct tunefs_private *priv;
	errcode_t err;

	err = ocfs2_malloc0(sizeof(*priv), &priv);
	if (err)
		return err;

	priv->tp_open_flags = flags;
	priv->tp_fs = fs;
	fs->fs_private = priv;

	err = tunefs_set_state(fs);
	if (err) {
		fs->fs_private = NULL;
		ocfs2_free(&priv);
		return err;
	}

	/*
	 * Deliberately added at the head.  The first filesystem opened
	 * is the one holding the locks, so it has to be the last one
	 * closed (a FILO stack).  On a signal, tunefs_close_all() pops
	 * the entries in order and ends with the lock holder.
	 */
	list_add(&priv->tp_list, &fs_list);

	return 0;
}

/*
 * Detach and free the tunefs private data of an ocfs2_filesys.  When
 * the filesystem being removed is the master, the shared state is
 * freed as well; by then fs_list is expected to be empty.
 */
static void tunefs_remove_fs(ocfs2_filesys *fs)
{
	struct tunefs_private *priv = to_private(fs);
	struct tunefs_filesystem_state *st;

	if (!priv)
		return;

	/* Grab the state pointer before the private data goes away */
	st = priv->tp_state;

	list_del(&priv->tp_list);
	priv->tp_fs = NULL;
	fs->fs_private = NULL;
	ocfs2_free(&priv);

	if (!st || (st->ts_master != fs))
		return;

	assert(list_empty(&fs_list));
	ocfs2_free(&st);
}


/*
 * Predicate: is this one of the special ocfs2ne error codes that does
 * not mean the open failed?  Returns 1 if so, 0 otherwise.
 */
static int tunefs_special_errorp(errcode_t err)
{
	return (err == TUNEFS_ET_CLUSTER_SKIPPED) ||
	       (err == TUNEFS_ET_INVALID_STACK_NAME) ||
	       (err == TUNEFS_ET_PERFORM_ONLINE);
}

/*
 * Open a device for a tunefs operation.
 *
 * device:  path handed to ocfs2_open()
 * flags:   TUNEFS_FLAG_* open flags; they are stored in the private
 *          data and steer locking and the open-time checks
 * ret_fs:  receives the opened filesystem
 *
 * Read-only opens stop right after the filesystem is registered.
 * Read-write opens are refused for heartbeat devices and for
 * filesystems with a resize or tunefs operation in progress, then the
 * filesystem is locked and, depending on the lock result, the journal
 * and bitmap checks are run or the online descriptor is opened.
 *
 * Return value: 0 on success.  If the result is one of the codes
 * accepted by tunefs_special_errorp(), the filesystem is still open and
 * *ret_fs is set, so the caller must close it.  For any other error the
 * filesystem has been closed and *ret_fs is not written.
 */
errcode_t tunefs_open(const char *device, int flags,
		      ocfs2_filesys **ret_fs)
{
	int rw = flags & TUNEFS_FLAG_RW;
	errcode_t err, tmp;
	int open_flags;
	ocfs2_filesys *fs = NULL;

	verbosef(VL_LIB, "Opening device \"%s\"\n", device);

	open_flags = OCFS2_FLAG_HEARTBEAT_DEV_OK;
	if (rw)
		open_flags |= OCFS2_FLAG_RW | OCFS2_FLAG_STRICT_COMPAT_CHECK;
	else
		open_flags |= OCFS2_FLAG_RO;

	err = ocfs2_open(device, open_flags, 0, 0, &fs);
	if (err)
		goto out;

	err = tunefs_add_fs(fs, flags);
	if (err)
		goto out;

	/* Read-only opens take no locks and run no checks */
	if (!rw)
		goto out;

	if (OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat &
	    OCFS2_FEATURE_INCOMPAT_HEARTBEAT_DEV) {
		err = TUNEFS_ET_HEARTBEAT_DEV;
		goto out;
	}

	if (OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat &
	    OCFS2_FEATURE_INCOMPAT_RESIZE_INPROG) {
		err = TUNEFS_ET_RESIZE_IN_PROGRESS;
		goto out;
	}

	if (OCFS2_RAW_SB(fs->fs_super)->s_feature_incompat &
	    OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG) {
		err = TUNEFS_ET_TUNEFS_IN_PROGRESS;
		goto out;
	}

	/* Special error codes are carried along in err from here on */
	err = tunefs_lock_filesystem(fs, flags);
	if (err && !tunefs_special_errorp(err))
		goto out;

	/*
	 * We will use block cache in io.  Now, whether the cluster is
	 * locked or the volume is mount local, in both situation we can
	 * safely use cache.  If we're not locked
	 * (tunefs_special_errorp(err) != 0), we can't safely use it.
	 * If this tunefs run has both special and regular operations,
	 * ocfs2ne will retry with the regular arguments and will get
	 * the cache for the regular operations.
	 */
	if (!err)
		tunefs_init_cache(fs);

	/*
	 * SKIPCLUSTER operations don't check the journals - they couldn't
	 * replay them anyway.
	 */
	if (err == TUNEFS_ET_CLUSTER_SKIPPED)
		goto out;

	/* Offline operations need clean journals */
	if (err != TUNEFS_ET_PERFORM_ONLINE) {
		tmp = tunefs_journal_check(fs);
		if (!tmp)
			tmp = tunefs_open_bitmap_check(fs);
		if (tmp) {
			/* A failed check replaces whatever err held */
			err = tmp;
			tunefs_unlock_filesystem(fs);
		}
	} else {
		tmp = tunefs_open_online_descriptor(fs);
		if (tmp) {
			err = tmp;
			tunefs_unlock_filesystem(fs);
		}
	}

out:
	if (err && !tunefs_special_errorp(err)) {
		/* Hard failure: tear down whatever we managed to set up */
		if (fs) {
			tunefs_remove_fs(fs);
			ocfs2_close(fs);
			fs = NULL;
		}
		verbosef(VL_LIB, "Open of device \"%s\" failed\n", device);
	} else {
		verbosef(VL_LIB, "Device \"%s\" opened\n", device);
		*ret_fs = fs;
	}

	return err;
}

/*
 * Close a filesystem opened with tunefs_open().  A NULL fs is accepted
 * and returns 0.  Every cleanup step runs even if an earlier one
 * failed; the first error seen is the one returned.
 */
errcode_t tunefs_close(ocfs2_filesys *fs)
{
	errcode_t first_err, step_err;

	if (!fs)
		return 0;

	verbosef(VL_LIB, "Closing device \"%s\"\n", fs->fs_devname);
	tunefs_close_online_descriptor(fs);

	first_err = tunefs_close_bitmap_check(fs);

	step_err = tunefs_unlock_filesystem(fs);
	if (!first_err)
		first_err = step_err;

	tunefs_remove_fs(fs);

	step_err = ocfs2_close(fs);
	if (!first_err)
		first_err = step_err;

	if (first_err)
		verbosef(VL_LIB, "Close of device failed\n");
	else
		verbosef(VL_LIB, "Device closed\n");

	return first_err;
}


/*
 * Helper functions for the main code.
 */

/*
 * Open the master filesystem's device again with the feature's open
 * flags and run the feature's enable or disable method against it.
 *
 * The flags passed to the method are the feature's open flags with
 * ONLINE and NOCLUSTER cleared, then set again only if tunefs_open()
 * reported that situation.
 *
 * Returns 0 on success, TUNEFS_ET_OPERATION_FAILED if the method
 * returned non-zero, TUNEFS_ET_INTERNAL_FAILURE for an unknown action,
 * or the error from opening/closing the filesystem.
 */
errcode_t tunefs_feature_run(ocfs2_filesys *master_fs,
			     struct tunefs_feature *feat)
{
	int rc = 0;
	errcode_t err, tmp;
	ocfs2_filesys *fs;
	int flags;

	verbosef(VL_DEBUG, "Running feature \"%s\"\n", feat->tf_name);

	flags = feat->tf_open_flags & ~(TUNEFS_FLAG_ONLINE |
					TUNEFS_FLAG_NOCLUSTER);
	err = tunefs_open(master_fs->fs_devname, feat->tf_open_flags, &fs);
	if (err == TUNEFS_ET_PERFORM_ONLINE)
		flags |= TUNEFS_FLAG_ONLINE;
	else if (err == TUNEFS_ET_INVALID_STACK_NAME)
		flags |= TUNEFS_FLAG_NOCLUSTER;
	else if (err) {
		/*
		 * tunefs_open() hands back an open filesystem for every
		 * special error code, including the ones we do not act
		 * on here.  Close it so it is not leaked; the original
		 * error is still what we report.
		 */
		if (tunefs_special_errorp(err))
			tunefs_close(fs);
		goto out;
	}

	err = 0;
	switch (feat->tf_action) {
		case FEATURE_ENABLE:
			rc = feat->tf_enable(fs, flags);
			break;

		case FEATURE_DISABLE:
			rc = feat->tf_disable(fs, flags);
			break;

		case FEATURE_NOOP:
			verbosef(VL_APP,
				 "Ran NOOP for feature \"%s\" - how'd "
				 "that happen?\n",
				 feat->tf_name);
			break;

		default:
			errorf("Unknown action %d called against feature "
			       "\"%s\"\n",
			       feat->tf_action, feat->tf_name);
			err = TUNEFS_ET_INTERNAL_FAILURE;
			break;
	}

	/* A failing method takes precedence over an unknown action */
	if (rc)
		err = TUNEFS_ET_OPERATION_FAILED;

	tmp = tunefs_close(fs);
	if (!err)
		err = tmp;

out:
	return err;
}

/*
 * Open the master filesystem's device again with the operation's open
 * flags and invoke the operation's to_run() method on it.
 *
 * The flags given to to_run() are the operation's open flags with
 * ONLINE and NOCLUSTER cleared, plus whichever of ONLINE, NOCLUSTER or
 * SKIPCLUSTER tunefs_open() reported.  Returns
 * TUNEFS_ET_OPERATION_FAILED if to_run() returned non-zero, otherwise
 * the result of closing the filesystem.
 */
errcode_t tunefs_op_run(ocfs2_filesys *master_fs,
			struct tunefs_operation *op)
{
	errcode_t open_err, close_err;
	errcode_t result = 0;
	ocfs2_filesys *op_fs;
	int run_flags;

	verbosef(VL_DEBUG, "Running operation \"%s\"\n", op->to_name);

	run_flags = op->to_open_flags;
	run_flags &= ~(TUNEFS_FLAG_ONLINE | TUNEFS_FLAG_NOCLUSTER);

	open_err = tunefs_open(master_fs->fs_devname, op->to_open_flags,
			       &op_fs);
	if (open_err == TUNEFS_ET_PERFORM_ONLINE)
		run_flags |= TUNEFS_FLAG_ONLINE;
	else if (open_err == TUNEFS_ET_INVALID_STACK_NAME)
		run_flags |= TUNEFS_FLAG_NOCLUSTER;
	else if (open_err == TUNEFS_ET_CLUSTER_SKIPPED)
		run_flags |= TUNEFS_FLAG_SKIPCLUSTER;
	else if (open_err)
		return open_err;

	if (op->to_run(op, op_fs, run_flags))
		result = TUNEFS_ET_OPERATION_FAILED;

	/* An operation failure outranks a close failure */
	close_err = tunefs_close(op_fs);
	if (!result)
		result = close_err;

	return result;
}


/*
 * Helper calls for operation and feature DEBUG_EXE code
 */

/*
 * Make a shallow, NULL-terminated copy of an argv vector.  Only the
 * pointer array is duplicated; the strings are shared with the
 * original.  Returns TUNEFS_ET_NO_MEMORY if the array cannot be
 * allocated.
 */
static errcode_t copy_argv(char **argv, char ***new_argv)
{
	int count = 0;
	int idx;
	char **copy;

	while (argv[count])
		count++;

	/* The copy is never freed, on purpose */
	copy = malloc(sizeof(char *) * (count + 1));
	if (!copy)
		return TUNEFS_ET_NO_MEMORY;

	for (idx = 0; idx < count; idx++)
		copy[idx] = (char *)argv[idx];
	copy[count] = NULL;

	*new_argv = copy;
	return 0;
}

/*
 * Slide the arguments starting at index optind down so that they
 * follow argv[0] directly, then update *argc to match.  The +1 in the
 * new count accounts for argv[0], which stays where it is.
 */
static void shuffle_argv(int *argc, int optind, char **argv)
{
	int old_argc = *argc;
	int kept = old_argc - optind + 1;
	int from;
	int to = 1;

	for (from = optind; from < old_argc; from++)
		argv[to++] = argv[from];

	if (to != kept)
		verbosef(VL_DEBUG,
			 "dst is not new_argc %d %d\n", to, kept);

	argv[to] = NULL;
	*argc = kept;
}

static void tunefs_debug_usage(int error)
{
	enum tools_verbosity_level level = VL_ERR;

	if (!error)
		level = VL_OUT;

	verbosef(level, "%s", usage_string ? usage_string : "(null)");
	verbosef(level,
		 "[opts] can be any mix of:\n"
		 "\t-i|--interactive\n"
		 "\t-v|--verbose (more than one increases verbosity)\n"
		 "\t-q|--quiet (more than one decreases verbosity)\n"
		 "\t-h|--help\n"
		 "\t-V|--version\n");
}

extern int optind, opterr, optopt;
extern char *optarg;

/*
 * Parse the options common to every debug executable (-h, -V, -v, -q,
 * -i) and strip them from the argument vector.
 *
 * argc/argv: in/out.  On return *argv points at a private copy of the
 *            vector holding argv[0] followed by the non-option
 *            arguments, and *argc is its length.
 * usage:     usage line remembered for tunefs_debug_usage().
 *
 * Does not return if help or version output was requested (exit 0) or
 * if the command line was invalid (exit 1).
 */
static void tunefs_parse_core_options(int *argc, char ***argv, char *usage)
{
	errcode_t err;
	int c;
	char **new_argv;
	int print_usage = 0, print_version = 0;
	char error[PATH_MAX];
	static struct option long_options[] = {
		{ "help", 0, NULL, 'h' },
		{ "version", 0, NULL, 'V' },
		{ "verbose", 0, NULL, 'v' },
		{ "quiet", 0, NULL, 'q' },
		{ "interactive", 0, NULL, 'i'},
		{ 0, 0, 0, 0}
	};

	usage_string = usage;

	/* getopt_long() permutes the vector, so work on a copy */
	err = copy_argv(*argv, &new_argv);
	if (err) {
		tcom_err(err, "while processing command-line arguments");
		exit(1);
	}

	/* We report bad options ourselves */
	opterr = 0;
	error[0] = '\0';
	while ((c = getopt_long(*argc, new_argv,
				":hVvqi", long_options, NULL)) != -1) {
		switch (c) {
			case 'h':
				print_usage = 1;
				break;

			case 'V':
				print_version = 1;
				break;

			case 'v':
				tools_verbose();
				break;

			case 'q':
				tools_quiet();
				break;

			case 'i':
				tools_interactive();
				break;

			case '?':
				snprintf(error, sizeof(error),
					 "Invalid option: \'-%c\'",
					 optopt);
				print_usage = 1;
				break;

			case ':':
				snprintf(error, sizeof(error),
					 "Option \'-%c\' requires an argument",
					 optopt);
				print_usage = 1;
				break;

			default:
				snprintf(error, sizeof(error),
					 "Shouldn't get here %c %c",
					 optopt, c);
				break;
		}

		if (*error)
			break;
	}

	if (*error)
		errorf("%s\n", error);

	if (print_version)
		tools_version();

	if (print_usage)
		tunefs_debug_usage(*error != '\0');

	/*
	 * A bad command line must fail, even though it also triggers the
	 * usage output.  Check for it before the successful
	 * help/version exit.
	 */
	if (*error)
		exit(1);

	if (print_usage || print_version)
		exit(0);

	shuffle_argv(argc, optind, new_argv);
	*argv = new_argv;
}

/*
 * Turn the action argument ("enable" or "disable") into the feature's
 * tf_action.  Returns 0 on success, 1 if the argument is missing or
 * not one of the two known actions.
 */
static int single_feature_parse_option(struct tunefs_operation *op,
				       char *arg)
{
	struct tunefs_feature *feat = op->to_private;

	if (!arg) {
		errorf("No action specified\n");
		return 1;
	}

	if (strcmp(arg, "enable") == 0) {
		feat->tf_action = FEATURE_ENABLE;
		return 0;
	}

	if (strcmp(arg, "disable") == 0) {
		feat->tf_action = FEATURE_DISABLE;
		return 0;
	}

	errorf("Invalid action: \"%s\"\n", arg);
	return 1;
}

/*
 * to_run() method of the single-feature operation: run the feature
 * stored in op->to_private.  Errors other than
 * TUNEFS_ET_OPERATION_FAILED are reported here.  Returns the result of
 * tunefs_feature_run(); the flags argument is not used.
 */
static int single_feature_run(struct tunefs_operation *op,
			      ocfs2_filesys *fs, int flags)
{
	struct tunefs_feature *feat = op->to_private;
	errcode_t result = tunefs_feature_run(fs, feat);

	if (!result || (result == TUNEFS_ET_OPERATION_FAILED))
		return result;

	tcom_err(result, "while toggling feature \"%s\"",
		 feat->tf_name);

	return result;
}

/*
 * Operation wrapper that drives a single feature.  It is referenced
 * as single_feature_op by tunefs_feature_main(), which fills in
 * to_debug_usage, to_open_flags and to_private before running it.
 * NOTE(review): the NULL and 0 arguments are presumably the usage
 * string and open flags left as placeholders - confirm against the
 * DEFINE_TUNEFS_OP definition.
 */
DEFINE_TUNEFS_OP(single_feature,
		 NULL,
		 0,
		 single_feature_parse_option,
		 single_feature_run);

/*
 * Entry point for a per-feature debug executable.  Points the
 * single-feature operation at feat, builds its usage line from the
 * feature name, and hands control to tunefs_op_main().
 */
int tunefs_feature_main(int argc, char *argv[], struct tunefs_feature *feat)
{
	/* Must stay alive until tunefs_op_main() returns */
	char usage[PATH_MAX];

	single_feature_op.to_private = feat;
	single_feature_op.to_open_flags = feat->tf_open_flags;

	snprintf(usage, sizeof(usage),
		 "Usage: ocfs2ne_feature_%s [opts] <device> "
		 "{enable|disable}\n",
		 feat->tf_name);
	single_feature_op.to_debug_usage = usage;

	return tunefs_op_main(argc, argv, &single_feature_op);
}

/*
 * Entry point for a per-operation debug executable.
 *
 * Expected command line after the core options: <device> and, only if
 * the operation has a to_parse_option method, at most one more
 * argument which is handed to that method.
 *
 * Returns 0 if the operation ran and the device closed cleanly.
 * Returns non-zero on a usage error, a failed open, a failed operation
 * or a failed close.
 */
int tunefs_op_main(int argc, char *argv[], struct tunefs_operation *op)
{
	errcode_t err;
	int rc = 1;
	ocfs2_filesys *fs;
	char *arg = NULL;

	tunefs_init(argv[0]);
	tunefs_parse_core_options(&argc, &argv, op->to_debug_usage);
	if (argc < 2) {
		errorf("No device specified\n");
		tunefs_debug_usage(1);
		goto out;
	}

	if (op->to_parse_option) {
		if (argc > 3) {
			errorf("Too many arguments\n");
			tunefs_debug_usage(1);
			goto out;
		}
		if (argc == 3)
			arg = argv[2];

		rc = op->to_parse_option(op, arg);
		if (rc) {
			tunefs_debug_usage(1);
			goto out;
		}

		/*
		 * Parsing succeeded, but nothing has run yet.  Put rc
		 * back to "failed" so the error exits below cannot
		 * return the 0 that the parser just handed us.
		 */
		rc = 1;
	} else if (argc > 2) {
		errorf("Too many arguments\n");
		tunefs_debug_usage(1);
		goto out;
	}

	/* Special error codes still leave us with an open filesystem */
	err = tunefs_open(argv[1], op->to_open_flags, &fs);
	if (err && !tunefs_special_errorp(err)) {
		tcom_err(err, "- Unable to open device \"%s\" read-write.",
			 argv[1]);
		goto out;
	}

	err = tunefs_op_run(fs, op);
	if (!err)
		rc = 0;
	else if (err != TUNEFS_ET_OPERATION_FAILED)
		tcom_err(err, "while running operation \"%s\"",
			 op->to_name);

	err = tunefs_close(fs);
	if (err) {
		tcom_err(err, "while closing device \"%s\"", argv[1]);
		rc = 1;
	}

out:
	return rc;
}

#ifdef DEBUG_EXE

int parent = 0;


/*
 * Report that the preceding open succeeded, then close the filesystem
 * again, complaining if the close fails.
 */
static void closeup(ocfs2_filesys *fs, const char *device)
{
	errcode_t close_err;

	verbosef(VL_OUT, "success\n");

	close_err = tunefs_close(fs);
	if (!close_err)
		return;

	tcom_err(close_err, "- Unable to close device \"%s\".", device);
}

/*
 * Debug driver: opens the given device four ways in turn (read-only,
 * read-write, read-write allowing online operation, read-write
 * allowing a cluster stack mismatch) and reports the outcome of each.
 * Returns 1 on a usage error and 0 otherwise, whether or not the opens
 * succeeded.
 */
int main(int argc, char *argv[])
{
	errcode_t err;
	const char *device;
	ocfs2_filesys *fs;

	tunefs_init(argv[0]);
	tunefs_parse_core_options(&argc, &argv,
				  "Usage: debug_libocfs2ne [-p] <device>\n");

	/* What remains must be either "<device>" or "-p <device>" */
	if (argc > 3) {
		errorf("Too many arguments\n");
		tunefs_debug_usage(1);
		return 1;
	}
	if (argc == 3) {
		if (strcmp(argv[1], "-p")) {
			errorf("Invalid argument: \'%s\'\n", argv[1]);
			tunefs_debug_usage(1);
			return 1;
		}
		parent = 1;
		device = argv[2];
	} else if ((argc == 2) &&
		   strcmp(argv[1], "-p")) {
		device = argv[1];
	} else {
		/* No arguments at all, or a lone "-p" */
		errorf("Device must be specified\n");
		tunefs_debug_usage(1);
		return 1;
	}

	verbosef(VL_OUT, "Opening device \"%s\" read-only... ", device);
	err = tunefs_open(device, TUNEFS_FLAG_RO, &fs);
	if (err) {
		verbosef(VL_OUT, "failed\n");
		tcom_err(err, "- Unable to open device \"%s\" read-only.",
			 device);
	} else
		closeup(fs, device);

	verbosef(VL_OUT, "Opening device \"%s\" read-write... ", device);
	err = tunefs_open(device, TUNEFS_FLAG_RW, &fs);
	if (err) {
		verbosef(VL_OUT, "failed\n");
		tcom_err(err, "- Unable to open device \"%s\" read-write.",
			 device);
	} else
		closeup(fs, device);

	verbosef(VL_OUT,
		 "Opening device \"%s\" for an online operation... ",
		 device);
	err = tunefs_open(device, TUNEFS_FLAG_RW | TUNEFS_FLAG_ONLINE,
			  &fs);
	/* TUNEFS_ET_PERFORM_ONLINE still returns an open filesystem */
	if (err == TUNEFS_ET_PERFORM_ONLINE) {
		closeup(fs, device);
		verbosef(VL_OUT, "Operation would have been online\n");
	} else if (!err) {
		closeup(fs, device);
		verbosef(VL_OUT, "Operation would have been offline\n");
	} else {
		verbosef(VL_OUT, "failed\n");
		tcom_err(err, "- Unable to open device \"%s\" read-write.",
			 device);
	}

	verbosef(VL_OUT,
		 "Opening device \"%s\" for a stackless operation... ",
		 device);
	err = tunefs_open(device, TUNEFS_FLAG_RW | TUNEFS_FLAG_NOCLUSTER,
			  &fs);
	/* TUNEFS_ET_INVALID_STACK_NAME still returns an open filesystem */
	if (err == TUNEFS_ET_INVALID_STACK_NAME) {
		closeup(fs, device);
		verbosef(VL_OUT, "Expected cluster stack mismatch found\n");
	} else if (!err) {
		closeup(fs, device);
		verbosef(VL_OUT, "Cluster stacks already match\n");
	} else {
		verbosef(VL_OUT, "failed\n");
		tcom_err(err, "- Unable to open device \"%s\" read-write.",
			 device);
	}

	return 0;
}


#endif /* DEBUG_EXE */