/*
-*- linux-c -*-
   drbd_fs.c
   Kernel module for 2.2.x/2.4.x Kernels

   This file is part of drbd by Philipp Reisner.

   Copyright (C) 1999-2002, Philipp Reisner <philipp.reisner@gmx.at>.
        main author.

   Copyright (C) 2000, Fábio Olivé Leite <olive@conectiva.com.br>.
        Some sanity checks in IOCTL_SET_STATE.

   Copyright (C) 2002, Lars Ellenberg <l.g.e@web.de>.
        drbd_is_mounted() for IOCTL_SET_STATE, and IOCTL_SET_DISK_CONFIG.
        Some sanity checks in IOCTL, ctl_mutex

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/config.h>
#ifdef CONFIG_MODVERSIONS
#include <linux/modversions.h>
#endif

#include <asm/uaccess.h>
#include <linux/in.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/utsname.h>
#include "drbd.h"
#include "drbd_int.h"

#if LINUX_VERSION_CODE > KERNEL_VERSION(2,3,0)
#include <linux/blkpg.h>
#endif

/*
 * Tell whether a filesystem is mounted on the drbd device with the given
 * minor number.
 *
 * Returns NotMounted when get_super() finds no superblock for the device,
 * MountedRO when the superblock carries MS_RDONLY, and MountedRW otherwise.
 * The superblock obtained from get_super() is always handed back through
 * drop_super() before returning.
 */
STATIC enum { NotMounted=0,MountedRO,MountedRW } drbd_is_mounted(int minor)
{
	struct super_block *super;
	int read_only;

	super = get_super(MKDEV(MAJOR_NR, minor));
	if (!super)
		return NotMounted;

	/* Sample the flag while we still hold the superblock. */
	read_only = (super->s_flags & MS_RDONLY) != 0;
	drop_super(super);

	if (read_only)
		return MountedRO;
	return MountedRW;
}

STATIC
int drbd_ioctl_set_disk(struct Drbd_Conf *mdev,
			struct ioctl_disk_config * arg)
{
	int err,i,minor;
	enum ret_codes retcode;
	struct disk_config new_conf;
	struct file *filp;
	struct inode *inode;
	kdev_t ll_dev;

	if (!capable(CAP_SYS_ADMIN)) //MAYBE: Move this to the drbd_ioctl()
		return -EACCES;

	minor=(int)(mdev-drbd_conf);

	if (mdev->open_cnt > 1)
		return -EBUSY;

	if (copy_from_user(&new_conf, &arg->config,sizeof(struct disk_config)))
		return -EFAULT;

	filp = fget(new_conf.lower_device);
	if (!filp) {
		retcode=LDFDInvalid;
		goto fail_ioctl;
	}

	inode = filp->f_dentry->d_inode;

	for(i=0;i<minor_count;i++) {
		if( i != minor &&
		    inode->i_rdev == drbd_conf[i].lo_device) {
			retcode=LDAlreadyInUse;
			goto fail_ioctl;
		}
	}

	if (!S_ISBLK(inode->i_mode)) {
		fput(filp);
		retcode=LDNoBlockDev;
		goto fail_ioctl;
	}

	if (drbd_is_mounted(inode->i_rdev)) {
		printk(KERN_WARNING DEVICE_NAME
			"%d: can not configure %d:%d, has active inodes!\n",
			minor, MAJOR(inode->i_rdev), MINOR(inode->i_rdev));
		retcode=LDMounted;
		goto fail_ioctl;
	}

	if ((err = blkdev_open(inode, filp))) {
		printk(KERN_ERR DEVICE_NAME
		       "%d: blkdev_open( %d:%d ,) returned %d\n", minor,
		       MAJOR(inode->i_rdev), MINOR(inode->i_rdev),err);
		fput(filp);
		retcode=LDOpenFailed;
		goto fail_ioctl;
	}

	ll_dev = inode->i_rdev;

	if (blk_size[MAJOR(ll_dev)][MINOR(ll_dev)] < new_conf.disk_size ) {
		retcode=LDDeviceTooSmall;
		goto fail_ioctl;
	}


	fsync_dev(MKDEV(MAJOR_NR, minor));
	drbd_thread_stop(&mdev->syncer);
	drbd_thread_stop(&mdev->asender);
	drbd_thread_stop(&mdev->receiver);
	drbd_free_resources(minor);

	mdev->lo_device = ll_dev;
	mdev->lo_file = filp;
	mdev->lo_usize = new_conf.disk_size;
        mdev->do_panic = new_conf.do_panic;

	if (mdev->lo_usize) {
		blk_size[MAJOR_NR][minor] = mdev->lo_usize;
		/*
		printk(KERN_INFO DEVICE_NAME"%d: user provided size = %d KB\n",
		       minor,blk_size[MAJOR_NR][minor]);
		*/

		if (!mdev->mbds_id) {
			mdev->mbds_id = bm_init(MKDEV(MAJOR_NR, minor));
		}
	}

	set_blocksize(MKDEV(MAJOR_NR, minor), INITIAL_BLOCK_SIZE);
	set_blocksize(mdev->lo_device, INITIAL_BLOCK_SIZE);
	mdev->blk_size_b = drbd_log2(INITIAL_BLOCK_SIZE);

	if (mdev->cstate == Unconfigured) mdev->state = Secondary;
	set_cstate(mdev,StandAlone);
	drbd_md_read(minor);

	return 0;

 fail_ioctl:
	if (put_user(retcode, &arg->ret_code)) return -EFAULT;
	return -EINVAL;
}

/*
 * Handler for DRBD_IOCTL_GET_CONFIG: copy the current configuration of
 * mdev (connection state, lower-level device numbers, user supplied disk
 * size, panic flag, net and sync settings) to the user buffer arg.
 *
 * Returns 0 on success or -EFAULT if the user buffer cannot be written.
 */
STATIC
int drbd_ioctl_get_conf(struct Drbd_Conf *mdev, struct ioctl_get_config* arg)
{
	struct ioctl_get_config cn;

	/*
	 * The whole struct is copied to user space below; clear it first so
	 * that members not assigned here and any padding bytes do not expose
	 * stale kernel stack contents.
	 */
	memset(&cn, 0, sizeof(cn));

	cn.cstate=mdev->cstate;
	cn.lower_device_major=MAJOR(mdev->lo_device);
	cn.lower_device_minor=MINOR(mdev->lo_device);
	cn.disk_size_user=mdev->lo_usize;
	cn.do_panic=mdev->do_panic;
	memcpy(&cn.net,  &mdev->net,  sizeof(struct net_config));
	memcpy(&cn.sync, &mdev->sync, sizeof(struct sync_config));

	if (copy_to_user(arg,&cn,sizeof(struct ioctl_get_config)))
		return -EFAULT;

	return 0;
}

/*
 * Handler for DRBD_IOCTL_SET_SYNC_CONFIG (also called from
 * drbd_ioctl_set_net): update the syncer settings of mdev from arg->sync.
 *
 * Field semantics, as implemented below:
 *   min / max  0 means "leave unchanged"; a new value must keep min < max.
 *   nice       -20..19 is applied, 20 means "leave unchanged".
 *   group/skip negative means "leave unchanged".
 *
 * All fields are validated before any of them is written to mdev, so a
 * rejected request leaves the current settings untouched.
 *
 * Returns 0 on success, -EFAULT on a bad user pointer, -EINVAL for
 * out-of-range values (ret_code is not written in that case), and -EINVAL
 * with ret_code = LDNoConfig when no lower-level device is configured.
 */
STATIC
int drbd_ioctl_set_syncer(struct Drbd_Conf *mdev, struct ioctl_sync_config * arg)
{
	// CAP_SYS_ADMIN ??

	int i;
	int set_min = 0, set_max = 0, set_nice = 0;
	enum ret_codes retcode = NoError;
	struct sync_config sync;

	if (copy_from_user(&sync, &arg->sync, sizeof(struct sync_config)))
		return -EFAULT;

	if( mdev->lo_file == 0 || mdev->lo_device == 0 ) {
		retcode=LDNoConfig;
		goto fail_ioctl;
	}

	// rate == 0 means do not change
	if (   sync.min == 0
	    && sync.max > 0
	    && sync.max > mdev->sync.min )
	{
		set_max = 1;
	} else if (sync.max == 0
		&& sync.min > 0
		&& sync.min < mdev->sync.max)
	{
		set_min = 1;
	} else if (sync.min > 0 && sync.min < sync.max) {
		set_min = 1;
		set_max = 1;
	} else if (sync.min != 0 || sync.max != 0) {
		return -EINVAL;
	}

	if (-20 <= sync.nice && sync.nice <= 19) {
		set_nice = 1;
	} else if (sync.nice != 20)
		return -EINVAL;

	/* Everything validated; commit the accepted values. */
	if (set_min)
		mdev->sync.min = sync.min;
	if (set_max)
		mdev->sync.max = sync.max;
	if (set_nice)
		mdev->sync.nice = sync.nice;
	if (sync.group >= 0)
		mdev->sync.group = sync.group;
	if (sync.skip >= 0)
		mdev->sync.skip  = sync.skip;

	for (i=0; i<minor_count; i++) {
		// in case sync group has changed or something
		if (waitqueue_active(&drbd_conf[i].cstate_wait))
			wake_up_interruptible(&drbd_conf[i].cstate_wait);
	}

fail_ioctl:
	if (put_user(retcode, &arg->ret_code)) return -EFAULT;
	return (retcode? -EINVAL : 0);
}

STATIC
int drbd_ioctl_set_net(struct Drbd_Conf *mdev, struct ioctl_net_config * arg)
{
	int i,minor;
	enum ret_codes retcode = NoError;
	struct net_config  new_net;

	if (!capable(CAP_SYS_ADMIN)) //MAYBE: Move this to the drbd_ioctl()
		return -EACCES;

	minor=(int)(mdev-drbd_conf);

	if (copy_from_user(&new_net, &arg->net,sizeof(struct net_config)))
		return -EFAULT;

	// HACK, but this cast is legal.
	if ((i=drbd_ioctl_set_syncer(mdev,(struct ioctl_sync_config*)&arg->sync)))
		return i;

#define M_ADDR(A) (((struct sockaddr_in *)&A.my_addr)->sin_addr.s_addr)
#define M_PORT(A) (((struct sockaddr_in *)&A.my_addr)->sin_port)
#define O_ADDR(A) (((struct sockaddr_in *)&A.other_addr)->sin_addr.s_addr)
#define O_PORT(A) (((struct sockaddr_in *)&A.other_addr)->sin_port)
	for(i=0;i<minor_count;i++) {
		if( i!=minor && drbd_conf[i].cstate!=Unconfigured &&
		    M_ADDR(new_net) == M_ADDR(drbd_conf[i].net) &&
		    M_PORT(new_net) == M_PORT(drbd_conf[i].net) )
		{
			retcode=LAAlreadyInUse;
			goto fail_ioctl;
		}
		if( i!=minor && drbd_conf[i].cstate!=Unconfigured &&
		    O_ADDR(new_net) == O_ADDR(drbd_conf[i].net) &&
		    O_PORT(new_net) == O_PORT(drbd_conf[i].net) )
		{
			retcode=OAAlreadyInUse;
			goto fail_ioctl;
		}
	}
#undef M_ADDR
#undef M_PORT
#undef O_ADDR
#undef O_PORT

	/* IMPROVE:
	   We should warn the user if the LL_DEV is
	   used already. E.g. some FS mounted on it.
	*/

	fsync_dev(MKDEV(MAJOR_NR, minor));
	drbd_thread_stop(&mdev->syncer);
	drbd_thread_stop(&mdev->asender);
	drbd_thread_stop(&mdev->receiver);
	drbd_free_sock(minor);

	memcpy(&mdev->net,&new_net,
	       sizeof(struct net_config));

	if (!mdev->transfer_log) {
		mdev->transfer_log = kmalloc(sizeof(struct tl_entry) *
					     mdev->net.tl_size, GFP_KERNEL);
                if (!mdev->transfer_log) {
			printk(KERN_ERR DEVICE_NAME
			       "%d: could not kmalloc transfer_log\n", minor);
			goto fail_ioctl;
                }
		tl_init(&drbd_conf[minor]);
	}

	set_cstate(&drbd_conf[minor],Unconnected);
	drbd_thread_start(&mdev->receiver);

	return 0;

	fail_ioctl:
	if (put_user(retcode, &arg->ret_code)) return -EFAULT;
	return -EINVAL;
}

/*
 * Switch the drbd device with the given minor to newstate.
 *
 * newstate must carry exactly one of Primary/Secondary in its two low bits;
 * it may additionally carry modifier flags (Human, TimeoutExpired,
 * DontBlameDrbd, DontWait) that are evaluated below.
 *
 * Returns 0 on success (including "already in that state"), or
 *   -EINVAL      neither or both of Primary/Secondary requested,
 *   -ENXIO       device is Unconfigured,
 *   -EACCES      Primary requested while the peer is Primary,
 *   -EINPROGRESS connection state is beyond Connected,
 *   -EBUSY       Secondary requested while a writer is present or the
 *                device is mounted read-write,
 *   -EIO         Primary requested on inconsistent data without
 *                DontBlameDrbd,
 *   -EINTR       the wait for outstanding requests was interrupted.
 */
int drbd_set_state(int minor,Drbd_State newstate)
{
	struct Drbd_Conf *mdev = &drbd_conf[minor];
	int to_primary = (newstate & Primary) != 0;
	int to_secondary = (newstate & Secondary) != 0;

	/* Nothing to do if we are already there. */
	if (newstate == mdev->state)
		return 0;

	/* Exactly one of the two role bits has to be set. */
	if (((newstate ^ (newstate >> 1)) & 1) == 0)
		return -EINVAL;

	if (mdev->cstate == Unconfigured)
		return -ENXIO;

	if (to_primary && mdev->o_state == Primary)
		return -EACCES;

	if (mdev->cstate > Connected)
		return -EINPROGRESS;

	if (to_secondary) {
		if (test_bit(WRITER_PRESENT, &mdev->flags) ||
		    drbd_is_mounted(minor) == MountedRW)
			return -EBUSY;
	}

	if (to_primary) {
		if (!(mdev->gen_cnt[Flags] & MDF_Consistent) &&
		    !(newstate & DontBlameDrbd))
			return -EIO;
	}

	fsync_dev(MKDEV(MAJOR_NR, minor));

	/*
	 * Wait until nothing is on the fly any more:
	 *   PRI -> SEC : TL is empty || cstate < connected
	 *   SEC -> PRI : ES is empty || cstate < connected
	 *     (should hold anyway, because the other side should already
	 *      be in SEC state)
	 *
	 * FIXME: the current implementation is racy; the original author
	 * planned to use a rw-semaphore and considered the races harmless.
	 */
	if (atomic_read(&mdev->pending_cnt) ||
	    atomic_read(&mdev->unacked_cnt)) {

		/* Caller does not want to block: defer the switch. */
		if (to_secondary && (newstate & DontWait)) {
			set_bit(BECOME_SECONDARY, &mdev->flags);
			return 0;
		}

		printk(KERN_WARNING DEVICE_NAME
		       "%d: set_state() waiting for pe=0 and ua=0\n",minor);

		if (wait_event_interruptible(mdev->state_wait,
			atomic_read(&mdev->pending_cnt) == 0 &&
			atomic_read(&mdev->unacked_cnt) == 0))
			return -EINTR;
	}

	/* Only the two role bits are stored as the device state. */
	mdev->state = (Drbd_State) newstate & 0x03;

	if (!to_primary) {
		set_device_ro(MKDEV(MAJOR_NR, minor), TRUE );
	} else {
		set_device_ro(MKDEV(MAJOR_NR, minor), FALSE );

		/* Bump the generation counter matching the reason. */
		if (newstate & Human)
			drbd_md_inc(minor,HumanCnt);
		else if (newstate & TimeoutExpired)
			drbd_md_inc(minor,TimeoutCnt);
		else
			drbd_md_inc(minor,
			    mdev->cstate >= Connected ?
			    ConnectedCnt : ArbitraryCnt);

		mdev->gen_cnt[Flags] |= MDF_Consistent;
	}

	/* Primary indicator has changed in any case. */
	drbd_md_write(minor);

	if (mdev->cstate >= WFReportParams)
		drbd_send_param(mdev);

	return 0;
}

/*
 * ioctl entry point of the drbd block device.
 *
 * The target device is selected by the minor number of inode->i_rdev.
 * mdev->ctl_mutex is taken (interruptibly) around the command dispatch and
 * released at the end. DRBD_IOCTL_WAIT_CONNECT and DRBD_IOCTL_WAIT_SYNC
 * release the mutex themselves before sleeping and therefore leave through
 * the out_unlocked label instead.
 *
 * @inode: inode of the device node the ioctl was issued on.
 * @file:  open file; not used in this function.
 * @cmd:   ioctl command number.
 * @arg:   command specific argument; a user-space pointer for most
 *         commands, a state bit mask for DRBD_IOCTL_SET_STATE.
 *
 * Returns 0 on success or a negative errno: -ENODEV for a minor beyond
 * minor_count, the error from down_interruptible() if taking the mutex was
 * interrupted, -EINVAL for unknown commands, otherwise whatever the
 * individual command produced.
 */
int drbd_ioctl(struct inode *inode, struct file *file,
			   unsigned int cmd, unsigned long arg)
{
	int minor,err=0;
	long time;
	struct Drbd_Conf *mdev;

	minor = MINOR(inode->i_rdev);
	if(minor >= minor_count) return -ENODEV;
	mdev = &drbd_conf[minor];

	if( (err=down_interruptible(&mdev->ctl_mutex)) ) return err;
	/*
	 * please no 'return', use 'err = -ERRNO; break;'
	 * we hold the ctl_mutex
	 */
	switch (cmd) {
	case BLKGETSIZE:
		/* presumably blk_size[][] is in KB, so <<1 gives 512 byte
		   sectors -- TODO confirm */
		err = put_user(blk_size[MAJOR_NR][minor]<<1, (long *)arg);
		break;

#ifdef BLKGETSIZE64
	case BLKGETSIZE64: /* see ./drivers/block/loop.c */
		err = put_user((u64)blk_size[MAJOR_NR][minor]<<10, (u64*)arg);
		break;
#endif

	/* Generic block device ioctls are delegated: to blk_ioctl() on
	   kernels newer than 2.3.0, to the RO_IOCTLS macro otherwise. */
#if LINUX_VERSION_CODE > KERNEL_VERSION(2,3,0)
	case BLKROSET:
	case BLKROGET:
	case BLKFLSBUF:
	case BLKSSZGET:
#ifdef BLKBSZGET
	case BLKBSZGET:
#endif
	case BLKPG:
		err=blk_ioctl(inode->i_rdev, cmd, arg);
#else
		RO_IOCTLS(inode->i_rdev, arg);
#endif
		break;
	case DRBD_IOCTL_GET_VERSION:
		err = put_user(API_VERSION, (int *) arg);
		break;

	case DRBD_IOCTL_SET_STATE:
		/* arg is a bit mask here; reject any bit outside the known
		   state and modifier flags. */
		if (arg & ~(Primary|Secondary|Human|TimeoutExpired|
			    DontBlameDrbd|DontWait) ) {
			err = -EINVAL;
			break;
		}

		err = drbd_set_state(minor,arg);
		break;

	case DRBD_IOCTL_SET_DISK_CONFIG:
		err = drbd_ioctl_set_disk(mdev,(struct ioctl_disk_config*)arg);
		break;

	case DRBD_IOCTL_SET_NET_CONFIG:
		err = drbd_ioctl_set_net(mdev,(struct ioctl_net_config*) arg);
		break;

	case DRBD_IOCTL_SET_SYNC_CONFIG:
		err	= drbd_ioctl_set_syncer(mdev,(struct ioctl_sync_config*) arg);
		break;

	case DRBD_IOCTL_GET_CONFIG:
		err = drbd_ioctl_get_conf(mdev,(struct ioctl_get_config*) arg);
		break;

	case DRBD_IOCTL_UNCONFIG_NET:
		/* Stop all worker threads and fall back to StandAlone; a
		   device that is already Unconfigured is left alone. */
		if( mdev->cstate == Unconfigured) break;
		/* FIXME what if fsync returns error */
		fsync_dev(MKDEV(MAJOR_NR, minor));
		set_bit(DO_NOT_INC_CONCNT,&mdev->flags);
		drbd_thread_stop(&mdev->syncer);
		drbd_thread_stop(&mdev->asender);
		drbd_thread_stop(&mdev->receiver);

		set_cstate(mdev,StandAlone);
		break;

	case DRBD_IOCTL_UNCONFIG_BOTH:
		/* Full teardown: threads, resources and bitmap; refused with
		   -EBUSY while the device is opened more than once. */
		if (mdev->cstate == Unconfigured) break;

		if (mdev->open_cnt > 1) {
			err=-EBUSY;
			break;
		}

		fsync_dev(MKDEV(MAJOR_NR, minor));
		set_bit(DO_NOT_INC_CONCNT,&mdev->flags);
		drbd_thread_stop(&mdev->syncer);
		drbd_thread_stop(&mdev->asender);
		drbd_thread_stop(&mdev->receiver);
		drbd_free_resources(minor);
		if (mdev->mbds_id) {
			bm_cleanup(mdev->mbds_id);
			mdev->mbds_id=0;
		}

		set_cstate(mdev,Unconfigured);
		mdev->state = Secondary;

		break;

	case DRBD_IOCTL_WAIT_CONNECT:
		/* *arg is read as an int timeout, multiplied by HZ below;
		   on success it is overwritten with 1 if the connection
		   state reached Connected or beyond, 0 otherwise. */
		if ((err = get_user(time, (int *) arg)))
			break;

		// We can drop the mutex, we do not touch anything in mdev.
		up(&mdev->ctl_mutex);

		/* A timeout of 0 means wait without limit. */
		time = time*HZ ?: MAX_SCHEDULE_TIMEOUT;
		err = wait_event_interruptible_timeout(
			mdev->cstate_wait,
			mdev->cstate < Unconnected ||
			mdev->cstate >= Connected,
			time );
		if (mdev->cstate < Unconnected) err = -ENOTCONN;
		if (err == 0) err = -ETIME;
		if (err < 0) goto out_unlocked;
		err=0; // no error

		err = put_user(mdev->cstate >= Connected, (int *) arg);
		goto out_unlocked;

	case DRBD_IOCTL_WAIT_SYNC:
		/* Same argument convention as DRBD_IOCTL_WAIT_CONNECT, but
		   waits for the state to be exactly Connected. */
		if ((err = get_user(time, (int *) arg)))
			break;

		/* The mutex is released before sleeping; this path leaves
		   via out_unlocked. */
		up(&mdev->ctl_mutex);

		time=time*HZ ?: MAX_SCHEDULE_TIMEOUT;

		do {
			/* While the state is beyond Connected the user
			   supplied timeout is replaced by an unlimited one. */
			if (mdev->cstate > Connected)
				time=MAX_SCHEDULE_TIMEOUT;
			// XXX else back to user supplied timeout ??
			err = wait_event_interruptible_timeout(
				mdev->cstate_wait,
				mdev->cstate == Connected ||
				mdev->cstate < Unconnected,
				time );
			if (mdev->cstate < Unconnected) err = -ENOTCONN;
			if (err == 0) err = -ETIME;
			if (err < 0) goto out_unlocked;
		} while (err > 0
			 && mdev->cstate != Connected
			 && mdev->cstate >= Unconnected);
		err=0; // no error

		err = put_user(mdev->cstate == Connected, (int *) arg);
		goto out_unlocked;

	case DRBD_IOCTL_DO_SYNC_ALL:
		/* Only allowed while exactly Connected. */
		if (mdev->cstate < Connected) {
			err = -ENOTCONN;
			break;
		} else if ( mdev->cstate > Connected ) {
			err = -EALREADY;
			break;
		}


		/* The Primary side starts the syncer thread itself; a
		   Secondary asks a Primary peer via StartSync. */
		if (mdev->state == Primary) {
			set_cstate(mdev, mdev->sync_method ?
					 mdev->sync_method : SyncingAll);
			drbd_send_cstate(mdev);
			drbd_thread_start(&mdev->syncer);
		} else if (mdev->o_state == Primary) {
			drbd_send_cmd(drbd_conf+minor,StartSync,0);
		} else err = -ENODATA; /* Suggest a better one! */

		break;

	case DRBD_IOCTL_SECONDARY_REM:
		/* Send BecomeSec to a peer that is currently Primary. */
		if (mdev->cstate != Connected) {
			err = -ENXIO;
			break;
		}

		if (mdev->o_state == Primary) {
			drbd_send_cmd(drbd_conf+minor,BecomeSec,0);
		} else err = -ESRCH;

		break;

	default:
		err = -EINVAL;
	}
//out:
	up(&mdev->ctl_mutex);
 out_unlocked:
	return err;
}


