/*
 *	Ohio Trollius
 *	Copyright 1996 The Ohio State University
 *	RBD/NJN
 *
 *	$Id: mpirun.c,v 6.1.1.1 96/12/13 16:08:24 nevin Exp $
 *
 *	Function:	- start an MPI application
 */

#include <lam_config.h>

#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <all_list.h>
#include <all_opt.h>
#include <app_mgmt.h>
#include <app_schema.h>
#include <args.h>
#include <events.h>
#include <freq.h>
#include <kio.h>
#include <mpi.h>
#include <mpisys.h>
#include <net.h>
#include <portable.h>
#include <priority.h>
#include <preq.h>
#include <sfh.h>
#include <terror.h>
#include <typical.h>

/*
 * external functions
 * (declared K&R style, without parameter lists)
 */
extern char		*getworkdir();
extern char		*killname();
extern int		lam_lfopenfd();

/*
 * private functions
 * (forward declarations; the definitions follow main())
 */
static int		pwait();
static int		set_mode();
static int		set_stdio();
static void		reset_stdio();
static LIST		*build_app();
static char		*locate_aschema();
static void		node_ranges();
static void		help();

/*
 * external variables
 */
extern struct kio_t	_kio;			/* kernel I/O block */
extern struct fclient	_ufd[FUMAX];		/* user file desc. */

/*
 * private variables
 * The fl_* flags, nprocs and rtf are filled in by set_mode() from the
 * parsed command line options.
 */
static int		fl_fdesc;		/* don't open stdio */
static int		fl_nprocs;		/* # procs specified */
static int		fl_verbose;		/* verbose mode */
static int		fl_wait;		/* wait for completion */
static int4		nprocs;			/* # procs */
static int4		rtf;			/* runtime flags */
static char		fmtbuf[512];		/* formatting buffer */
static char		smallbuf[128];		/* small extra buffer */
static OPT		*ad;			/* argument descriptor */

/*
 * Diagnostic printed by build_app(); %d receives the failing line number.
 */
static char		*syntaxerr =
	"mpirun: application schema syntax error, line %d\n";

/*
 * One-line synopsis printed when the command line cannot be parsed.
 */
static char		*usage =
	"mpirun [options] [<app>] [<prog> [<nodes>] [-- <args>]]\n";

/*
 *	main
 *
 *	Function:	- parse the command line and set the operating mode
 *			- attach to the kernel, build and run the application
 *			- send the process table to all processes
 *			- with -w, wait for the application to terminate
 *	Accepts:	- command line argument count
 *			- command line argument vector
 *	Returns:	- terminates through exit()/kexit() with 0, the errno
 *			  of the failing step, or the first non-zero child
 *			  status collected by pwait()
 */
int
main(argc, argv)

int			argc;
char			**argv;

{
	LIST		*app;			/* application desc. */
	struct _gps	*world;			/* world GPS array */
	char		*cwd;			/* current working directory */
	int		world_n;		/* size of world */
	int		status;			/* return status */
	int		err;			/* errno saved before reporting */
/*
 * Parse the command line.
 */
	ad = ao_init();
	if (ad == 0) {
/*
 * Save errno first: the reporting call may overwrite it.
 */
		err = errno;
		perror("mpirun (ao_init)");
		exit(err);
	}
	ao_setopt1(ad, "fhtvwDO", 0, 0, 0);
	ao_setopt(ad, "c", 0, 1, AOINT);
	ao_setopt(ad, "c2c", 0, 0, 0);
	ao_setopt1(ad, "ios", 0, 1, 0);
	ao_setopt(ad, "nger", 0, 0, 0);
	ao_setopt(ad, "toff", 0, 0, 0);
	ao_setopt(ad, "ton", 0, 0, 0);

	if (ao_parse(ad, &argc, argv))  {
		err = errno;
/*
 * The usage text is data, not a format string.
 */
		fputs(usage, stderr);
		exit(err);
	}
/*
 * Check for help request.
 */
	if (ao_taken(ad, "h")) {
		help();
		exit(0);
	}
/*
 * Set the operating mode.
 */
	if (set_mode()) {
		err = errno;
		terror("mpirun (set_mode)");
		exit(err);
	}
/*
 * Attach to kernel.
 */
	if (kinit(PRCMD)) {
		err = errno;
		terror("mpirun (kinit)");
		exit(err);
	}
/*
 * Set job identifier to be inherited by the application.
 */
	_kio.ki_jobid = ((getpid() << 16) | getnodeid());
/*
 * Build the application and allocate the world GPS array.
 * build_app() has already printed a diagnostic when it returns 0.
 */
	if ((app = build_app(argc, argv)) == 0) {
		kexit(errno);
	}

	world_n = al_count(app);
	world = (struct _gps *) malloc(world_n * sizeof(struct _gps));
	if (world == 0) lamfail("mpirun");
/*
 * Pre-qualify the GER and set buffer limitations on each node.
 */
	if (!ao_taken(ad, "nger") && !ao_taken(ad, "c2c")) {
		if (asc_ger(app, TRUE)) kexit(errno);
	}
/*
 * Set application stdio.
 */
	if (set_stdio()) lamfail("mpirun (set_stdio)");
/*
 * Change local working directory.
 */
	if (!ao_taken(ad, "D")) {
		if ((cwd = getworkdir()) == 0) lamfail("mpirun (getworkdir)");

		if (lam_rfchdir(LOCAL, cwd)) lamfail("mpirun (lam_rfchdir)");

		free(cwd);
	}
/*
 * Run the application.
 * reset_stdio() only assigns to _kio, so errno is still that of asc_run().
 */
	if (asc_run(app, 0, rtf, fl_verbose, TRUE, world)) {
		reset_stdio();
		kexit(errno);
	}
	reset_stdio();
/*
 * Send process table to all processes.
 */
	if (app_sendprocs(world_n, world)) {
/*
 * Keep the original failure code; the reporting and clean-up calls
 * below may change errno.
 */
		err = errno;
		terror("mpirun");
		app_doom(world_n, world);
		kexit(err);
	}
/*
 * If needed, wait for the application to terminate.
 */
	status = 0;

	if (fl_wait) {
		if (pwait(world_n, &status)) {
			err = errno;
			terror("mpirun (pwait)");
			app_doom(world_n, world);
			kexit(err);
		}
	}

	kexit(status);
	return(0);
}

/*
 *	fmt_append
 *
 *	Function:	- append a string to the formatting buffer (fmtbuf)
 *			- never writes past the end of the buffer
 *	Accepts:	- string to append
 *	Returns:	- 0, or -1 with errno set to E2BIG when the string
 *			  does not fit (the buffer is left unchanged)
 */
static int
fmt_append(str)

char			*str;

{
	size_t		used;			/* bytes already in buffer */
	size_t		need;			/* bytes to add */

	used = strlen(fmtbuf);
	need = strlen(str);
/*
 * One byte must remain for the terminating null character.
 */
	if ((used >= sizeof(fmtbuf)) || (need >= sizeof(fmtbuf) - used)) {
		errno = E2BIG;
		return(-1);
	}

	memcpy(fmtbuf + used, str, need + 1);
	return(0);
}

/*
 *	build_app
 *
 *	Function:	- build application from command line specification
 *			- the specification is either an application schema
 *			  file or a single program given on the command line
 *			- prints a diagnostic before returning 0
 *	Accepts:	- command line argument count
 *			- command line argument vector
 *	Returns:	- scheduled application descriptor or 0
 */
static LIST *
build_app(argc, argv)

int			argc;
char			**argv;

{
	LIST		*app;			/* application */
	LIST		*app_sched;		/* scheduled application */
	int4		nodeid;			/* target node */
	int4		index;			/* for getnode() */
	int4		nflags;			/* for getnode() */
	int		lineno;			/* line number */
	int		argtailc;		/* argument tail count */
	char		**argtailv;		/* argument tail vector */
	char		*aschema;		/* application schema */
	int		overflow;		/* fmtbuf too small */

	if (nid_parse(&argc, argv) || (errno = (argc > 1) ? 0 : EUSAGE)) {
		fputs(usage, stderr);
		return(0);
	}
/*
 * Application schema or command line?
 */
	nid_get(&index, &nodeid, &nflags);
/*
 * Start from a known value so the error report below never reads an
 * indeterminate line number.
 */
	lineno = 0;

	if ((index < 0) && !fl_nprocs) {
/*
 * Parse the app. schema.
 */
		if ((argc != 2) || ao_taken(ad, "s")) {
			fputs(usage, stderr);
			kexit(EUSAGE);
		}

		aschema = locate_aschema(argv[1]);
		if (aschema == 0) {
			fprintf(stderr, "mpirun (locate_aschema): %s: ",
				argv[1]);
			terror("");
			return(0);
		}

		app = asc_parse(aschema, &lineno);
	}
	else {
/*
 * Parse the "command line" application specification.
 * Reconstruct its relevant parts from the parsed information.
 * It was parsed to distinguish between the appl/cmdline cases.
 *
 * Format: [<nodes>] program [-s <srcnode>] [-c #] [-- args]
 *
 */
		fmtbuf[0] = 0;
		overflow = 0;
/*
 * NOTE(review): node_ranges() receives no buffer size and still appends
 * to fmtbuf unchecked.
 */
		if (index == 0) {
			if (nodeid == LOCAL) nodeid = getnodeid();
			node_ranges(fmtbuf, nodeid, index);
		}
/*
 * Every user-supplied piece goes through the bounded append.  A failure
 * is only recorded here and reported once the line is complete.
 */
		for (--argc, ++argv; argc > 0; --argc, ++argv) {
			overflow |= fmt_append(*argv);
			overflow |= fmt_append(" ");
		}

		if (ao_taken(ad, "s")) {
			overflow |= fmt_append("-s ");
			overflow |= fmt_append(ao_param(ad, "s", 0, 0));
			overflow |= fmt_append(" ");
		}

		if (fl_nprocs) {
			sprintf(smallbuf, "-c %d ", nprocs);
			overflow |= fmt_append(smallbuf);
		}

		ao_tail (ad, &argtailc, &argtailv);
		if (argtailc > 0) overflow |= fmt_append("-- ");

		for ( ; argtailc > 0; --argtailc, ++argtailv) {
			overflow |= fmt_append(*argtailv);
			overflow |= fmt_append(" ");
		}

		if (overflow) {
			fputs("mpirun: command line too long\n", stderr);
			errno = E2BIG;
			return(0);
		}
/*
 * Parse the command line.
 * The line number is meaningless for a one-line buffer, so it is cleared
 * to select the generic "cannot parse" report below.
 */
		app = asc_bufparse(fmtbuf, strlen(fmtbuf), &lineno);
		lineno = 0;
	}

	if (app == 0) {
		if (lineno > 0) {
			fprintf(stderr, syntaxerr, lineno);
			errno = EUSAGE;
		} else {
			fprintf(stderr, "mpirun: cannot parse: ");
			terror("");
		}

		return(0);
	}
/*
 * Schedule the application.
 */
	app_sched = asc_schedule(app);
	asc_free(app);

	if (app_sched == 0) {
		terror("mpirun (asc_schedule)");
		return(0);
	} else {
		return(app_sched);
	}
}

/*
 *	pwait
 *
 *	Function:	- collect the termination of each process
 *			- report every process that ends with non-zero status
 *	Accepts:	- # of processes to wait for
 *			- status of the first failing child, else 0 (out)
 *	Returns:	- 0 or LAMERROR
 */
static int
pwait(nwait, childstat)

int4			nwait;
int			*childstat;

{
	int4		src_node;		/* node of finished child */
	int		child_pid;		/* pid of finished child */
	int		child_status;		/* its exit status */
	int		have_error = 0;		/* a failure was recorded */

	*childstat = 0;

	while (nwait > 0) {
		if (rpwait(&src_node, &child_pid, &child_status)) {
			return(LAMERROR);
		}

		if (child_status != 0) {
			printf("%d exited with status %d\n",
					child_pid, child_status);
/*
 * Only the first failure is handed back to the caller.
 */
			if (!have_error) {
				have_error = 1;
				*childstat = child_status;
			}
		}

		--nwait;
	}

	return(0);
}

/*
 *	set_mode
 *
 *	Function:	- set the operating mode
 */
static int
set_mode()

{
/*
 * flags
 */
	fl_fdesc = ao_taken(ad, "f");
	fl_verbose = ao_taken(ad, "v");
	fl_wait = ao_taken(ad, "w");
/*
 * followed options
 */
	nprocs = -1;
	fl_nprocs = ao_taken(ad, "c");
	if (fl_nprocs) ao_intparam(ad, "c", 0, 0, &nprocs);
/*
 * runtime flags
 */
	rtf = RTF_MPIRUN;

	if (fl_wait) rtf |= RTF_WAIT;
	if (ao_taken(ad, "D")) rtf |= RTF_APPWD;
	if (ao_taken(ad, "O")) rtf |= RTF_HOMOG;
	if (ao_taken(ad, "c2c")) rtf |= RTF_MPIC2C;
	if (!ao_taken(ad, "nger")) rtf |= RTF_MPIGER;

	if (ao_taken(ad, "t") || ao_taken(ad, "ton") || ao_taken(ad, "toff")) {
		rtf |= RTF_TRACE;
	}

	if (!ao_taken(ad, "toff")) rtf |= RTF_TRSWITCH;

	if (isatty(1)) {
		rtf |= RTF_TTYOUT;
	}
#if HAVE_BSD43_FD_PASSING || HAVE_BSD44_FD_PASSING
	if (!fl_fdesc) rtf |= RTF_IO;
#endif
	return(0);
}

/*
 *	set_stdio
 *
 *	Function:	- prepare the stdio descriptors handed to the
 *			  application
 *			- does nothing when -f was given or when file
 *			  descriptor passing is not configured
 *	Returns:	- 0 or LAMERROR
 */
static int
set_stdio()

{
	char		sockname[LAM_PATH_MAX];	/* fd server socket name */

	if (!fl_fdesc) {
#if HAVE_BSD43_FD_PASSING || HAVE_BSD44_FD_PASSING
/*
 * Pass stdin, stdout and stderr to filed.
 */
		if (lam_mktmpid((int) getpid(), sockname, sizeof(sockname))
				|| lam_lfopenfd(sockname)) {
			return(LAMERROR);
		}
/*
 * Point the LAM descriptors at the passed file descriptors.  The call to
 * lam_lfopenfd() takes care of the case when stdin is a tty.
 */
		_kio.ki_stdin = _ufd[0].fu_tfd;
		_kio.ki_stdout = _ufd[1].fu_tfd;
		_kio.ki_stderr = _ufd[2].fu_tfd;
#endif
	}

	return(0);
}

/*
 *	reset_stdio
 *
 *	Function:	- put the kernel I/O block back to descriptors
 *			  0, 1 and 2 so rfatexit will clean it up
 */
static void
reset_stdio()

{
	_kio.ki_stderr = 2;
	_kio.ki_stdout = 1;
	_kio.ki_stdin = 0;
}

/*
 *	locate_aschema
 *
 *	Function:	- search for a readable application schema
 *			- the search list is $LAMAPPLDIR (when set), the
 *			  empty prefix, $LAMHOME/boot, $TROLLIUSHOME/boot
 *			  and DEFPSCHEMA, in that order
 *	Accepts:	- filename
 *	Returns:	- full pathname or NULL
 */
static char *
locate_aschema(filename)

char			*filename;

{
	int		ndirs = 0;		/* # search directories */
	char		**dirs = 0;		/* search directories */
	char		*envdir;		/* $LAMAPPLDIR value */
	char		*found;			/* result of the search */

	envdir = getenv("LAMAPPLDIR");

	if (envdir != 0) {
		argvadd(&ndirs, &dirs, envdir);
	}

	argvadd(&ndirs, &dirs, "");
	argvadd(&ndirs, &dirs, "$LAMHOME/boot");
	argvadd(&ndirs, &dirs, "$TROLLIUSHOME/boot");
	argvadd(&ndirs, &dirs, DEFPSCHEMA);

	found = sfh_path_find(filename, dirs, R_OK);
	argvfree(dirs);

	return(found);
}

/*
 *	node_ranges
 *
 *	Function:	- append the list of parsed nodes to a buffer
 *			- runs of consecutive nodes are written as ranges,
 *			  e.g. "n0-3,5,7-8 "
 *			- the buffer is assumed to be large enough
 *	Accepts:	- formatting buffer (null-terminated, appended to)
 *			- initial node
 *			- initial index
 */
static void
node_ranges(buf, initnode, initindex)

char			*buf;
int4			initnode;
int4			initindex;

{
	int4		cur_node;		/* node just fetched */
	int4		cur_index;		/* its index */
	int4		cur_flags;		/* its flags (unused) */
	int4		range_start;		/* first node of current run */
	int4		expected;		/* node that extends the run */
	int		in_range = 0;		/* run has an unwritten end */

	sprintf(smallbuf, "n%d", initnode);
	strcat(buf, smallbuf);

	range_start = initnode;
	expected = initnode + 1;
/*
 * Walk the node list until it comes back to the initial index.
 */
	for (nid_get(&cur_index, &cur_node, &cur_flags);
			cur_index != initindex;
			nid_get(&cur_index, &cur_node, &cur_flags)) {
/*
 * The node extends the current run: just remember that.
 */
		if (cur_node == expected) {
			++expected;
			in_range = 1;
			continue;
		}
/*
 * The node breaks the run.  Close a run of two or more nodes with its
 * last member, then start a new run at this node.
 */
		if (expected > range_start + 1) {
			sprintf(smallbuf, "-%d,%d", expected - 1, cur_node);
		}
		else {
			sprintf(smallbuf, ",%d", cur_node);
		}
		strcat(buf, smallbuf);

		range_start = cur_node;
		expected = cur_node + 1;
		in_range = 0;
	}
/*
 * Write the end of a run that was still open.
 */
	if (in_range) {
		sprintf(smallbuf, "-%d", expected - 1);
		strcat(buf, smallbuf);
	}

	strcat(buf, " ");
}

/*
 *	help
 *
 *	Function:	- write the command synopsis, the option list, the
 *			  node usage from mnusage() and an example to stdout
 */
static void
help()

{
/*
 * Text written before the node usage.
 */
	static char	*head[] = {
		"\nSynopsis:\tmpirun [options] [<app>] [<prog> [<nodes>] [-- <args>]]\n",
		"\nDescription:\tStart an MPI application on LAM.\n",
		"\nOptions:\t-h\t\tPrint this help message.\n",
		"\t\t-c2c\t\tUse fast library (C2C) mode.\n",
		"\t\t-f\t\tDo not open stdio descriptors.\n",
		"\t\t-nger\t\tTurn off GER mode.\n",
		"\t\t-toff\t\tEnable tracing with generation off initially.\n",
		"\t\t-ton\t\tEnable tracing with generation on initially.\n",
		"\t\t-v\t\tBe verbose.\n",
		"\t\t-w\t\tWait for application to complete.\n",
		"\t\t-D\t\tSet current working directory to pathname.\n",
		"\t\t-O\t\tMulticomputer is homogeneous.\n",
		"\t\t-c <num>\tRun <num> copies of <prog>.\n",
		"\t\t-s <nodeid>\tLoad <prog> from this source node.\n",
		0
	};
/*
 * Text written after the node usage.
 */
	static char	*tail[] = {
		"\t\th (local), o (origin), N (all)\n",
		"\nExample:        mpirun prog1 n0-7\n",
		"\t\t\t\"Execute `prog1' on nodes 0 thru 7.\"\n",
		0
	};
	char		**line;			/* current text line */

	for (line = head; *line != 0; ++line) {
		fputs(*line, stdout);
	}

	mnusage();

	for (line = tail; *line != 0; ++line) {
		fputs(*line, stdout);
	}
}
