/***********************************************************************
* DOLIB/DONIO Version 0.0 (8/24/94)                                    *
*  Software to emulate shared memory on distributed memory environments*
* written by:                                                          *
*  Ed D'Azevedo and Charles Romine of Oak Ridge National Laboratory    *
*                                                                      *
* Questions and comments should be directed to                         *
*      efdazedo@msr.epm.ornl.gov or romine@msr.epm.ornl.gov            *
*                                                                      *
*  Please notify and acknowledge the authors in any research or        *
*  publications utilizing DOLIB/DONIO or any part of the code.         *
*                                                                      *
* NOTICE: Neither the institution nor the author make any              *
*  representations about the suitability of this software for any      *
*  purpose. This software is provided "as is", without express or      *
*  implied warranty.                                                   *
************************************************************************/

#include "stdinc.h"
#include "message.h"

#include <stdio.h>
#include <sys/types.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * STRDUP(cdest, path): allocate strlen(path)+1 bytes through MEMALLOC and
 * copy the string `path` into the new storage, leaving cdest pointing at
 * the copy.
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves
 * as a single statement (safe in an unbraced if/else).  Note that `path`
 * is evaluated twice, so it must not have side effects.
 */
#define STRDUP( cdest, path ) do { \
	MEMALLOC( cdest, (char *), (strlen( (path) ) + 1) ); \
	strcpy( (cdest), (path) ); \
	} while (0)



#if !defined(USE_INTEGER)

/*
 * Default: the unit of transfer is a single byte.  This is the only
 * correct choice when some request seeks to an arbitrary offset or uses
 * an i/o block size that is not a multiple of sizeof(integer).
 */

typedef unsigned char BYTE;
typedef BYTE BASE_TYPE;
#define BASE_TYPE_STRING "char"

#else

/*
 * USE_INTEGER: every i/o request is made of integer, real*4 or real*8
 * items, so an "integer" base type can be used for better performance.
 */

typedef unsigned int UINTEGER;
typedef UINTEGER BASE_TYPE;
#define BASE_TYPE_STRING "integer"

#endif				/* if !USE_INTEGER */

/*
 * use read/write instead of cread/cwrite for simplicity;
 * LSEEK and CLOSE are plain aliases for the lseek() and close() calls
 * on every platform.
 */
#define LSEEK lseek
#define CLOSE close



#if NX || I860 || PGON || PARAGON
	/* use asynchronous read write */
#include <nx.h>

/*
 * READ/WRITE on NX-type systems: forward to the nxread()/nxwrite()
 * wrappers defined below, casting each argument to the parameter type
 * those wrappers declare.
 */
#define READ(fd, buf, nbytes )  \
	nxread( (int) (fd), (char *) (buf), (unsigned int) (nbytes) )

#define WRITE(fd, buf, nbytes ) \
	nxwrite( (int) (fd), (char *) (buf), (unsigned int) (nbytes) )


/*
 * nxread(): start an asynchronous read of nbytes from fd into buf with
 * iread(), then poll iodone() on the returned tag, running
 * INTERRUPT_CHECK() between polls, until the request has finished.
 *
 * Always returns nbytes; the number of bytes really transferred is not
 * inspected here.
 * NOTE(review): assumes iread() does not return on failure -- confirm
 * against the NX documentation.
 */
static
int nxread( int fd, char *buf, unsigned int nbytes )
{
	long request;

	request = iread( fd, buf, nbytes );
	for (;;) {
	   if (iodone(request)) {
	      break;
	   }
	   INTERRUPT_CHECK();
	}
	return( nbytes );
}


/*
 * nxwrite(): start an asynchronous write of nbytes from buf to fd with
 * iwrite(), then poll iodone() on the returned tag, running
 * INTERRUPT_CHECK() between polls, until the request has finished.
 *
 * Always returns nbytes; the number of bytes really transferred is not
 * inspected here.
 * NOTE(review): assumes iwrite() does not return on failure -- confirm
 * against the NX documentation.
 */
static
int nxwrite( int fd, char *buf, unsigned int nbytes )
{
        long request;

        request = iwrite( fd, buf, nbytes );
        for (;;) {
           if (iodone(request)) {
              break;
           }
           INTERRUPT_CHECK();
        }
        return( nbytes );
}

#include <sys/mount.h>

/*
 * ispfs(): report whether the file descriptor fd refers to a file on a
 * PFS file system (f_type == MOUNT_PFS).
 *
 * Returns non-zero for a PFS file and 0 otherwise.  If fstatfs() itself
 * fails the file is reported as "not PFS", so callers simply skip the
 * PFS-only operations instead of testing an uninitialized buffer.
 */
static int ispfs(int fd ) {
	struct statfs buffer;

	if (fstatfs( fd, &buffer ) != 0) {
		return( 0 );
	}

	return( buffer.f_type == MOUNT_PFS );
}

	

	/*
	 * NX lsize does not work on NFS files, so LSIZE only calls
	 * lsize(fd, max_size, SIZE_SET) when ispfs() says the descriptor
	 * is on a PFS file system; otherwise it does nothing.
	 * The expansion is a brace block, not a do/while(0) statement.
	 */

#define LSIZE(fd, max_size) { \
	if (ispfs(fd)) { lsize( (fd), (off_t) (max_size), SIZE_SET ); } \
	}





 /* take advantage of new M_ASYNC mode for faster disk i/o */
#if defined(M_ASYNC)

/*
 * some old paragon systems may not have performed the upgrade, in which
 * case M_ASYNC is undefined and the generic open() fallbacks further
 * down are used instead.
 *
 * READ_OPEN opens with gopen() in M_ASYNC mode and a creation mode of 0;
 * WRITE_OPEN does the same but passes the caller's creation mode.
 */

#define READ_OPEN( path, flags ) \
    gopen( (path), (flags), M_ASYNC, (mode_t) 0) 

#define WRITE_OPEN( path, flags, mode ) \
	gopen( (path), (flags), M_ASYNC, (mode_t) (mode) )

#endif /* defined(M_ASYNC) */


	

#else

	/* use plain UNIX primitives: READ/WRITE are read()/write() themselves */

#define READ read
#define WRITE write


#endif

/*
 * Fallbacks for whatever the platform-specific section above did not
 * define: files are opened with plain open(), and LSIZE expands to
 * nothing.
 */
#if !defined(READ_OPEN)
#define READ_OPEN open
#endif



#if !defined(WRITE_OPEN)
#define WRITE_OPEN open
#endif

#if !defined(LSIZE)
#define LSIZE(fd, max_size) 
#endif









/* Intel OSF uses 8Kbytes for paging */
#define HARDWARE_PAGE_SIZE_IN_BYTES (8*1024)

/*
 * Every setting below can be overridden from the compiler command line;
 * the values here are only defaults.
 */

/* page size of the distributed array, in bytes: one hardware page */
#if !defined(DEFAULT_PAGE_SIZE_IN_BYTES)
#define DEFAULT_PAGE_SIZE_IN_BYTES (1*(HARDWARE_PAGE_SIZE_IN_BYTES))
#endif

/* the same page size expressed as a number of BASE_TYPE items */
#if !defined(DEFAULT_PAGE_SIZE)
#define DEFAULT_PAGE_SIZE (DEFAULT_PAGE_SIZE_IN_BYTES/sizeof(BASE_TYPE))
#endif

/* block_size argument handed to do_declare() */
#if !defined(DEFAULT_BLOCK_SIZE)
#define DEFAULT_BLOCK_SIZE 1
#endif


#if defined(USE_FAST_IO)
	/* configure i/o buffer and io nodes for fast i/o */

/* upper bound on the number of processes that touch the disk file */
#if !defined(MAX_IO_NODES)
#define MAX_IO_NODES 1024
#endif

/* staging buffer used by do_diskopen(): 64 pages */
#if !defined(RIOBUFSIZE_IN_BYTES)
#define RIOBUFSIZE_IN_BYTES ((DEFAULT_PAGE_SIZE_IN_BYTES)*64)
#endif

/* staging buffer used by do_flush(): 64 pages */
#if !defined(WIOBUFSIZE_IN_BYTES)
#define WIOBUFSIZE_IN_BYTES ((DEFAULT_PAGE_SIZE_IN_BYTES)*64)
#endif

#else


	/* configure i/o buffer to save memory */

/* upper bound on the number of processes that touch the disk file */
#if !defined(MAX_IO_NODES)
#define MAX_IO_NODES 4
#endif

/* staging buffer used by do_diskopen(): 8 pages */
#if !defined(RIOBUFSIZE_IN_BYTES)
#define RIOBUFSIZE_IN_BYTES ((DEFAULT_PAGE_SIZE_IN_BYTES)*8)
#endif

/* staging buffer used by do_flush(): 8 pages */
#if !defined(WIOBUFSIZE_IN_BYTES)
#define WIOBUFSIZE_IN_BYTES ((DEFAULT_PAGE_SIZE_IN_BYTES)*8)
#endif

#endif /* defined(USE_FAST_IO) */


/*
 * Distributed-array routines and globals used by this file.  They are
 * only declared here; their definitions live elsewhere.  In all of them
 * Iaf is the handle that do_declare() stores through its first argument.
 */

/* switch caching off for array Iaf (see the call sites below) */
extern void do_disable(int Iaf);

/*
 * fetch nsize items starting at item index_start into buf; the returned
 * value is what DOBGATHER passes on to do_wait()
 */
extern int do_bgather(int Iaf, int nsize, int index_start, void *buf);

/* store nsize items from buf into the array starting at index_start */
#define DOBSCATTER do_bscatter
extern void do_bscatter(int Iaf, int nsize, int index_start, void *buf);

/*
 * DOBGATHER: do_bgather() immediately followed by do_wait() on its
 * result.  Expands to a brace block, not a do/while(0) statement.
 */
#define DOBGATHER(Iaf,nitems,istart,buf) {\
	do_wait(do_bgather(Iaf,nitems,istart,buf)); }

/*
 * NOTE(review): the parameter is named Iaf, but DOBGATHER passes the
 * return value of do_bgather() here -- confirm what it identifies.
 */
extern void do_wait(int Iaf);

/* rank of this process and total number of processes */
extern int      myid;
extern int      nproc;

/*
 * create a distributed array called `name` holding gsize items of type
 * ctype (this file passes BASE_TYPE_STRING) and store its handle in *Iaf
 */
extern void
do_declare(int *Iaf, char *name, int gsize,
	   char *ctype, int page_size, int block_size);

extern int do_setchsize(int npages);
extern void do_destroy(int Iaf);
extern void do_init(int param_myid, int param_nproc);

/*
 * Access mode recorded for a simulated file.  NO_TYPE must stay 0: a
 * slot whose fields are all zero is what IS_SLOT_EMPTY treats as free.
 */
typedef enum {
  NO_TYPE    = 0,	/* slot not in use */
  READ_ONLY  = 1,
  WRITE_ONLY = 2,
  READ_WRITE = 3
} file_type;

/*
 * Bookkeeping for one simulated file.  A node whose fields are all
 * zero/NULL/FALSE is a free slot (see IS_SLOT_EMPTY).
 */
typedef struct {
  int             simulated_file_pointer;	/* current position, in bytes */
  int             gsize;	/* global size of file in BASE_TYPE items;
				 * 0 until do_diskopen/do_lsize has run */
  int             gmax_index;	/* max index accessed in array */
  int             Iaf;	/* handle of the distributed array holding
			 * the file contents */
  file_type       ftype;	/* access mode chosen in do_open */
  unsigned int    creat_mode;	/* mode to use if creating the file */
  char           *file_name;	/* full pathname of associated disk
				 * file; allocated copy, released by
				 * EMPTY_SLOT */
  logical         file_touched;	/* has do_write been called on this
				 * file since the last do_flush? */
} do_file_node;

/* largest valid simulated file handle; handles run from 1 to MAX_FILES */
#define MAX_FILES 1024

/*
 * Table of simulated files, indexed directly by handle.  It has
 * MAX_FILES + 1 entries because the routines in this file only accept
 * handles 1..MAX_FILES, which leaves entry 0 unused.  As a file-scope
 * object it starts out zero-filled, i.e. with every slot empty.
 */
do_file_node do_file_array[MAX_FILES + 1];

/*
 * IS_SLOT_EMPTY(do_fd): true when every field of slot do_fd still has
 * its zero/NULL/FALSE value, i.e. the slot is not in use.  do_fd is
 * evaluated several times, so it must be free of side effects.
 */
#define IS_SLOT_EMPTY(do_fd) \
  ( (do_file_array[(do_fd)].simulated_file_pointer == 0) \
 && (do_file_array[(do_fd)].gsize == 0) \
 && (do_file_array[(do_fd)].gmax_index == 0) \
 && (do_file_array[(do_fd)].Iaf == 0) \
 && (do_file_array[(do_fd)].ftype == NO_TYPE) \
 && (do_file_array[(do_fd)].creat_mode == 0) \
 && (do_file_array[(do_fd)].file_name == NULL) \
 && (do_file_array[(do_fd)].file_touched == FALSE) )

/*
 * EMPTY_SLOT(do_fd): return slot do_fd to the free state.  Every field
 * is reset to the value IS_SLOT_EMPTY tests for, and the file_name
 * copy, if any, is released with MEMFREE.
 *
 * Wrapped in do { ... } while (0) so the macro is a single statement
 * and can be used safely in an unbraced if/else.  do_fd is evaluated
 * several times, so it must be free of side effects.
 */
#define EMPTY_SLOT(do_fd) \
do { \
    do_file_array[do_fd].simulated_file_pointer = 0; \
    do_file_array[do_fd].gsize = 0; \
    do_file_array[do_fd].gmax_index = 0; \
    do_file_array[do_fd].Iaf = 0; \
    do_file_array[do_fd].ftype = NO_TYPE; \
    do_file_array[do_fd].creat_mode = 0; \
    if (do_file_array[do_fd].file_name != NULL) { \
      MEMFREE(do_file_array[do_fd].file_name); \
      do_file_array[do_fd].file_name = NULL; \
    } \
    do_file_array[do_fd].file_touched = FALSE; \
} while (0)

/*
 * Access-mode predicates on slot do_fd.  The first three test for one
 * exact mode; IS_READABLE and IS_WRITABLE also accept READ_WRITE.
 */
#define IS_READONLY(do_fd)  (do_file_array[(do_fd)].ftype == READ_ONLY)

#define IS_WRITEONLY(do_fd) (do_file_array[(do_fd)].ftype == WRITE_ONLY)

#define IS_READWRITE(do_fd) (do_file_array[(do_fd)].ftype == READ_WRITE)

#define IS_READABLE(do_fd) \
  ((do_file_array[(do_fd)].ftype == READ_ONLY) || \
   (do_file_array[(do_fd)].ftype == READ_WRITE))

#define IS_WRITABLE(do_fd) \
  ((do_file_array[(do_fd)].ftype == WRITE_ONLY) || \
   (do_file_array[(do_fd)].ftype == READ_WRITE))

/*
 * do_diskopen(): load the disk file behind slot do_fd into a freshly
 * declared distributed array.
 *
 *   do_fd      - handle obtained from do_open(); the slot must be
 *                READ_ONLY or READ_WRITE
 *   lsizebytes - minimum size of the array in bytes; the array is sized
 *                to max(lsizebytes, size of the file on disk) as seen by
 *                process 0
 *
 * The first nio_nodes processes each open the file, read one contiguous
 * share of it through a RIOBUFSIZE-item stack buffer and store the data
 * in the array with DOBSCATTER.  A process whose share is empty (file
 * shorter than the number of i/o processes) simply transfers nothing.
 * The array is created with its cache disabled, and the slot's gsize
 * and Iaf fields are filled in.  Ends with a GSYNC().
 *
 * Returns do_fd.
 */
int
do_diskopen(int do_fd, int lsizebytes)
{
  logical         is_io_node, is_io_node0, is_empty, is_valid;
  int             gsize, Iaf, ierr, offset;
  int             fd = -1;
  int             flags = O_RDONLY;
  int             total_bytes = 0;
  int             i, nbytes, nread;
  int             fsize;

  /* BUFSIZE in term of number of BASE_TYPE, NOT necessarily bytes */

#define RIOBUFSIZE (RIOBUFSIZE_IN_BYTES/sizeof(BASE_TYPE))

  int             iostart[MAX_IO_NODES + 1];
  int             mystrt, myend, nio_nodes;

  int             page_size;
  int             block_size;

  int             nitems, istart, iend;
  char           *path;

  INTERRUPT_CHECK();

  nio_nodes = max(1, min(MAX_IO_NODES, nproc));
  is_io_node = (myid < nio_nodes);
  is_io_node0 = (myid == 0);

  /* the file should already exist, it may be a 0 byte file */
  /* hence O_CREAT flag should not be necessary ! */

  is_valid = (1 <= do_fd) && (do_fd <= MAX_FILES);
  ASSERT(is_valid, "do_diskopen(): do_fd of %d out of range\n", do_fd);

  is_empty = IS_SLOT_EMPTY(do_fd);
  ASSERT(!is_empty, "do_diskopen(): do_fd of %d indexes empty slot\n", do_fd);

  if (IS_READONLY(do_fd)) {
    flags = O_RDONLY;
  } else if (IS_READWRITE(do_fd)) {
    flags = O_RDWR;
  } else {
    /* do_diskopen should not be called on a write-only file */
    ASSERT(FALSE, "do_diskopen(): file modes do not contain READ\n", 0);
  }

  path = do_file_array[do_fd].file_name;
  if (is_io_node) {

    fd = READ_OPEN(path, flags);
    ASSERT(fd >= 0,
	"do_diskopen(): Unable to open file %s for reading\n", path);

    /* discover size of file: seek to the end, size is "total_bytes" */

    offset = 0;
    total_bytes = LSEEK(fd, offset, SEEK_END);
    ASSERT(MOD(total_bytes, sizeof(BASE_TYPE)) == 0,
	"do_diskopen(): total_bytes %d not a multiple of sizeof(BASE_TYPE)\n",
	total_bytes);

    /* reset file pointer to beginning */

    offset = 0;
    ierr = LSEEK(fd, offset, SEEK_SET);
    ASSERT(ierr >= 0, "do_diskopen(): LSEEK failed\n", 0);
  } /* end if (is_io_node) */

  /*
   * Only process 0 contributes a non-zero gsize to GISUM.
   * NOTE(review): presumably GISUM is a global integer sum, which would
   * leave every process with process 0's value -- confirm.
   */
  gsize = 0;
  if (is_io_node0) {
    gsize = max(lsizebytes, total_bytes) / sizeof(BASE_TYPE);
    ASSERT(MOD(max(lsizebytes, total_bytes), sizeof(BASE_TYPE)) == 0,
	"do_diskopen(): size not a multiple of sizeof(BASE_TYPE)\n", 0);
  }

  GISUM(&gsize, 1);
  ASSERT(gsize > 0, "do_diskopen(): gsize of %d out of range\n", gsize);

  /* allocate distributed global array */

  page_size = DEFAULT_PAGE_SIZE;
  block_size = DEFAULT_BLOCK_SIZE;

  do_declare(&Iaf, path, gsize,
	     BASE_TYPE_STRING, page_size, block_size);

  /* disable cache for extra safety */
  do_disable(Iaf);

  /* stuff essential information into structure */
  do_file_array[do_fd].gsize = gsize;
  do_file_array[do_fd].Iaf = Iaf;

  /* read in file */

  if (is_io_node) {

    BASE_TYPE buf[RIOBUFSIZE];

    fsize = total_bytes / sizeof(BASE_TYPE);
    if (fsize > 0) {
      /*
       * iostart[k] is the first C index (0..fsize-1) owned by i/o
       * process k; the last process also takes the remainder.
       */
      iostart[0] = 0;
      for (i = 1; i <= (nio_nodes - 1); i++) {
	iostart[i] = iostart[i - 1] + (fsize / nio_nodes);
      }
      iostart[nio_nodes] = fsize;

      mystrt = iostart[myid];
      is_valid = ((0 <= mystrt) && (mystrt < fsize));
      ASSERT(is_valid, "do_diskopen(): mystrt of %d out of range\n", mystrt);

      /*
       * When fsize < nio_nodes the per-process share is 0 items and
       * myend == mystrt - 1; that is a legal, empty share.
       */
      myend = iostart[myid + 1] - 1;
      is_valid = ((mystrt - 1 <= myend) && (myend < fsize));
      ASSERT(is_valid, "do_diskopen(): myend of %d out of range\n", myend);

      if (myend >= mystrt) {
	istart = mystrt;
	iend = min(myend, istart + RIOBUFSIZE - 1);

	while (istart <= iend) {
	  is_valid = ((0 <= istart) && (istart < fsize));
	  ASSERT(is_valid, "do_diskopen(): istart of %d out of range\n", istart);

	  is_valid = ((0 <= iend) && (iend < fsize));
	  ASSERT(is_valid, "do_diskopen(): iend of %d out of range\n", iend);

	  nitems = (iend - istart + 1);
	  nbytes = nitems * sizeof(BASE_TYPE);

	  offset = istart * sizeof(BASE_TYPE);
	  ierr = LSEEK(fd, offset, SEEK_SET);
	  ASSERT(ierr >= 0, "do_diskopen(): LSEEK failed\n", 0);

	  /*
	   * A read may legally return fewer bytes than requested, so
	   * keep reading until the whole chunk is in buf.  A return of
	   * 0 (unexpected end of file) or less is an error.
	   */
	  nread = 0;
	  while (nread < nbytes) {
	    ierr = READ(fd, ((char *) &(buf[0])) + nread,
			(unsigned int) (nbytes - nread));
	    ASSERT(ierr > 0, "do_diskopen(): READ failed\n", 0);
	    if (ierr <= 0) {
	      break;		/* only reached if ASSERT returns */
	    }
	    nread += ierr;
	  }

	  DOBSCATTER(Iaf, nitems, istart, (void *)buf);

	  istart = iend + 1;
	  iend = min(myend, istart + RIOBUFSIZE - 1);
	}			/* end while */
      }				/* if share not empty */
    }				/* if (fsize) */

    ierr = CLOSE(fd);
    ASSERT(ierr >= 0, "do_diskopen(): Unable to close file\n", 0);
  }				/* end if (is_io_node) */

  GSYNC();

  return (do_fd);
}


int
do_lseek(int do_fd, int offset, int whence)
{

  logical         is_valid, is_empty;
  int             total_bytes;

  INTERRUPT_CHECK();

  is_valid = ((1 <= do_fd) && (do_fd <= MAX_FILES));
  ASSERT(is_valid, "do_lseek(): do_fd of %d out of range\n", do_fd);

  is_empty = IS_SLOT_EMPTY(do_fd);
  ASSERT(!is_empty, "do_lseek(): do_fd of %d indexes empty slot\n", do_fd);

  is_valid = ((whence == SEEK_SET) ||
	      (whence == SEEK_CUR) ||
	      (whence == SEEK_END));
  ASSERT(is_valid, "do_lseek(): value of whence invalid\n", 0);

  if (whence == SEEK_SET) {
    do_file_array[do_fd].simulated_file_pointer = offset;
  } else if (whence == SEEK_CUR) {
    do_file_array[do_fd].simulated_file_pointer += offset;
  } else if (whence == SEEK_END) {
    total_bytes = do_file_array[do_fd].gsize * sizeof(BASE_TYPE);
    do_file_array[do_fd].simulated_file_pointer = total_bytes + offset;
  };

  ASSERT(do_file_array[do_fd].simulated_file_pointer >= 0, "do_lseek(): file pointer of %d out of range\n", do_file_array[do_fd].simulated_file_pointer);

  return (do_file_array[do_fd].simulated_file_pointer);
}


/*
 * do_read(): copy nbytes from the simulated file do_fd, starting at its
 * simulated file pointer, into buf, then advance the pointer.
 *
 *   do_fd  - handle of a READ_ONLY or READ_WRITE slot
 *   buf    - destination, at least nbytes long
 *   nbytes - byte count; must be a multiple of sizeof(BASE_TYPE)
 *
 * The data comes from the distributed array via DOBGATHER; the disk is
 * not touched.  The whole range must lie inside the array (no short
 * reads).  A request for 0 bytes returns 0 without touching the array,
 * matching do_write().  Returns nbytes.
 */
int
do_read(int do_fd, void *buf, int nbytes)
{
  logical         is_valid, is_empty, is_readable;
  int             nitems, iremain;
  int             mystart, myend;

  int             gsize, Iaf;
  int             simulated_file_pointer;

  BASE_TYPE      *ibuf;

  INTERRUPT_CHECK();

  ibuf = (BASE_TYPE *) buf;

  is_valid = ((1 <= do_fd) && (do_fd <= MAX_FILES));
  ASSERT(is_valid, "do_read(): do_fd of %d out of range\n", do_fd);

  gsize = do_file_array[do_fd].gsize;
  Iaf = do_file_array[do_fd].Iaf;
  simulated_file_pointer = do_file_array[do_fd].simulated_file_pointer;

  is_empty = IS_SLOT_EMPTY(do_fd);
  ASSERT(!is_empty, "do_read(): do_fd of %d indexes empty slot\n", do_fd);

  is_readable = IS_READABLE(do_fd);
  ASSERT(is_readable, "do_read(): file indexed by do_fd of %d not readable\n", do_fd);

  is_valid = (MOD(simulated_file_pointer, sizeof(BASE_TYPE)) == 0);
  ASSERT(is_valid, "do_read(): mismatch with sizeof(BASE_TYPE)\n", 0);

  /*
   * An empty request would give myend == mystart - 1 and trip the range
   * checks below (e.g. at offset 0 or at end of file), so handle it
   * here; a negative count is rejected outright.
   */
  if (nbytes == 0) return (0);
  ASSERT(nbytes > 0, "do_read(): nbytes of %d out of range\n", nbytes);

  nitems = nbytes / sizeof(BASE_TYPE);
  iremain = nbytes - nitems * sizeof(BASE_TYPE);
  ASSERT(iremain == 0, "do_read(): alignment mismatch\n", 0);

  /* item range [mystart, myend] covered by this request */
  mystart = (simulated_file_pointer / sizeof(BASE_TYPE));
  myend = mystart + nitems - 1;

  is_valid = ((0 <= mystart) && (mystart < gsize));
  ASSERT(is_valid, "do_read(): mystart of %d out of range\n", mystart);

  is_valid = ((0 <= myend) && (myend < gsize));
  ASSERT(is_valid, "do_read(): myend of %d out of range\n", myend);

  /* keep track of largest index access */

  do_file_array[do_fd].gmax_index = max(do_file_array[do_fd].gmax_index, myend);

  DOBGATHER(Iaf, nitems, mystart, ibuf);

  do_file_array[do_fd].simulated_file_pointer = simulated_file_pointer + nbytes;

  return (nbytes);
}


/*
 * do_rclose(): close a simulated file without writing anything back.
 * After a GSYNC() the distributed array is destroyed and the slot is
 * returned to the free state.  Returns 0.
 */
int
do_rclose(int do_fd)
{
  logical         is_valid, is_empty;
  int             array_handle;

  is_valid = ((do_fd >= 1) && (do_fd <= MAX_FILES));
  ASSERT(is_valid, "do_rclose(): do_fd of %d out of range\n", do_fd);

  is_empty = IS_SLOT_EMPTY(do_fd);
  ASSERT((!is_empty), "do_rclose(): do_fd of %d indexes empty slot\n", do_fd);

  /* pick up the handle first; EMPTY_SLOT below wipes it from the slot */
  array_handle = do_file_array[do_fd].Iaf;

  GSYNC();
  do_destroy(array_handle);

  EMPTY_SLOT(do_fd);

  return (0);
}

int
do_open(char *path, int flags, int mode)
{

  /*
   * do_open will find an empty file slot, and initialize:
   * simulated_file_pointer, file_type, creat_mode and file_name. The
   * other records(gsize and Iaf) must be initialized by do_lsize, or
   * do_diskopen which must also allocate the distributed array with
   * do_declare.
   */

  logical         found, is_valid;
  logical         is_readonly, is_writeonly, is_readwrite;
  logical         is_io_node0;
  int             lsizebytes;

  int             do_fd, fd;

  file_type       fflag;
  int             i;

  extern void     do_enable(int Iaf);
  extern int      myid;

  /* ==================== */
  INTERRUPT_CHECK();

  is_io_node0 = (myid == 0);

  /* allocate structure block for file */

  found = FALSE;
  for (i = 1; i <= MAX_FILES; i++) {
    found = IS_SLOT_EMPTY(i);
    if (found) {
      do_fd = i;
      break;
    }
  }

  ASSERT(found, "do_open(): empty slot not found\n", 0);

  is_readonly = (flags == O_RDONLY);
  is_writeonly = (flags == O_WRONLY) || (flags == (O_WRONLY | O_CREAT));
  is_readwrite = (flags == O_RDWR) || (flags == (O_RDWR | O_CREAT));
  is_valid = (is_readonly || is_writeonly || is_readwrite);
  ASSERT(is_valid, "do_open(): invalid flags\n", 0);

  if (is_readonly) {
    fflag = READ_ONLY;
  } else if (is_writeonly) {
    fflag = WRITE_ONLY;
  } else if (is_readwrite) {
    fflag = READ_WRITE;
  } else {
    ASSERT(FALSE, "do_open(): invalid mode flags\n", 0); /* impossible */
  };
  do_file_array[do_fd].ftype = fflag;

  do_file_array[do_fd].simulated_file_pointer = 0;
  do_file_array[do_fd].creat_mode = mode;

  /* duplicated path (string) into file_name */
  STRDUP(do_file_array[do_fd].file_name, path);

  GSYNC();

  if (IS_WRITABLE(do_fd)) {

    /* touch a file, thereby creating a zero byte file if it */
    /* does not already exist */


    /* the O_CREAT and mode are used here if the file does */
    /* not already exist */

    if (is_io_node0) {
	/* use plain unix open, only one processor doing */

      fd = open(path, flags, mode);
      ASSERT(fd >= 0, "do_rwopen(): Unable to open file %s\n", path);
      close(fd);
    };				/* if (is_io_node0) */
  };				/* end if  */

  GSYNC();
  if (IS_READONLY(do_fd)) {
    /* no need to wait for lsize */

    do_diskopen(do_fd, lsizebytes = 0);
    do_enable(do_file_array[do_fd].Iaf);

  };

  GSYNC();
  return (do_fd);
}

/*
 * do_flush(): write the contents of simulated file do_fd back to its
 * disk file.
 *
 * The slot must be writable and already sized (gsize > 0, Iaf > 0).  If
 * no process has written to the file since the last flush, nothing is
 * written.  Otherwise process 0 truncates the disk file, and the first
 * nio_nodes processes each open it and write one contiguous share of
 * items 0..gmax_index, fetched from the distributed array with
 * DOBGATHER through a WIOBUFSIZE-item stack buffer.  A process whose
 * share is empty (fewer items than i/o processes) writes nothing.
 *
 * Contains GSYNC()/GIMAX() calls.  Clears the slot's file_touched flag
 * and returns 0.
 */
int
do_flush(int do_fd)
{
  logical         is_valid, is_empty, is_writable, is_modified;

  int             gmax_index, max_gsize;

#define WIOBUFSIZE (WIOBUFSIZE_IN_BYTES/sizeof(BASE_TYPE))

  int             simulated_file_pointer;
  int             fd;
  int             Iaf;
  int             gsize;
  int             creat_mode;
  char           *file_name;

  int             iostart[MAX_IO_NODES + 1];
  int             offset;
  int             ierr;
  int             i;
  int             istart, iend;
  int             mystart;
  int             myend;
  int             nio_nodes;

  int             nitems, nbytes, nwritten;
  int             totalbytes;

  logical         is_io_node;
  logical         is_io_node0;

  INTERRUPT_CHECK();

  nio_nodes = max(1, min(MAX_IO_NODES, nproc));
  is_io_node = (myid < nio_nodes);
  is_io_node0 = (myid == 0);

  is_valid = ((1 <= do_fd) && (do_fd <= MAX_FILES));
  ASSERT(is_valid, "do_flush(): do_fd of %d out of range\n", do_fd);

  is_empty = IS_SLOT_EMPTY(do_fd);
  ASSERT(!is_empty, "do_flush(): do_fd of %d indexes empty slot\n", do_fd);

  simulated_file_pointer = do_file_array[do_fd].simulated_file_pointer;
  gsize = do_file_array[do_fd].gsize;
  Iaf = do_file_array[do_fd].Iaf;
  creat_mode = do_file_array[do_fd].creat_mode;
  file_name = do_file_array[do_fd].file_name;

  is_modified = (do_file_array[do_fd].file_touched == TRUE);
  /* for safety, synchronize before doing global operations */

  GSYNC();
  GIMAX(is_modified);		/* all must agree whether file is modified */

  is_writable = IS_WRITABLE(do_fd);
  ASSERT(is_writable, "do_flush(): file indexed by %d not writable\n", do_fd);

  ASSERT(gsize > 0, "do_flush(): gsize of %d out of range\n", gsize);
  ASSERT(Iaf > 0, "do_flush(): Iaf of %d out of range\n", Iaf);

  totalbytes = gsize * sizeof(BASE_TYPE);
  is_valid = (0 <= simulated_file_pointer) &&
    (simulated_file_pointer <= totalbytes);
  ASSERT(is_valid, "do_flush(): simulated_file_pointer of %d out of range\n", simulated_file_pointer);

  /*
   * if the file has not been modified, we don't need to write it to
   * disk, we just need to be sure to GSYNC
   */

  if (is_modified) {

    /* first truncate file to 0 bytes */
    /* note we must guarantee this is done before any open */
    /* assume GIMAX(:) is doing an implicit synchronization */

    if (is_io_node0) {

      /* use unix open, only 1 processor doing */

      fd = open(file_name, O_TRUNC | O_WRONLY | O_CREAT, creat_mode);
      ASSERT(fd >= 0,
		"do_flush(): Unable to open file %s for writing\n", file_name);
      close(fd);
    }
    /*
     * determine actual array size as maximum index actually
     * accessed.  Synchronize before global operation.
     */

    GSYNC();
    GIMAX(do_file_array[do_fd].gmax_index);

    gmax_index = do_file_array[do_fd].gmax_index;
    is_valid = (0 <= gmax_index) && (gmax_index < gsize);
    ASSERT(is_valid, "do_flush(): gmax_index of %d out of range\n", gmax_index);

    /* plus 1 to convert from C index to number of entries */
    max_gsize = min(1+gmax_index, gsize);

    /* open disk file */

    is_valid = (file_name != NULL);
    ASSERT(is_valid, "do_flush(): file_name is NULL\n", 0);

    if (is_io_node) {

      BASE_TYPE buf[WIOBUFSIZE];

      fd = WRITE_OPEN(file_name, O_WRONLY | O_CREAT, creat_mode);
      ASSERT(fd >= 0,
	"do_flush(): Unable to open file %s for writing\n", file_name);

      /*
       * max_gsize counts BASE_TYPE items, whereas the file size handed
       * to LSIZE is in bytes; the two only coincide for a char base type.
       */
      LSIZE( fd, max_gsize * sizeof(BASE_TYPE) );

      /* write out file */

      /*
       * iostart[k] is the first C index (0..max_gsize-1) owned by i/o
       * process k; the last process also takes the remainder.
       */
      iostart[0] = 0;
      for (i = 1; i <= (nio_nodes - 1); i++) {
	iostart[i] = iostart[i - 1] + (max_gsize / nio_nodes);
      }
      iostart[nio_nodes] = max_gsize;

      mystart = iostart[myid];
      is_valid = ((0 <= mystart) && (mystart < max_gsize));
      ASSERT(is_valid, "do_flush(): mystart of %d out of range\n", mystart);

      /*
       * When max_gsize < nio_nodes the per-process share is 0 items and
       * myend == mystart - 1; that is a legal, empty share.
       */
      myend = iostart[myid + 1] - 1;
      is_valid = (mystart - 1 <= myend) && (myend < max_gsize);
      ASSERT(is_valid, "do_flush(): myend of %d out of range\n", myend);

      if (myend >= mystart) {
	istart = mystart;
	iend = min(myend, istart + WIOBUFSIZE - 1);

	/* position once; the writes below are sequential */
	offset = istart * sizeof(BASE_TYPE);
	ierr = LSEEK(fd, offset, SEEK_SET);
	ASSERT(ierr >= 0, "do_flush(): LSEEK failed\n", 0);

	while (istart <= iend) {
	  is_valid = ((0 <= istart) && (istart < max_gsize));
	  ASSERT(is_valid, "do_flush(): istart of %d out of range\n", istart);

	  is_valid = ((0 <= iend) && (iend < max_gsize));
	  ASSERT(is_valid, "do_flush(): iend of %d out of range\n", iend);

	  nitems = (iend - istart + 1);
	  nbytes = nitems * sizeof(BASE_TYPE);

	  DOBGATHER(Iaf, nitems, istart, &(buf[0]));

	  /*
	   * now we've got the info, stuff it onto the disk file.  A
	   * write may legally accept fewer bytes than requested, so
	   * keep writing until the whole chunk is out; a return of 0
	   * or less is an error.
	   */
	  nwritten = 0;
	  while (nwritten < nbytes) {
	    ierr = WRITE(fd, ((char *) (&(buf[0]))) + nwritten,
			 (unsigned int) (nbytes - nwritten));
	    ASSERT(ierr > 0, "do_flush(): write failed\n", 0);
	    if (ierr <= 0) {
	      break;		/* only reached if ASSERT returns */
	    }
	    nwritten += ierr;
	  }

	  istart = iend + 1;
	  iend = min(myend, istart + WIOBUFSIZE - 1);
	}	/* end while */
      }		/* if share not empty */

      ierr = CLOSE(fd);
      ASSERT(ierr >= 0, "do_flush(): close failed\n", 0);
    }
  } /* end if is_modified */
  GSYNC();

  /* disk file has now been updated, so reset file_touched flag */
  do_file_array[do_fd].file_touched = FALSE;

  return (0);
}

/*
 * do_wclose(): close a writable simulated file.  After a GSYNC() the
 * file is flushed to disk with do_flush(), its distributed array is
 * destroyed and the slot is returned to the free state.
 * Returns do_fd.
 */
int
do_wclose(int do_fd)
{
  logical         is_valid, is_empty;
  int             ierr;

  GSYNC();

  is_valid = ((do_fd >= 1) && (do_fd <= MAX_FILES));
  ASSERT(is_valid, "do_wclose(): do_fd of %d out of range\n", do_fd);

  is_empty = IS_SLOT_EMPTY(do_fd);
  ASSERT(!is_empty, "do_wclose(): do_fd of %d indexes empty slot\n", do_fd);

  /* push pending data to disk before the array goes away */
  ierr = do_flush(do_fd);
  ASSERT(ierr == 0, "do_wclose(): do_flush of file indexed by %d failed\n", do_fd);

  do_destroy(do_file_array[do_fd].Iaf);

  EMPTY_SLOT(do_fd);

  return (do_fd);
}

/*
 * do_lsize(): fix the size of a writable simulated file and create its
 * distributed array.
 *
 *   do_fd  - handle of a WRITE_ONLY or READ_WRITE slot
 *   nbytes - size in bytes; positive multiple of sizeof(BASE_TYPE)
 *
 * A READ_WRITE file is loaded from disk through do_diskopen(), with
 * nbytes as the minimum size.  A WRITE_ONLY file only gets an array
 * declared; its contents are not initialized from disk.  In both cases
 * the array's cache is then disabled.  Returns nbytes.
 */
int
do_lsize(int do_fd, int nbytes)
{
  logical         is_valid, is_writable, is_readonly, is_empty;
  int             Iaf;
  int             item_count;

  /* file must be writable */

  INTERRUPT_CHECK();

  is_valid = ((do_fd >= 1) && (do_fd <= MAX_FILES));
  ASSERT(is_valid, "do_lsize(): do_fd of %d out of range\n", do_fd);

  /* the handle must refer to a slot that is in use */
  is_empty = IS_SLOT_EMPTY(do_fd);
  ASSERT(!is_empty, "do_lsize(): do_fd of %d indexes empty slot\n", do_fd);

  is_readonly = IS_READONLY(do_fd);
  ASSERT(!is_readonly, "do_lsize(): file indexed by %d is read-only\n", do_fd);

  is_writable = IS_WRITABLE(do_fd);
  ASSERT(is_writable, "do_lsize(): file indexed by %d not writable\n", do_fd);

  /* the requested size must be positive and a whole number of items */
  ASSERT(nbytes > 0, "do_lsize(): nbytes of %d not valid\n", nbytes);
  is_valid = (MOD(nbytes, sizeof(BASE_TYPE)) == 0);
  ASSERT(is_valid, "do_lsize(): alignment mismatch\n", 0);

  if (IS_READABLE(do_fd)) {
    /* read-write file: existing disk contents are loaded as well */
    do_diskopen(do_fd, nbytes);
  } else if (IS_WRITEONLY(do_fd)) {
    /*
     * write-only file: the global array still has to be allocated, but
     * there is no need to initialize it from the disk file
     */
    item_count = nbytes / sizeof(BASE_TYPE);
    ASSERT(MOD(nbytes, sizeof(BASE_TYPE)) == 0, "do_lsize(): alignment mismatch\n", 0);

    do_declare(&Iaf, do_file_array[do_fd].file_name, item_count,
	       BASE_TYPE_STRING, DEFAULT_PAGE_SIZE, DEFAULT_BLOCK_SIZE);

    do_file_array[do_fd].Iaf = Iaf;
    do_file_array[do_fd].gsize = item_count;
  } else {
    ASSERT(FALSE, "do_lsize(): Invalid file mode\n", 0);
  }

  /* writable files, MUST disable cache */
  do_disable(do_file_array[do_fd].Iaf);

  return (nbytes);
}

/*
 * do_write(): copy nbytes from buf into the simulated file do_fd at its
 * simulated file pointer, then advance the pointer.
 *
 *   do_fd  - handle of a writable slot on which do_lsize() has run
 *   buf    - source data, nbytes long
 *   nbytes - byte count; multiple of sizeof(BASE_TYPE); 0 is a no-op
 *
 * The data goes into the distributed array via DOBSCATTER; the disk
 * file is only updated later by do_flush().  The write may not extend
 * past the size set by do_lsize().  Marks the file as touched and
 * returns nbytes.
 */
int
do_write(int do_fd, void *buf, int nbytes)
{
  logical         is_valid, is_empty, is_writable;

  int             nitems, mystart, myend;
  int             gsize, totalbytes;
  int             simulated_file_pointer;

  BASE_TYPE      *ibuf = (BASE_TYPE *) buf;

  INTERRUPT_CHECK();

  is_valid = ((do_fd >= 1) && (do_fd <= MAX_FILES));
  ASSERT(is_valid, "do_write(): do_fd of %d out of range\n", do_fd);

  is_empty = IS_SLOT_EMPTY(do_fd);
  ASSERT(!is_empty, "do_write(): do_fd of %d indexes empty slot\n", do_fd);

  is_writable = IS_WRITABLE(do_fd);
  ASSERT(is_writable, "do_write(): file indexed by %d not writable\n", do_fd);

  /* gsize stays 0 until do_lsize has been called */
  gsize = do_file_array[do_fd].gsize;
  is_valid = (do_file_array[do_fd].gsize != 0);
  ASSERT(is_valid, "do_write(): do_lsize has not been called\n", 0);

  /* the write must end at or before the end of the file */
  totalbytes = gsize * sizeof(BASE_TYPE);
  simulated_file_pointer = do_file_array[do_fd].simulated_file_pointer;
  is_valid = ((simulated_file_pointer + nbytes) <= totalbytes);
  ASSERT(is_valid, "do_write(): attempt to write past end of file %d set by do_lsize\n", totalbytes);

  /* both the position and the length must fall on item boundaries */
  is_valid = (MOD(simulated_file_pointer, sizeof(BASE_TYPE)) == 0);
  ASSERT(is_valid, "do_write(): alignment mismatch\n", 0);
  if (nbytes == 0) {
    return (0);
  }
  ASSERT(nbytes > 0, "do_write(): nbytes of %d out of range\n", nbytes);
  is_valid = (MOD(nbytes, sizeof(BASE_TYPE)) == 0);
  ASSERT(is_valid, "do_write(): alignment mismatch\n", 0);

  /* item range [mystart, myend] covered by this request */
  nitems = nbytes / sizeof(BASE_TYPE);
  mystart = (simulated_file_pointer / sizeof(BASE_TYPE));
  is_valid = ((0 <= mystart) && (mystart < gsize));
  ASSERT(is_valid, "do_write(): mystart of %d out of range\n", mystart);

  myend = mystart + nitems - 1;
  is_valid = ((0 <= myend) && (myend < gsize));
  ASSERT(is_valid, "do_write(): myend of %d out of range\n", myend);

  /* remember the highest item touched; do_flush writes up to it */
  do_file_array[do_fd].gmax_index =
    max(myend, do_file_array[do_fd].gmax_index);

  DOBSCATTER(do_file_array[do_fd].Iaf, nitems, mystart, &(ibuf[0]));

  do_file_array[do_fd].simulated_file_pointer =
    simulated_file_pointer + nbytes;

  /* record the modification so that do_flush knows it has work to do */
  if (nbytes > 0) {
    do_file_array[do_fd].file_touched = TRUE;
  }
  return (nbytes);
}


int
do_close(int do_fd)
{
  GSYNC();

  if (IS_READONLY(do_fd)) {
    return (do_rclose(do_fd));
  } else if (IS_WRITABLE(do_fd)) {
    return (do_wclose(do_fd));
  }
  return (0);
}
