/***********************************************************************
* DOLIB/DONIO Version 0.0 (8/24/94)                                    *
*  Software to emulate shared memory on distributed memory environments*
* written by:                                                          *
*  Ed D'Azevedo and Charles Romine of Oak Ridge National Laboratory    *
*                                                                      *
* Questions and comments should be directed to                         *
*      efdazedo@msr.epm.ornl.gov or romine@msr.epm.ornl.gov            *
*                                                                      *
*  Please notify and acknowledge the authors in any research or        *
*  publications utilizing DOLIB/DONIO or any part of the code.         *
*                                                                      *
* NOTICE: Neither the institution nor the author make any              *
*  representations about the suitability of this software for any      *
*  purpose. This software is provided "as is", without express or      *
*  implied warranty.                                                   *
************************************************************************/

#include "stdinc.h"
#include "globals.h"
#include "message.h"


#include <assert.h>

/*
 * NOTE: 2 arrays are packed in list[:], this must be consistent with the
 * routine (do_axpby)
 */

#define START_LIST(i) list[2*(i)]
#define SIZE_LIST(i)  list[2*(i) + 1]

#define COMPUTE_LOCAL_INDEX(gni,ip) { \
	/* compute index into local page block */ \
	\
	gpage_no = DIV_OR_SHIFT((gni)-1,page_size,page_shift); \
	is_valid = ((0 <= gpage_no) && (gpage_no < total_pages)); \
	ASSERT(is_valid, \
	       "COMPUTE_LOCAL_INDEX: gpage_no of %d out of range\n", \
	       gpage_no); \
	\
	gblock_no = DIV_OR_SHIFT(gpage_no,block_size,block_shift); \
	is_valid = ((0 <= gblock_no) && (gblock_no < total_blocks)); \
	ASSERT(is_valid, \
	       "COMPUTE_LOCAL_INDEX: gblock_no of %d out of range\n", \
	       gblock_no); \
	\
	lblock_no = DIV_OR_SHIFT(gblock_no,nproc,proc_shift); \
	is_valid = ((0 <= lblock_no) && (lblock_no < nblocks )); \
	ASSERT(is_valid, \
	       "COMPUTE_LOCAL_INDEX: lblock_no of %d out of range\n", \
	       lblock_no); \
	\
	iproc = MOD_OR_SHIFT(gblock_no,nproc,proc_shift); \
	ASSERT(iproc == gni2proc(Iaf,(gni)), \
	       "COMPUTE_LOCAL_INDEX: iproc of %d mismatch with gni2proc\n", \
	       iproc); \
	\
	is_local = (iproc == myid); \
	ASSERT(is_local, \
	       "COMPUTE_LOCAL_INDEX: computed local index not local\n", 0); \
	\
	block_offset = MOD_OR_SHIFT(gpage_no,block_size,block_shift); \
	lpage_no = MUL_OR_SHIFT(lblock_no,block_size,block_shift) + \
					block_offset; \
	\
	ioffset = MOD_OR_SHIFT((gni)-1,page_size,page_shift); \
	ip = MUL_OR_SHIFT(lpage_no,page_size,page_shift) + ioffset; \
}

void
local_axpby(int Iaf, int ialpha, int ibeta, dfloat dalpha, dfloat dbeta,
	    int ncount, void *xlist, int *list,
	    logical is_regular_mode,
	    logical use_compress, int nsets,
	    logical returnz)
{

  extern void     vec_iaxpby(int nsize, int alpha, int beta,
			     int *xvec, int *yvec);

  extern void     vec_raxpby(int nsize, dfloat alpha, dfloat beta,
			     float *xvec, float *yvec);

  extern void     vec_daxpby(int nsize, dfloat alpha, dfloat beta,
			     dfloat * xvec, dfloat * yvec);

  extern void     vec_iaxpbyz(int nsize, int alpha, int beta,
			      int *xvec, int *yvec);

  extern void     vec_raxpbyz(int nsize, dfloat alpha, dfloat beta,
			      float *xvec, float *yvec);

  extern void     vec_daxpbyz(int nsize, dfloat alpha, dfloat beta,
			      dfloat * xvec, dfloat * yvec);

  extern int      gni2proc(int Iaf, int gni);


  extern int      myid;
  extern int      nproc, proc_shift;

  logical         is_local, is_integer, is_dfloat, is_char, is_real, is_valid;
  logical         is_scatter;

  int             gni_end, gni, i, iproc, ip, gsize, page_size, block_size;
  int             total_pages, total_blocks, nblocks;

  int             gpage_no, gblock_no, lblock_no, block_offset, lpage_no;
  int             ioffset, page_shift, block_shift;

  struct Iarray_node *anp;
  int            *iblock_ptr;
  dfloat         *dblock_ptr;
  char           *cblock_ptr;
  float          *rblock_ptr;

  int            *ixlist;
  dfloat         *dxlist;
  char           *cxlist;
  float          *rxlist;

  char           *csrc;
  char           *cdest;
  int *isrc; int *idest;
  float *rsrc; float *rdest;
  dfloat *dsrc; dfloat *ddest;

  int iytemp; char cytemp; float rytemp; dfloat dytemp;

  int             jp, buf_ptr, isize;

  /* ================== */

  /* critical section */
  if (is_regular_mode) {
    DISABLE_INTERRUPT();
  }

  if (use_compress) {
    ASSERT(nsets >= 1,
	   "local_axpby(): nsets of %d not valid with compression\n", nsets);
  }
  is_valid = ((1 <= Iaf) && (Iaf <= MAX_GLOBAL_ARRAY));
  ASSERT(is_valid, "local_axpby(): Iaf of %d out of range\n", Iaf);

  anp = Global_array[Iaf];
  ASSERT(anp != NULL, "local_axpby(): Iaf of %d indexes null entry\n", Iaf);

  is_integer = (anp->type == INTEGER);
  is_dfloat = (anp->type == REAL8);
  is_char = (anp->type == CHAR);
  is_real = (anp->type == REAL);

  is_valid = (is_integer || is_dfloat || is_char || is_real);
  ASSERT(is_valid, "local_axpby(): global array type not valid\n", 0);

  ixlist = NULL;
  cxlist = NULL;
  dxlist = NULL;
  rxlist = NULL;

  if (is_dfloat) {
    dxlist = (dfloat *) xlist;
  } else if (is_char) {
    cxlist = (char *) xlist;
  } else if (is_integer) {
    ixlist = (int *) xlist;
  } else if (is_real) {
    rxlist = (float *) xlist;
  } else {
    ASSERT(FALSE, "local_axpby(): global array type not valid\n", 0);
  }

  gsize = anp->gsize;
  ASSERT(gsize > 0, "local_axpby(): gsize of %d out of range\n", gsize);

  page_size = anp->page_size;
  ASSERT(page_size > 0,
	 "local_axpby(): page_size of %d out of range\n", page_size);
  page_shift = anp->page_shift;
  ASSERT(page_shift == compute_shift(page_size), \
	 "local_axpby(): error -- page_shift = %d\n", page_shift);

  block_size = anp->block_size;
  ASSERT(block_size > 0,
	 "local_axpby(): block_size of %d out of range\n", block_size);
  block_shift = anp->block_shift;
  ASSERT(block_shift == compute_shift(block_size), \
	 "local_axpby(): error -- block_shift = %d\n", block_shift);

  total_pages = anp->total_pages;
  ASSERT(total_pages > 0,
	 "local_axpby(): total_pages of %d out of range\n", total_pages);

  total_blocks = anp->total_blocks;
  ASSERT(total_blocks > 0,
	 "local_axpby(): total_blocks of %d out of range\n", total_blocks);

  nblocks = anp->nblocks;
  ASSERT(nblocks >= 0,
	 "local_axpby(): nblocks of %d out of range\n", nblocks);

  iblock_ptr = anp->iblock_ptr;
  dblock_ptr = anp->dblock_ptr;
  cblock_ptr = anp->cblock_ptr;
  rblock_ptr = anp->rblock_ptr;

  if (is_dfloat) {
    ASSERT(dblock_ptr != NULL,
	   "local_axpby(): type is double, but dblock_ptr is null\n", 0);
  } else if (is_char) {
    ASSERT(cblock_ptr != NULL,
	   "local_axpby(): type is char, but cblock_ptr is null\n", 0);
  } else if (is_integer) {
    ASSERT(iblock_ptr != NULL,
	   "local_axpby(): type is integer, but iblock_ptr is null\n", 0);
  } else if (is_real) {
    ASSERT(rblock_ptr != NULL,
	   "local_axpby(): type is real, but rblock_ptr is null\n", 0);
  } else {
    ASSERT(FALSE, "local_axpby(): global array type not valid\n", 0);
  }

  is_scatter = ((ialpha == 1) && (ibeta == 0) &&
		(dalpha == (dfloat) 1) && (dbeta == (dfloat) 0));

	/* assume arithmetic on char type does not make sense */
  if (is_char) {
    ASSERT(is_scatter,
	   "local_axpby(): arithmetic operations on characters\n", 0);
  }
  if (use_compress) {
    buf_ptr = 0;
    for (i = 0; i < nsets; i++) {
      gni = START_LIST(i);
      is_valid = ((1 <= gni) && (gni <= gsize));
      ASSERT(is_valid, "local_axpby(): gni of %d out of range\n", gni);

#if defined(DEBUG)
      iproc = gni2proc(Iaf, gni);
      is_local = (iproc == myid);
      ASSERT(is_local, "local_axpby(): iproc mismatch with gni2proc\n", 0);
#endif

      isize = SIZE_LIST(i);
      gni_end = gni + isize - 1;
      is_valid = ((1 <= gni_end) && (gni_end <= gsize));
      ASSERT(is_valid, "local_axpby(): gni_end of %d out of range\n", gni_end);

#if defined(DEBUG)
      iproc = gni2proc(Iaf, gni);
      printf("iproc, nproc, proc_shift: %d, %d, %d\n", iproc, nproc, proc_shift);
#endif
      
      COMPUTE_LOCAL_INDEX(gni, ip);
      if (is_scatter) {
	if (is_char) {
	  csrc = (char *) (&(cxlist[buf_ptr]));
	  cdest = (char *) (&(cblock_ptr[ip]));
	  if (returnz) {
	    int j;
	    for(j=0; j < isize; j++ ) {
	      cytemp = cdest[j];
	      cdest[j] = csrc[j];
	      csrc[j] = cytemp;
	    };
	  }
	  else {
	    CCOPY( csrc, cdest, isize );
	  };
	} else if (is_dfloat) {
	  dsrc = (dfloat *) (&(dxlist[buf_ptr]));
	  ddest = (dfloat *) (&(dblock_ptr[ip]));
	  if (returnz) {
	    int j;
	    for(j=0; j < isize; j++ ) {
	      dytemp = ddest[j];
	      ddest[j] = dsrc[j];
	      dsrc[j] = dytemp;
	    };
	  }
	  else {
	    DCOPY(dsrc,ddest,isize);
	  };
	} else if (is_integer) {
	  isrc = (int *) (&(ixlist[buf_ptr]));
	  idest = (int *) (&(iblock_ptr[ip]));
	  if (returnz) {
	    int j;
	    for(j=0; j < isize; j++ ) {
	      iytemp = idest[j];
	      idest[j] = isrc[j];
	      isrc[j] = iytemp;
	    }; 
	  }
	  else {
	    ICOPY(isrc,idest,isize);
	  };
	} else if (is_real) {
	  rsrc = (float *) (&(rxlist[buf_ptr]));
	  rdest = (float *) (&(rblock_ptr[ip]));
	  if (returnz) {
	    int j;
	    for(j=0; j < isize; j++ ) {
	      rytemp = rdest[j];
	      rdest[j] = rsrc[j];
	      rsrc[j] = rytemp;
	    };
	  }
	  else {
	    RCOPY(rsrc,rdest,isize);
	  };
	} else {
	  ASSERT(FALSE, "local_axpby(): global array type not valid\n", 0);
	}
      } else {
	/* do arithmetic */

	jp = buf_ptr;

	/* critical section */

	if (is_dfloat) {
	  if (returnz) {
	    vec_daxpbyz(isize, dalpha, dbeta,
			&(dxlist[jp]), &(dblock_ptr[ip]));
	  }
	  else {
	    vec_daxpby(isize, dalpha, dbeta,
		       &(dxlist[jp]), &(dblock_ptr[ip]));
	  };
	} else if (is_integer) {
	  if (returnz) {
	    vec_iaxpbyz(isize, ialpha, ibeta,
			&(ixlist[jp]), &(iblock_ptr[ip]));
	  }
	  else {
	    vec_iaxpby(isize, ialpha, ibeta,
		       &(ixlist[jp]), &(iblock_ptr[ip]));
	  };
	} else if (is_real) {
	  if (returnz) {
	    vec_raxpbyz(isize, dalpha, dbeta,
			&(rxlist[jp]), &(rblock_ptr[ip]));
	  }
	  else {
	    vec_raxpby(isize, dalpha, dbeta,
		       &(rxlist[jp]), &(rblock_ptr[ip]));
	  };
	} else {
	  ASSERT(FALSE, "local_axpby(): global array type not valid\n", 0);
	}

      }	/* end if (is_scatter) */

      buf_ptr = buf_ptr + isize;
    }		/* end for(i) */
  } else {

    /* no compression */

    for (i = 0; i < ncount; i++) {

      gni = list[i];
      is_valid = ((1 <= gni) && (gni <= gsize));
      ASSERT(is_valid, "local_axpby(): gni of %d out of range\n", gni);

      COMPUTE_LOCAL_INDEX(gni, ip);

      if (is_integer) {
	if (is_scatter) {
	  if (returnz) {
	    iytemp = iblock_ptr[ip];
	    iblock_ptr[ip] = ixlist[i];
	    ixlist[i] = iytemp;
	  }
	  else {
	    iblock_ptr[ip] = ixlist[i];
	  };
	} else {
	  if (returnz) {
	    iytemp = iblock_ptr[ip];
	    iblock_ptr[ip] = 
	      ialpha * ixlist[i] + 
		ibeta * iblock_ptr[ip];
	    ixlist[i] = iytemp;
	  }
	  else {
	    iblock_ptr[ip] = 
	      ialpha * ixlist[i] + 
		ibeta * iblock_ptr[ip];
	  };
	}
      } else if (is_dfloat) {
	if (is_scatter) {
	  if (returnz) {
	    dytemp = dblock_ptr[ip];
	    dblock_ptr[ip] = dxlist[i];
	    dxlist[i] = dytemp;
	  }
	  else {
	    dblock_ptr[ip] = dxlist[i];
	  };
	} else {
	  if (returnz) {
	    dytemp = dblock_ptr[ip];
	    dblock_ptr[ip] = 
	      dalpha * dxlist[i] + 
		dbeta * dblock_ptr[ip];
	    dxlist[i] = dytemp;
	  }
	  else {
	    dblock_ptr[ip] = 
	      dalpha * dxlist[i] + 
		dbeta * dblock_ptr[ip];
	  };
	}
      } else if (is_char) {
	ASSERT(is_scatter,
	       "local_axpby(): arithmetic operations on character data\n", 0);
	if (returnz) {
	  cytemp = cblock_ptr[ip];
	  cblock_ptr[ip] = cxlist[i];
	  cxlist[i] = cytemp;
	}
	else {
	  cblock_ptr[ip] = cxlist[i];
	};
      } else if (is_real) {
	if (is_scatter) {
	  if (returnz) {
	    rytemp = rblock_ptr[ip];
	    rblock_ptr[ip] = rxlist[i];
	    rxlist[i] = rytemp;
	  }
	  else {
	    rblock_ptr[ip] = rxlist[i];
	  };
	} else {
	  if (returnz) {
	    rytemp = rblock_ptr[ip];
	    rblock_ptr[ip] = 
	      dalpha * rxlist[i] + 
		dbeta * rblock_ptr[ip];
	    rxlist[i] = rytemp;
	  }
	  else {
	    rblock_ptr[ip] = 
	      dalpha * rxlist[i] + 
		dbeta * rblock_ptr[ip];
	  };
	}
      } else {
	ASSERT(FALSE, "local_axpby(): global array type not valid\n", 0);
      }
    }		/* end for */
  }			/* end if (use_compress) */

  /* restore interrupts */
  if (is_regular_mode) {
    RESTORE_INTERRUPT();
  }
}
