
#include "ipx.h"
/***********************************************************************
* DOLIB/DONIO Version 0.0 (8/24/94)                                    *
*  Software to emulate shared memory on distributed memory environments*
* written by:                                                          *
*  Ed D'Azevedo and Charles Romine of Oak Ridge National Laboratory    *
*                                                                      *
* Questions and comments should be directed to                         *
*      efdazedo@msr.epm.ornl.gov or romine@msr.epm.ornl.gov            *
*                                                                      *
*  Please notify and acknowledge the authors in any research or        *
*  publications utilizing DOLIB/DONIO or any part of the code.         *
*                                                                      *
* NOTICE: Neither the institution nor the author make any              *
*  representations about the suitability of this software for any      *
*  purpose. This software is provided "as is", without express or      *
*  implied warranty.                                                   *
************************************************************************/

#include "stdinc.h"
#include <memory.h>
#include "message.h"

#include "ipx.h"
/*
 * Polling hook used by the non-REAL_IPX receive loop (PMSGRECV): it
 * simply calls mark(), which is defined elsewhere.
 * NOTE(review): mark() presumably lets the ipx layer service pending
 * remote requests while this node spins -- confirm against ipx.h.
 */
#define INTERRUPT_CHECK() { mark(); }



#include "globals.h"

/*
 * remot_axpby -- routine invoked on another node through the
 * S_remot_axpby / R_remot_axpby pair below.
 *
 * The dfloat arguments dalpha, dbeta are declared first for structure
 * alignment in the IPP invocation (they become the first data members
 * of A_remot_axpby).
 *
 * As used by do_bset_axpby():
 *   src_node     - id of the node that issued the request (myid there)
 *   Iaf          - global array handle
 *   ialpha,ibeta,
 *   dalpha,dbeta - coefficients, forwarded unchanged by the caller
 *   ncount       - number of x values in the accompanying data message
 *   x_msgtag     - tag under which that data message was sent
 *   use_compress - whether the index part holds (start,size) pairs
 *   nsets        - number of such pairs destined for this node
 *   returnz      - forwarded flag; the caller waits for a reply message
 *                  with the same tag when it is set
 */
IPP 
remot_axpby(dfloat dalpha, dfloat dbeta,
	    int src_node, int Iaf, int ialpha, int ibeta,
	    int ncount, int x_msgtag,
	    logical use_compress, int nsets,
	    logical returnz);
/***** Translation of IPP declaration ****/

/*
 * Argument packet for a remot_axpby request.  Member order and types
 * mirror the remot_axpby parameter list; S_remot_axpby fills a packet
 * and R_remot_axpby unpacks one.
 */
typedef struct {
	char            __NAME__[12];	/* routine name tag */
	IDTYPE          ID;		/* not set by S_remot_axpby itself */
	dfloat          dalpha, dbeta;
	int             src_node, Iaf, ialpha, ibeta;
	int             ncount, x_msgtag;
	logical         use_compress;
	int             nsets;
	logical         returnz;
} A_remot_axpby;

/*
 * S_remot_axpby -- sender side of a remot_axpby request.
 *
 * Expands to a braced block that declares a local A_remot_axpby named
 * `buf`, tags it with the name "remot_axpby" (11 characters plus the
 * explicit NUL exactly fill __NAME__[12]), copies ARG_1..ARG_11 into
 * the corresponding members in remot_axpby parameter order, and then
 * invokes QLOAD(NODE,MTYPE).
 *
 * QLOAD is defined elsewhere.
 * NOTE(review): QLOAD presumably picks up `buf` by name and fills in
 * buf.ID, which is not assigned here -- confirm in ipx.h.
 *
 * The expansion already ends in '}', and the call in do_bset_axpby()
 * is written without a trailing semicolon.
 */
#define S_remot_axpby(NODE,MTYPE,ARG_1,ARG_2,ARG_3,ARG_4,ARG_5,ARG_6,ARG_7,ARG_8,ARG_9,ARG_10,ARG_11) \
	{A_remot_axpby buf={"remot_axpby\0"};\
		buf.dalpha=ARG_1;\
		buf.dbeta=ARG_2;\
		buf.src_node=ARG_3;\
		buf.Iaf=ARG_4;\
		buf.ialpha=ARG_5;\
		buf.ibeta=ARG_6;\
		buf.ncount=ARG_7;\
		buf.x_msgtag=ARG_8;\
		buf.use_compress=ARG_9;\
		buf.nsets=ARG_10;\
		buf.returnz=ARG_11;\
		QLOAD(NODE,MTYPE);}


/*
 * R_remot_axpby -- receiver side of a remot_axpby request.
 *
 * Copies the ID member of the packet out through pid, then calls
 * remot_axpby() with the packet members in parameter order.
 */
void
R_remot_axpby(IDTYPE *pid, A_remot_axpby *PB)
{
	*pid = PB->ID;

	remot_axpby(PB->dalpha, PB->dbeta,
		    PB->src_node, PB->Iaf, PB->ialpha, PB->ibeta,
		    PB->ncount, PB->x_msgtag,
		    PB->use_compress, PB->nsets,
		    PB->returnz);
}




/* Supplies a message tag; defined elsewhere. */
extern int      gen_msgtag(void);


/*
 * GENMSGTAG(msgtag): store a tag obtained from gen_msgtag() into msgtag.
 * Only defined here when no earlier header has already provided it.
 */
#if !defined(GENMSGTAG)
#define  GENMSGTAG( msgtag ) { msgtag = gen_msgtag(); }
#endif

#if NX || I860 || PGON || PARAGON || INTEL



#if PGON || PARAGON

#include <nx.h>

static long     mergeid = -1;	/* -1 is a special value */

#define MSGWAIT() \
{ \
    extern long msgdone( long ); \
    logical isdone; \
if (mergeid != -1) { \
   for(;;) { \
      i_disable(); isdone = (1 == msgdone( mergeid )); i_enable(); \
      if (isdone) { \
         /* reset mergeid back to special value of -1 */ \
         mergeid = -1; break; }; \
      }; \
   }; \
}

#define  MSGSEND( msgtag, iproc, buffer, lnbytes ) \
{ \
   long msgid; extern long msgmerge(long,long); \
{ \
/* Ed: May 27: harmless mark() for polling version */ mark();} \
   i_disable();  \
   msgid = isend( (long) msgtag, (char *) buffer, \
	  (long) lnbytes, (long) iproc, (long) myptype() );  \
   mergeid = msgmerge( mergeid, msgid ); \
		  i_enable();  \
}

#else
/* NX || I860 || RX */
#if !defined(FORCE_TYPE)
#include <cube.h>
#endif

#define MAX_PENDING 512
static int      npend = 0;
static long     msgid_array[MAX_PENDING + 1];

#define MSGWAIT() \
{   int i; logical is_done; extern logical msgdone(long);   \
    for( i=1; i <= npend; i++ ) {   \
       for(;;) {   \
       i_disable(); is_done = msgdone( msgid_array[i] ); i_enable();   \
       if (is_done) { break; };   \
       };   \
    };   \
    npend = 0;   \
}


#define MSGSEND( msgtag, iproc, buffer, lnbytes ) \
{ long msgid; logical is_done;\
  i_disable(); \
  msgid = isend( (long) msgtag, \
	    (char *) buffer, \
	      (long) lnbytes, \
	      (long) iproc ,  (long) 0 ); \
		npend += 1; \
		  assert( (1 <= npend) && (npend <= MAX_PENDING) ); \
		    msgid_array[ npend ] = msgid; \
		      i_enable(); \
}

#endif				/* if PGON || PARAGON */

#else
/* not Intel NX system, use PICL */

#define MSGSEND( msgtag, iproc, buffer, lnbytes ) {\
   extern void send0( char *, int, int, int );  \
   send0( (char *) (buffer), (int) (lnbytes),   \
	   (int) (msgtag), (int) (iproc));  \
}

#define MSGWAIT()

#endif				/* if NX || I860 || PGON || PARAGON || INTEL */



/*
 * PMSGRECV(msgtag, iproc, buf, nxbytes) -- receive used for the reply
 * messages in do_bset_axpby().
 *
 * With REAL_IPX defined it is simply MSGRECV.  Otherwise it loops:
 * call INTERRUPT_CHECK(), test PROBE0(msgtag), and only once the probe
 * succeeds perform the MSGRECV and leave the loop.
 * MSGRECV and PROBE0 are defined elsewhere.
 */
#if defined(REAL_IPX)
#define PMSGRECV MSGRECV
#else /* defined(REAL_IPX) */

	/* polling version */

#define PMSGRECV(msgtag, iproc, buf, nxbytes ) {  \
	for(;;) {  \
		INTERRUPT_CHECK();  \
		if (PROBE0( (msgtag) )) {  \
			MSGRECV( (msgtag), (iproc), (buf), (nxbytes) );  \
			break; \
			};  \
		};  \
	}

#endif /* defined(REAL_IPX) */




/* Capacity of the start/size arrays that do_axpby_C() hands to encode_list(). */
#define DEFAULT_MAXSETS 8192 /* (8*1024) */

/*
 * Global index for offset i when no explicit index list is supplied.
 * Relies on a variable named index_start being in scope at the point
 * of use.
 */
#define LIST(i) (index_start + (i))


/*
 * Defined elsewhere.  do_axpby_C() passes the index list together with
 * start_list/size_list arrays of capacity maxsets and treats a return
 * value >= 1 as the number of (start,size) runs produced.
 */
extern int 
encode_list(int nitems, int *index_list, int segment_size,
	    int maxsets, int *start_list, int *size_list);

/*
 * Defined elsewhere.  do_bset_axpby() calls it for the portion of the
 * request that falls on this node, passing the x values and the packed
 * index list for that portion.
 */
extern void 
local_axpby(int Iaf,
	    int ialpha, int ibeta, dfloat dalpha, dfloat dbeta,
	    int ncount, void *xlist, int *list,
	    logical is_regular_mode,
	    logical use_compress, int nsets,
	    logical returnz);


/*
 * do_bset_axpby -- split an axpby request on global array Iaf among
 * the nodes selected by gni2proc() and have each of them apply it.
 *
 * For every node that receives at least one entry, a single buffer is
 * built holding that node's x values and its index information.  The
 * portion for this node (myid) goes to local_axpby(); every other
 * portion is sent with MSGSEND under a fresh tag and followed by a
 * remot_axpby request (S_remot_axpby) carrying that tag.  When returnz
 * is set, a reply is then received from each such node into the x part
 * of the same buffer and all buffers are copied back into xvalues.
 *
 * Parameters
 *   Iaf          global array handle; asserted to be in
 *                1..MAX_GLOBAL_ARRAY, to have a Global_array entry and
 *                to have cacheing turned off
 *   nsize        number of entries in xvalues (and in list, if given)
 *   xvalues      x values; element type follows anp->type
 *                (INTEGER int, REAL8 dfloat, CHAR char, REAL float);
 *                overwritten when returnz is set
 *   list         global indices, or NULL for "block mode", in which the
 *                index of offset i is index_start + i (see LIST()).
 *                Must be non-NULL when nsets < 1, because that path
 *                reads list[i] directly.
 *   nsets        number of runs in start_list/size_list; nsets >= 1
 *                selects the compressed path
 *   index_start  base index for block mode
 *   start_list   per run: 0-based offset into xvalues/list (< nsize)
 *   size_list    per run: length, asserted to be in 1..nsize
 *   ialpha, ibeta, dalpha, dbeta
 *                forwarded unchanged to local_axpby / remot_axpby
 *   returnz      forwarded; also enables the receive/copy-back phase
 *
 * In the compressed path a whole run is routed according to the global
 * index of its first element only.
 * NOTE(review): this presumably relies on encode_list() never producing
 * a run that spans two nodes -- confirm.
 */
void
do_bset_axpby(int Iaf, int nsize, void *xvalues, int *list,
	      int nsets, int index_start, int *start_list, int *size_list,
	      int ialpha, int ibeta, dfloat dalpha, dfloat dbeta,
	      logical returnz)
{

	extern int      gni2proc(int Iaf, int gni);






	logical         use_compress;
	int             istart, isize;
	char           *csrc;
	char           *cdest;
	dfloat *dsrc; dfloat *ddest;
	float *rsrc; float *rdest;
	int *isrc; int *idest;
	void *buf;
	

	/*
	 * NOTE: 2 arrays are packed into index_list[iproc], the routine
	 * receiving  (remot_axpby, local_axpby) the message must unpack
	 * accordingly
	 */

	/* pair i is stored as [start, size] at positions 2*i and 2*i+1 */
#define START_LIST(iproc,i)		index_list[iproc][2*(i)]
#define SIZE_LIST(iproc,i)		index_list[iproc][2*(i)+1]

	logical         is_valid, is_integer, is_dfloat, is_char, is_real;
	logical         is_local, is_non_local;
	logical         is_regular_mode, is_block_mode;

	struct Iarray_node *anp;
	int             iproc, i, ip, gni, gsize;
	int             nbytes, lnbytes;
	int             nidxbytes, nxbytes, totalbytes;

	/* per-node tables, all of length nproc */
	int            *ncount;		/* number of x values for each node */
	int            *ncount_sets;	/* number of runs for each node */
	int           **index_list;	/* index part of msgbuf[i] */
	int           **ixlist;		/* x part of msgbuf[i], by type */
	dfloat        **dxlist;
	char          **cxlist;
	float         **rxlist;
	void          **msgbuf;		/* one allocation per node in use */
	char           *cbuf;

	void           *pxlist;
	int            *ixvalues;
	dfloat         *dxvalues;
	char           *cxvalues;
	float          *rxvalues;

	int             *x_msgtag;	/* tag used for the message to each node */

	/* =========================== */

	/* compressed path whenever the caller supplied at least one run */
	use_compress = (nsets >= 1);

	is_valid = ((1 <= Iaf) && (Iaf <= MAX_GLOBAL_ARRAY));
	assert(is_valid);

	anp = Global_array[Iaf];
	assert(anp != NULL);


	/*
	 * Aug 22, Ed -- double check cacheing is turned off in doing updates
	 */

	assert(!(anp->cache_is_on));





	gsize = anp->gsize;

	/* exactly one of these is expected to be true; it selects the
	   typed view of xvalues and the matching x?list table below */
	is_integer = (anp->type == INTEGER);
	is_dfloat = (anp->type == REAL8);
	is_char = (anp->type == CHAR);
	is_real = (anp->type == REAL);

	if (is_integer) {
		ixvalues = (int *) xvalues;
	} else if (is_dfloat) {
		dxvalues = (dfloat *) xvalues;
	} else if (is_char) {
		cxvalues = (char *) xvalues;
	} else if (is_real) {
		rxvalues = (float *) xvalues;
	} else {
		assert(FALSE);
	}
	/* --------------------------------------------------------------- */
	/* Make several passes to determine the index set for each process */
	/* ---------------------------------------------------------------- */

	/* ncount = (int *) malloc( nproc*sizeof(int) ); */

	MEMALLOC(ncount, (int *), nproc * sizeof(int));
	assert(ncount != NULL);

	MEMALLOC(x_msgtag, (int *), nproc * sizeof(int));
	assert(x_msgtag != NULL);



	MEMALLOC(ncount_sets, (int *), nproc * sizeof(int));
	assert(ncount_sets != NULL);

	for (i = 0; i < nproc; i++) {
		ncount[i] = 0;
	};

	for (i = 0; i < nproc; i++) {
		ncount_sets[i] = 0;
	};

	/* first pass: count values (and runs) per node, validating as we go */

	is_block_mode = (list == NULL);
	if (use_compress) {
		for (i = 0; i < nsets; i++) {

			istart = start_list[i];
			is_valid = (0 <= istart) && (istart < nsize);
			assert(is_valid);

			isize = size_list[i];
			is_valid = ((1 <= isize) && (isize <= nsize));

			assert(is_valid);

			/* global index of the first element of this run */
			if (is_block_mode) {
				gni = LIST(istart);
			} else {
				gni = list[istart];
			};

			is_valid = ((1 <= gni) && (gni <= gsize));
			assert(is_valid);

			iproc = gni2proc(Iaf, gni);
			is_valid = ((0 <= iproc) && (iproc < nproc));
			assert(is_valid);

			ncount_sets[iproc] = ncount_sets[iproc] + 1;
			ncount[iproc] = ncount[iproc] + isize;
		};		/* end for */
	} else {
		/* uncompressed: list[] is read directly, so it must not be NULL */
		for (i = 0; i < nsize; i++) {
			gni = list[i];
			iproc = gni2proc(Iaf, gni);
			is_valid = ((0 <= iproc) && (iproc < nproc));
			assert(is_valid);

			ncount[iproc] = ncount[iproc] + 1;
		};		/* end for */
	};

	/* allocate temporary storage for index list */

	/* only the pointer table matching the element type is allocated */
	if (is_integer) {
		/* ixlist = (int **) malloc( nproc*sizeof( int *) ); */
		MEMALLOC(ixlist, (int **), nproc * sizeof(int *));
		assert(ixlist != NULL);

		for (i = 0; i < nproc; i++) {
			ixlist[i] = NULL;
		}
	} else if (is_dfloat) {
		/* dxlist = (dfloat **) malloc( nproc*sizeof( dfloat *) ); */
		MEMALLOC(dxlist, (dfloat **), nproc * sizeof(dfloat *));
		assert(dxlist != NULL);

		for (i = 0; i < nproc; i++) {
			dxlist[i] = NULL;
		}
	} else if (is_char) {
		/* cxlist = (char **) malloc(nproc*sizeof(char *)); */
		MEMALLOC(cxlist, (char **), nproc * sizeof(char *));
		assert(cxlist != NULL);

		for (i = 0; i < nproc; i++) {
			cxlist[i] = NULL;
		}
	} else if (is_real) {
		/* rxlist = (float **) malloc( nproc*sizeof( float *) ); */
		MEMALLOC(rxlist, (float **), nproc * sizeof(float *));
		assert(rxlist != NULL);

		for (i = 0; i < nproc; i++) {
			rxlist[i] = NULL;
		}
	} else {
		assert(FALSE);
	}
	/* index_list = (int **) malloc( nproc*sizeof( int *) ); */
	MEMALLOC(index_list, (int **), nproc * sizeof(int *));

	assert(index_list != NULL);
	for (i = 0; i < nproc; i++) {
		index_list[i] = NULL;
	}

	MEMALLOC(msgbuf, (void **), nproc * sizeof(void *));
	assert(msgbuf != NULL);
	for (i = 0; i < nproc; i++) {
		msgbuf[i] = NULL;
	};

	/*
	 * One buffer per node that has work.  Layout is [x values][indices],
	 * except for CHAR where it is [indices][x values].
	 */
	for (i = 0; i < nproc; i++) {
		if (ncount[i] > 0) {

			if (use_compress) {

				/*
				 * storage for "start_list[:]" and
				 * "size_list[:]"
				 */
				nidxbytes = 2 * ncount_sets[i] * sizeof(int);

			} else {
				nidxbytes = ncount[i] * sizeof(int);
			};

			if (is_integer) {
				nxbytes = ncount[i] * sizeof(int);
			} else if (is_dfloat) {
				nxbytes = ncount[i] * sizeof(dfloat);
			} else if (is_char) {
				nxbytes = ncount[i] * sizeof(char);
			} else if (is_real) {
				nxbytes = ncount[i] * sizeof(float);
			} else {
				assert(FALSE);
			}
			totalbytes = nidxbytes + nxbytes;
			MEMALLOC(msgbuf[i], (char *), totalbytes);
			assert(msgbuf[i] != NULL);

			cbuf = (char *) msgbuf[i];
			if (is_integer) {
				ixlist[i] = (int *) (&(cbuf[0]));
				index_list[i] = (int *) (&(cbuf[nxbytes]));
			} else if (is_dfloat) {
				dxlist[i] = (dfloat *) (&(cbuf[0]));
				index_list[i] = (int *) (&(cbuf[nxbytes]));
			} else if (is_real) {
				rxlist[i] = (float *) (&(cbuf[0]));
				index_list[i] = (int *) (&(cbuf[nxbytes]));
			} else if (is_char) {

				/* reverse storage to guarantee alignment */
				/*
				 * NOTE: code in receiving message must also
				 * be aware of this
				 */

				index_list[i] = (int *) (&(cbuf[0]));
				cxlist[i] = (char *) (&(cbuf[nidxbytes]));
			} else {
				assert(FALSE);
			}
		}		/* end if */
	}			/* end for */

	/* the counters now become fill positions for the second pass */
	for (i = 0; i < nproc; i++) {
		ncount[i] = 0;
	};

	for (i = 0; i < nproc; i++) {
		ncount_sets[i] = 0;
	};

	if (use_compress) {

		/* another pass to copy into message buffers */

		for (i = 0; i < nsets; i++) {
			istart = start_list[i];
			is_valid = ((0 <= istart) && (istart < nsize));
			assert(is_valid);

			isize = size_list[i];
			is_valid = ((1 <= isize) && (isize <= nsize));
			assert(is_valid);

			if (is_block_mode) {
				gni = LIST(istart);
			} else {
				gni = list[istart];
			};

			iproc = gni2proc(Iaf, gni);

			/* what is stored as "start" is the global index of
			   the run's first element, not the offset istart */
			START_LIST(iproc, ncount_sets[iproc]) = gni;
			SIZE_LIST(iproc, ncount_sets[iproc]) = size_list[i];

			ncount_sets[iproc] += 1;

			/* copy into message buffer */

			ip = ncount[iproc];
			if (is_integer) {
				isrc = (int *) (&(ixvalues[istart]));
				idest = (int *) (&(ixlist[iproc][ip]));
				ICOPY(isrc,idest,isize);
			} else if (is_dfloat) {
				dsrc = (dfloat *) (&(dxvalues[istart]));
				ddest = (dfloat *) (&(dxlist[iproc][ip]));
				DCOPY(dsrc,ddest, isize );
			} else if (is_real) {
				rsrc = (float *) (&(rxvalues[istart]));
				rdest = (float *) (&(rxlist[iproc][ip]));
				RCOPY(rsrc,rdest,isize);
			} else if (is_char) {
				csrc = (char *) (&(cxvalues[istart]));
				cdest = (char *) (&(cxlist[iproc][ip]));
				CCOPY(csrc,cdest,isize);
			} else {
				assert(FALSE);
			}

			ncount[iproc] += isize;

		};		/* end for (i) */


	} else {

		/* another pass thru list[:] */

		for (i = 0; i < nsize; i++) {
			gni = list[i];
			iproc = gni2proc(Iaf, gni);

			ip = ncount[iproc];
			index_list[iproc][ip] = gni;
			if (is_integer) {
				ixlist[iproc][ip] = ixvalues[i];
			} else if (is_dfloat) {
				dxlist[iproc][ip] = dxvalues[i];
			} else if (is_real) {
				rxlist[iproc][ip] = rxvalues[i];
			} else if (is_char) {
				cxlist[iproc][ip] = cxvalues[i];
			} else {
				assert(FALSE);
			}
			ncount[iproc] = ip + 1;
		}		/* end for */
	}			/* end if(use_compress) */

	/* ship every non-local buffer and queue the matching remote request */
	for (i = 0; i < nproc; i++) {
		iproc = i;
		is_local = (iproc == myid);
		if (is_local) {
			continue;
		}
		/* ipx "on" applies only to non-local processors */

		if (ncount[i] != 0) {

			if (use_compress) {
				nidxbytes = 2 * ncount_sets[i] * sizeof(int);
			} else {
				nidxbytes = ncount[i] * sizeof(int);
			}

			if (is_integer) {
				nxbytes = sizeof(int) * ncount[i];
			} else if (is_dfloat) {
				nxbytes = sizeof(dfloat) * ncount[i];
			} else if (is_real) {
				nxbytes = sizeof(float) * ncount[i];
			} else if (is_char) {
				nxbytes = sizeof(char) * ncount[i];
			} else {
				assert(FALSE);
			}
			lnbytes = nxbytes + nidxbytes;

			/* the data message goes out first; the request that
			   follows names the tag it was sent under */
			GENMSGTAG(x_msgtag[i]);
			MSGSEND(x_msgtag[i], iproc, msgbuf[i], lnbytes);

			/*
			 * dfloat dalpha, dbeta as first parameters for
			 * structure alignment with IPP invocation
			 */
			
/****on(iproc) remot_axpby(dalpha, dbeta,
					      myid, Iaf, ialpha, ibeta,
			 ncount[i], x_msgtag[i], use_compress, ncount_sets[i],
			 returnz); */

	S_remot_axpby(iproc,MT_on,dalpha, dbeta,
					      myid, Iaf, ialpha, ibeta,
			 ncount[i], x_msgtag[i], use_compress, ncount_sets[i],
			 returnz)

		}		/* end if */
	}			/* end for */

	/* handle local work */
	if (ncount[myid] != 0) {
		if (is_integer) {
			pxlist = (void *) ixlist[myid];
		} else if (is_dfloat) {
			pxlist = (void *) dxlist[myid];
		} else if (is_real) {
			pxlist = (void *) rxlist[myid];
		} else if (is_char) {
			pxlist = (void *) cxlist[myid];
		} else {
			assert(FALSE);
		}
		/* "is_regular_mode = TRUE" is an assignment expression used
		   to label the argument; the value passed is TRUE */
		local_axpby(Iaf, ialpha, ibeta, dalpha, dbeta,
			    ncount[myid], pxlist, index_list[myid],
		   is_regular_mode = TRUE, use_compress, ncount_sets[myid],
		   returnz);
	}
	/* was anything sent to another node? */
	is_non_local = FALSE;
	for (i = 0; i < nproc; i++) {
		if (myid == i) {
			continue;
		}
		is_non_local = (ncount[i] > 0);
		if (is_non_local) {
			break;
		}
	}			/* end for */

	if (is_non_local) {
		finish(0);
	}			/* wait for all to complete */
	MSGWAIT();		/* wait for asynchronous sends to complete */



	if (returnz) {
		/* For simplicity, wait for all messages to return
		   before copying into user buffer */

	    for(iproc=0; iproc < nproc; iproc++ ) {
		if (myid == iproc) { continue; };

		if (ncount[iproc] >= 1) {

		/* the reply lands on top of the x values that were sent */
		if (is_integer) {
		   nxbytes = ncount[iproc]*sizeof(int);
		   buf = (void *) &(ixlist[iproc][0]); 
		   }
		else if (is_dfloat) {
		   nxbytes = ncount[iproc]*sizeof(dfloat);
		   buf = (void *) &(dxlist[iproc][0]);
		   }
		else if (is_real) {
		   nxbytes = ncount[iproc]*sizeof(float);
		   buf = (void *) &(rxlist[iproc][0]);
		   }
		else if (is_char) {
		   nxbytes = ncount[iproc]*sizeof(char);
		   buf = (void *) &(cxlist[iproc][0]);
		   };


		/* Concern about potential deadlock,
		   may need to use a polling loop */
		PMSGRECV( x_msgtag[iproc], iproc, buf, nxbytes );
		}; /* if (ncount[iproc]) */

		}; /* for(iproc) */


	    /* copy results back into user buffer */

	    /* reuse array ncount[0..(nproc-1)] as a counter */
	    for(iproc=0; iproc < nproc; iproc++ ) {
		    ncount[iproc] = 0;
		    };


		/* the following is based on code to copy the x array
		   into the message buffer */

	    if (use_compress) {

		for(i=0; i < nsets; i++ ) {
		

                        istart = start_list[i];
                        is_valid = (0 <= istart) && (istart < nsize);
                        assert(is_valid);

                        isize = size_list[i];
                        is_valid = ((1 <= isize) && (isize <= nsize));

                        assert(is_valid);

                        if (is_block_mode) {
                                gni = LIST(istart);
                        } else {
                                gni = list[istart];
                        };

                        is_valid = ((1 <= gni) && (gni <= gsize));
                        assert(is_valid);

                        iproc = gni2proc(Iaf, gni);
                        is_valid = ((0 <= iproc) && (iproc < nproc));
                        assert(is_valid);


			/* same traversal order as the packing pass, with
			   source and destination swapped */
			ip = ncount[iproc];
			if (is_integer) {
			   isrc = (int *) (&(ixlist[iproc][ip]));
			   idest = (int *) (&(ixvalues[istart]));
			   ICOPY( isrc, idest, isize );
			   }
			else if (is_dfloat) {
			   dsrc = (dfloat *) (&(dxlist[iproc][ip]));
			   ddest = (dfloat *) (&(dxvalues[istart])); 
			   DCOPY( dsrc, ddest, isize );
			   }
			else if (is_real) {
			   rsrc = (float *) (&(rxlist[iproc][ip]));
			   rdest = (float *) (&(rxvalues[istart]));
			   RCOPY( rsrc, rdest, isize );
			   }
			else if (is_char) {
			   csrc = (char *) (&(cxlist[iproc][ip]));
			   cdest = (char *) (&(cxvalues[istart]));
			   CCOPY( csrc, cdest, isize );
			   }
			else { 
				assert( FALSE ); /* impossible */
				};

			ncount[iproc] += isize;
			}; /* end for(i) */

		} /* if (use_compress) */
	else {
				
		for(i=0; i < nsize; i++ ) {
			gni = list[i];
			iproc = gni2proc( Iaf, gni );

			ip = ncount[iproc];
			if (is_integer) {
				ixvalues[i] = ixlist[iproc][ip];
				}
			else if (is_dfloat) {
				dxvalues[i] = dxlist[iproc][ip];
				}
			else if (is_real) {
				rxvalues[i] = rxlist[iproc][ip];
				}
			else if (is_char) {
				cxvalues[i] = cxlist[iproc][ip];
				}
			else {
				assert( FALSE ); /* impossible */
				};

			ncount[iproc] = ip + 1;
			}; /* end for(i) */
		}; /* end if (compress) */

	}; /* end if (returnz) */


	/* note that even if (returnz) code is executed,
		ncount[*] should be restored to non-zero values:
		the copy-back loops above count every entry again, so
		ncount[i] != 0 still identifies the buffers that were
		allocated */

	/* deallocate all memory */

	for (i = 0; i < nproc; i++) {
		if (ncount[i] != 0) {
			MEMFREE(msgbuf[i]);
		}
	}

	MEMFREE(ncount);
	MEMFREE(x_msgtag);
	MEMFREE(index_list);
	MEMFREE(msgbuf);

	MEMFREE(ncount_sets);

	if (is_integer) {
		MEMFREE(ixlist);
	} else if (is_dfloat) {
		MEMFREE(dxlist);
	} else if (is_real) {
		MEMFREE(rxlist);
	} else if (is_char) {
		MEMFREE(cxlist);
	} else {
		assert(FALSE);
	}
}





/*
 * do_axpby_C -- common driver behind do_axpby() and do_axpbyz().
 *
 * Validates the request, asks encode_list() to compress the index list
 * into (start,size) runs, and passes everything to do_bset_axpby().
 *
 * Parameters
 *   Iaf          global array handle; asserted to be in
 *                1..MAX_GLOBAL_ARRAY, to have a Global_array entry and
 *                to have cacheing turned off
 *   nsize        number of entries in list and xvalues; must be >= 0
 *   list         global indices, each asserted to be in 1..gsize
 *   xvalues      x values, forwarded to do_bset_axpby()
 *   ialpha, ibeta, dalpha, dbeta
 *                forwarded unchanged
 *   returnz      forwarded unchanged
 *
 * An empty request (nsize == 0) returns immediately after the handle
 * checks: there is nothing to route, and continuing would read list[0]
 * and could trip the "compression failed" assertion below.
 */
void
do_axpby_C(int Iaf, int nsize, int *list, void *xvalues,
	 int ialpha, int ibeta, dfloat dalpha, dfloat dbeta, 
	  logical returnz)
{

	logical         use_compress, is_valid;
	int            *start_list;
	int            *size_list;
	int             maxsets, nsets, segment_size, index_start;

	struct Iarray_node *anp;
	int             gni, i, gsize;

	is_valid = ((1 <= Iaf) && (Iaf <= MAX_GLOBAL_ARRAY));
	assert(is_valid);

	anp = Global_array[Iaf];
	assert(anp != NULL);


	/*
	 * Aug 22, Ed, make sure cacheing is turned off in performing updates
	 */

	assert(!(anp->cache_is_on));

	/* nothing to do for an empty list; a negative count is a caller bug */

	assert(nsize >= 0);
	if (nsize <= 0) {
		return;
	}
	assert(list != NULL);

	/* verify that LIST(:) is valid */

	gsize = anp->gsize;
	for (i = 0; i < nsize; i++) {
		gni = list[i];
		is_valid = ((1 <= gni) && (gni <= gsize));
		assert(is_valid);
	};

	/* attempt compress */

	maxsets = DEFAULT_MAXSETS;
	MEMALLOC(start_list, (int *), maxsets * sizeof(int));
	assert(start_list != NULL);
	MEMALLOC(size_list, (int *), maxsets * sizeof(int));
	assert(size_list != NULL);

	/* segment_size = (anp->page_size)*(anp->block_size) */
	segment_size = anp->page_size;

	nsets = encode_list(nsize, list, segment_size,
			    maxsets, start_list, size_list);
	use_compress = (nsets >= 1);

	/*
	 * With room for one run per entry, compression is expected to
	 * succeed; report and stop if it did not.
	 */
	if (maxsets >= nsize) {
	    if (!use_compress) {
	       printf("**  error in do_axpby: \n");
	       printf("maxsets %d nsize %d use_compress %d\n", 
				maxsets,nsize,use_compress );
	       };
	    assert(use_compress);
	};

	/* safe: nsize >= 1 here.  do_bset_axpby() only uses index_start
	   when list is NULL, which is never the case on this path. */
	index_start = list[0];
	do_bset_axpby(Iaf, nsize,
		      xvalues, list,
		      nsets, index_start, start_list, size_list,
		      ialpha, ibeta, dalpha, dbeta,
		      returnz);

	MEMFREE(start_list);
	MEMFREE(size_list);
}

/*
 * do_axpby -- entry point that forwards every argument to do_axpby_C()
 * with returnz fixed to FALSE.
 */
void
do_axpby(int Iaf, int nsize, int *list, void *xvalues,
	 int ialpha, int ibeta, dfloat dalpha, dfloat dbeta)
{
	logical         returnz = FALSE;

	do_axpby_C(Iaf, nsize, list, xvalues,
		   ialpha, ibeta, dalpha, dbeta,
		   returnz);
}


/*
 * do_axpbyz -- entry point that forwards every argument to do_axpby_C()
 * with returnz fixed to TRUE.
 */
void
do_axpbyz(int Iaf, int nsize, int *list, void *xvalues,
	  int ialpha, int ibeta, dfloat dalpha, dfloat dbeta)
{
	logical         returnz = TRUE;

	do_axpby_C(Iaf, nsize, list, xvalues,
		   ialpha, ibeta, dalpha, dbeta,
		   returnz);
}
