/*
*                               imass.c
*
*       Copyright 1999 by The University at Stony Brook, All rights reserved.
*/

#include <tri/trilocaldecs.h>

#if defined(TWOD)

#define     SWAP(a,b)    {temp=(a); (a) = (b); (b) = temp;}

#if defined(__cplusplus)
extern "C" {
#endif /* defined(__cplusplus) */
    FORTRAN int    FORTRAN_NAME(dgetri)(int*, double*, int*, int*, double*, int *, int *);
                   // routine to compute the inverse of a matrix using the LU factorization provided by dgetrf
    FORTRAN int    FORTRAN_NAME(dgetrf)(int*, int*, double*, int*, int*, int*);
                   // routine for doing LU factorization of general m x n matrix
#if defined(__cplusplus)
}
#endif /* defined(__cplusplus) */

// IMPORT void      matrix_inv(long double**,int,long double**);
// IMPORT void      comp_mass_matrix(int,TRI*,int,long double**);
// IMPORT void      comp_mass_matrix_1st_row(int,TRI*,int,long double**);
// IMPORT void      inverse_matrix(long double**,int,long double**);
// IMPORT void      print_ldb_matrix(const char*,int,int,long double**,const char*);

LOCAL long double     integral_eta_xi(int,int);
LOCAL int             factorial(int);
//IMPORT long double     int_x(float*,float*,float*,long double);
//IMPORT long double     int_y(float*,float*,float*,long double);
//IMPORT long double     int_x2(float*,float*,float*,long double);
//IMPORT long double     int_y2(float*,float*,float*,long double);
//IMPORT long double     int_xy(float*,float*,float*,long double);
//IMPORT long double     int_x3(float*,float*,float*,long double);
//IMPORT long double     int_y3(float*,float*,float*,long double);
//IMPORT long double     int_x2y(float*,float*,float*,long double);
//IMPORT long double     int_xy2(float*,float*,float*,long double);
//IMPORT long double     int_x4(float*,float*,float*,long double);
//IMPORT long double     int_y4(float*,float*,float*,long double);
//IMPORT long double     int_x3y(float*,float*,float*,long double);
//IMPORT long double     int_x2y2(float*,float*,float*,long double);
//IMPORT long double     int_xy3(float*,float*,float*,long double);

/*
 * Prototypes of file-local (LOCAL) routines.
 *
 * NOTE(review): the names ludcmp/lubksb/gaussj suggest LU decomposition,
 * LU back-substitution and Gauss-Jordan elimination; their definitions are
 * not visible from this part of the file -- confirm before relying on it.
 */
LOCAL void      lubksb(long double**,int,int*,long double*);
LOCAL void      ludcmp(long double**,int,int*,long double*);
LOCAL void      gaussj(long double**,int,long double**,int);
LOCAL void      inverse_matrix_gj(double**,int,double**);
/* Mass-matrix builders; comp_mass_matrix() picks one of these according to
 * MAX_N_COEF (6 -> p2, 10 -> p3, 15 -> p4). */
LOCAL void      comp_mass_matrix_p2(int,TRI*,int,double**);
LOCAL void      comp_mass_matrix_p3(int,TRI*,int,double**);
LOCAL void      comp_mass_matrix_p4(int,TRI*,int,double**);
/* NOTE(review): presumably the "first row only" counterparts of the builders
 * above; definitions are not visible here. */
LOCAL void      comp_mass_matrix_1st_row_p2(int,TRI*,int,double*,double**);
LOCAL void      comp_mass_matrix_1st_row_p3(int,TRI*,int,double*,double**);
LOCAL void      comp_mass_matrix_1st_row_p4(int,TRI*,int,double*,double**);


/* Called in this file as B_val(crds, cent, q, k): crds are the quadrature
 * point coordinates, cent the value returned by fg_centroid(), q the
 * quadrature point index and k a basis-function index. */
LOCAL long double B_val(float crds[][2],double*,int,int);



/*
 * comp_mass_matrix - fill the mass matrix of one triangle.
 *
 * Dispatches on MAX_N_COEF to the implementation written for that basis
 * size and forwards every argument unchanged:
 *
 *      MAX_N_COEF ==  6   ->  comp_mass_matrix_p2
 *      MAX_N_COEF == 10   ->  comp_mass_matrix_p3
 *      MAX_N_COEF == 15   ->  comp_mass_matrix_p4
 *
 * For any other value an error line is printed and clean_up(ERROR) is
 * called.
 *
 *      n_coeff   forwarded to the selected routine
 *      tri       triangle whose mass matrix is wanted
 *      dim       forwarded to the selected routine
 *      mass_m    output matrix, written by the selected routine
 */
EXPORT void comp_mass_matrix(
        int     n_coeff,
        TRI     *tri,
        int     dim,
        double   **mass_m)
{
        switch (MAX_N_COEF)
        {
        case 6:
            comp_mass_matrix_p2(n_coeff, tri, dim, mass_m);
            break;

        case 10:
            comp_mass_matrix_p3(n_coeff, tri, dim, mass_m);
            break;

        case 15:
            comp_mass_matrix_p4(n_coeff, tri, dim, mass_m);
            break;

        default:
            /* No implementation exists for this basis size. */
            printf("ERROR: implement comp_mass_matrix for MAX_N_COEF = %d\n",
                MAX_N_COEF);
            clean_up(ERROR);
            break;
        }
}

/*
 * comp_mass_matrix_p2 - 6x6 mass matrix of a triangle for the quadratic
 * basis expanded about cent = fg_centroid(tri):
 *
 *      phi_0 = 1            phi_3 = (x-xc)^2
 *      phi_1 = (x-xc)       phi_4 = (x-xc)(y-yc)
 *      phi_2 = (y-yc)       phi_5 = (y-yc)^2
 *
 * with (xc,yc) = (cent[0],cent[1]).  Entry mass_m[r][c] is the integral of
 * phi_r*phi_c over the triangle.  Each entry is obtained by expanding the
 * product into monomials x^a y^b and combining the monomial integrals
 * supplied by int_x(), int_y(), ..., int_y4().
 *
 *      n_coeff   unused here
 *      tri       triangle; its three vertices and fg_centroid(tri) are read
 *      dim       unused here
 *      mass_m    output; rows/columns 0..5 are overwritten
 *
 * Every int_*() helper is evaluated exactly once and its result reused;
 * this assumes the helpers are pure functions of the vertex coordinates
 * and det (NOTE(review): their definitions are not in this file section).
 * The arithmetic of every entry is otherwise kept term-for-term, including
 * where the (long double) casts bind, so rounding is unchanged.
 */
LOCAL void comp_mass_matrix_p2(
        int     n_coeff,
        TRI     *tri,
        int     dim,
        double   **mass_m)
{
        int           i;
        double        *cent = fg_centroid(tri);
        POINT         *p[3];
        float         *pcrds[3];
        long double   det;
        long double   ix, iy;                       /* degree 1 */
        long double   ix2, ixy, iy2;                /* degree 2 */
        long double   ix3, ix2y, ixy2, iy3;         /* degree 3 */
        long double   ix4, ix3y, ix2y2, ixy3, iy4;  /* degree 4 */
        double        xc, yc;
        double        m00;

        for(i = 0; i < 3; i++)
        {
            p[i] = Point_of_tri(tri)[i];
            pcrds[i] = Coords(p[i]);
        }

        xc = cent[0];
        yc = cent[1];

        /* Cross product of the edge vectors p0->p1 and p0->p2. */
        det = (long double)(pcrds[1][0]-pcrds[0][0])*(pcrds[2][1]-pcrds[0][1]) -
              (long double)(pcrds[2][0]-pcrds[0][0])*(pcrds[1][1]-pcrds[0][1]);

        /* Monomial integrals, evaluated once each. */
        ix    = int_x(pcrds[0],pcrds[1],pcrds[2],det);
        iy    = int_y(pcrds[0],pcrds[1],pcrds[2],det);
        ix2   = int_x2(pcrds[0],pcrds[1],pcrds[2],det);
        ixy   = int_xy(pcrds[0],pcrds[1],pcrds[2],det);
        iy2   = int_y2(pcrds[0],pcrds[1],pcrds[2],det);
        ix3   = int_x3(pcrds[0],pcrds[1],pcrds[2],det);
        ix2y  = int_x2y(pcrds[0],pcrds[1],pcrds[2],det);
        ixy2  = int_xy2(pcrds[0],pcrds[1],pcrds[2],det);
        iy3   = int_y3(pcrds[0],pcrds[1],pcrds[2],det);
        ix4   = int_x4(pcrds[0],pcrds[1],pcrds[2],det);
        ix3y  = int_x3y(pcrds[0],pcrds[1],pcrds[2],det);
        ix2y2 = int_x2y2(pcrds[0],pcrds[1],pcrds[2],det);
        ixy3  = int_xy3(pcrds[0],pcrds[1],pcrds[2],det);
        iy4   = int_y4(pcrds[0],pcrds[1],pcrds[2],det);

        /* Row 0: integral of 1 * phi_c.  The stored (double) value of
         * mass_m[0][0] is what all later terms use. */
        mass_m[0][0] = det*0.5;
        m00 = mass_m[0][0];

        mass_m[0][1] = ix - (long double)xc*m00;

        mass_m[0][2] = iy - (long double)yc*m00;

        mass_m[0][3] = ix2 - (long double)(2.0*xc)*
                       ix + (long double)sqr(xc)*m00;

        mass_m[0][4] = ixy -
                       (long double)yc*ix -
                       (long double)xc*iy +
                       (long double)(xc*yc)*m00;

        mass_m[0][5] = iy2 - (long double)(2.0*yc)*
                       iy + (long double)sqr(yc)*m00;

        /* Row 1: (x-xc) * phi_c. */
        mass_m[1][0] = mass_m[0][1];
        mass_m[1][1] = mass_m[0][3];
        mass_m[1][2] = mass_m[0][4];
        mass_m[1][3] = ix3 - (long double)cub(xc)*m00 -
                        (long double)3.0*xc*ix2 +
                        (long double)3.0*sqr(xc)*ix;

        mass_m[1][4] = ix2y -
                       (long double)yc*ix2 -
                       (long double)(2.0*xc)*ixy +
                       (long double)(2.0*xc*yc)*ix +
                       (long double)sqr(xc)*iy -
                       (long double)(sqr(xc)*yc)*m00;

        mass_m[1][5] = ixy2 -
                       (long double)(2.0*yc)*ixy +
                       (long double)(sqr(yc))*ix -
                       (long double)xc*iy2 +
                       (long double)(2.0*xc*yc)*iy -
                       (long double)(sqr(yc)*xc)*m00;

        /* Row 2: (y-yc) * phi_c. */
        mass_m[2][0] = mass_m[0][2];
        mass_m[2][1] = mass_m[0][4];
        mass_m[2][2] = mass_m[0][5];
        mass_m[2][3] = mass_m[1][4];
        mass_m[2][4] = mass_m[1][5];
        mass_m[2][5] = iy3 -
                       (long double)cub(yc)*m00 -
                       (long double)(3.0*yc)*iy2 +
                       (long double)(3.0*sqr(yc))*iy;

        /* Row 3: (x-xc)^2 * phi_c. */
        mass_m[3][0] = mass_m[0][3];
        mass_m[3][1] = mass_m[1][3];
        mass_m[3][2] = mass_m[2][3];
        mass_m[3][3] = ix4 +
                   (long double)(sqr(xc)*sqr(xc))*m00 +
                   (long double)(6.0*sqr(xc))*ix2 -
                   (long double)(4.0*xc)*ix3 -
                   (long double)(4.0*cub(xc))*ix;

        mass_m[3][4] = ix3y -
                       (long double)yc*ix3 -
                       (long double)(3.0*xc)*ix2y +
                       (long double)(3.0*xc*yc)*ix2 +
                       (long double)(3.0*sqr(xc))*ixy -
                       (long double)(3.0*sqr(xc)*yc)*ix -
                       (long double)cub(xc)*iy +
                       (long double)(cub(xc)*yc)*m00;

        mass_m[3][5] = ix2y2-
                       (long double)(2.0*yc)*ix2y+
                       (long double)(sqr(yc))*ix2-
                       (long double)(2.0*xc)*ixy2+
                       (long double)(4.0*xc*yc)*ixy-
                       (long double)(2.0*xc*sqr(yc))*ix+
                       (long double)sqr(xc)*iy2-
                       (long double)(2.0*yc*sqr(xc))*iy +
                       (long double)(sqr(xc)*sqr(yc))*m00;

        /* Row 4: (x-xc)(y-yc) * phi_c.  phi_4^2 equals phi_3*phi_5, hence
         * mass_m[4][4] reuses mass_m[3][5]. */
        mass_m[4][0] = mass_m[0][4];
        mass_m[4][1] = mass_m[1][4];
        mass_m[4][2] = mass_m[2][4];
        mass_m[4][3] = mass_m[3][4];
        mass_m[4][4] = mass_m[3][5];
        mass_m[4][5] = ixy3 -
                       (long double)(3.0*yc)*ixy2+
                       (long double)(3.0*sqr(yc))*ixy-
                       (long double)(cub(yc))*ix-
                       (long double)(xc)*iy3 +
                       (long double)(3.0*xc*yc)*iy2-
                       (long double)(3.0*xc*sqr(yc))*iy+
                       (long double)(xc*cub(yc))*m00;

        /* Row 5: (y-yc)^2 * phi_c. */
        mass_m[5][0] = mass_m[0][5];
        mass_m[5][1] = mass_m[1][5];
        mass_m[5][2] = mass_m[2][5];
        mass_m[5][3] = mass_m[3][5];
        mass_m[5][4] = mass_m[4][5];
        mass_m[5][5] = iy4 +
                   (long double)(sqr(yc)*sqr(yc))*m00 +
                   (long double)(6.0*sqr(yc))*iy2 -
                   (long double)(4.0*yc)*iy3 -
                   (long double)(4.0*cub(yc))*iy;
}

LOCAL void comp_mass_matrix_p3(
        int     n_coeff,
        TRI     *tri,
        int     dim,
        double   **mass_m)
{
        int     i, j;
        double   *cent = fg_centroid(tri);
        POINT   *p[3];
        float   *pcrds[3]; 
        long double   det;
        float   crds[13][2]; 
        long double   tmpans[13];
        long double w1 =-0.149570044467670, w2 = 0.053347235608839,
                    w3 = 0.175615257433204,  w4 = 0.077113760890257;
        long double   area;

        area = fg_area(tri);
        
        for(i = 0; i < 3; i++)
        {
            p[i] = Point_of_tri(tri)[i];
            pcrds[i] = Coords(p[i]);
        }

        tri_quadrature_13_pts(pcrds[0], pcrds[1], pcrds[2], crds);

        det = (long double)(pcrds[1][0]-pcrds[0][0])*(pcrds[2][1]-pcrds[0][1]) - 
              (long double)(pcrds[2][0]-pcrds[0][0])*(pcrds[1][1]-pcrds[0][1]);

        mass_m[0][0] = det*0.5;

        mass_m[0][1] = int_x(pcrds[0],pcrds[1],pcrds[2],det) - (long double)cent[0]*mass_m[0][0];

        mass_m[0][2] = int_y(pcrds[0],pcrds[1],pcrds[2],det) - (long double)cent[1]*mass_m[0][0]; 

        mass_m[0][3] = int_x2(pcrds[0],pcrds[1],pcrds[2],det) - (long double)(2.0*cent[0])*
                       int_x(pcrds[0],pcrds[1],pcrds[2],det) + (long double)sqr(cent[0])*mass_m[0][0];

        mass_m[0][4] = int_xy(pcrds[0],pcrds[1],pcrds[2],det) - 
                       (long double)cent[1]*int_x(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)cent[0]*int_y(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(cent[0]*cent[1])*mass_m[0][0];

        mass_m[0][5] = int_y2(pcrds[0],pcrds[1],pcrds[2],det) - (long double)(2.0*cent[1])*
                       int_y(pcrds[0],pcrds[1],pcrds[2],det) + (long double)sqr(cent[1])*mass_m[0][0];

        for(i = 6; i< 10; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i);
            mass_m[0][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        // (x-x_i)*all bases functions
        mass_m[1][0] = mass_m[0][1];
        mass_m[1][1] = mass_m[0][3];
        mass_m[1][2] = mass_m[0][4];
        mass_m[1][3] = int_x3(pcrds[0],pcrds[1],pcrds[2],det) - (long double)cub(cent[0])*mass_m[0][0] -
                        (long double)3.0*cent[0]*int_x2(pcrds[0],pcrds[1],pcrds[2],det) + 
                        (long double)3.0*sqr(cent[0])*int_x(pcrds[0],pcrds[1],pcrds[2],det);

        mass_m[1][4] = int_x2y(pcrds[0],pcrds[1],pcrds[2],det) - 
                       (long double)cent[1]*int_x2(pcrds[0],pcrds[1],pcrds[2],det) - 
                       (long double)(2.0*cent[0])*int_xy(pcrds[0],pcrds[1],pcrds[2],det) + 
                       (long double)(2.0*cent[0]*cent[1])*int_x(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)sqr(cent[0])*int_y(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)(sqr(cent[0])*cent[1])*mass_m[0][0];

        mass_m[1][5] = int_xy2(pcrds[0],pcrds[1],pcrds[2],det) - 
                       (long double)(2.0*cent[1])*int_xy(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(sqr(cent[1]))*int_x(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)cent[0]*int_y2(pcrds[0],pcrds[1],pcrds[2],det) + 
                       (long double)(2.0*cent[0]*cent[1])*int_y(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)(sqr(cent[1])*cent[0])*mass_m[0][0];  
        
        for(i = 6; i< 10; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 1);
            mass_m[1][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }
        // mass_m[1][6] = mass_m[3][3];
        // mass_m[1][7] = mass_m[3][4];
        // mass_m[1][8] = mass_m[3][5];
        // mass_m[1][9] = mass_m[4][5];

        mass_m[2][0] = mass_m[0][2];
        mass_m[2][1] = mass_m[0][4];
        mass_m[2][2] = mass_m[0][5];
        mass_m[2][3] = mass_m[1][4];
        mass_m[2][4] = mass_m[1][5];      
        mass_m[2][5] = int_y3(pcrds[0],pcrds[1],pcrds[2],det) - 
                       (long double)cub(cent[1])*mass_m[0][0] - 
                       (long double)(3.0*cent[1])*int_y2(pcrds[0],pcrds[1],pcrds[2],det) + 
                       (long double)(3.0*sqr(cent[1]))*int_y(pcrds[0],pcrds[1],pcrds[2],det);
        mass_m[2][6] = mass_m[1][7];
        for(i = 7; i< 10; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 2);
            mass_m[2][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }
        // mass_m[2][7] = mass_m[3][5];
        // mass_m[2][8] = mass_m[1][9];
        // mass_m[2][9] = mass_m[5][5];

        mass_m[3][0] = mass_m[0][3];
        mass_m[3][1] = mass_m[1][3];
        mass_m[3][2] = mass_m[2][3];
        mass_m[3][3] = int_x4(pcrds[0],pcrds[1],pcrds[2],det) + 
                   (long double)(sqr(cent[0])*sqr(cent[0]))*mass_m[0][0] +
                   (long double)(6.0*sqr(cent[0]))*int_x2(pcrds[0],pcrds[1],pcrds[2],det) -
                   (long double)(4.0*cent[0])*int_x3(pcrds[0],pcrds[1],pcrds[2],det) - 
                   (long double)(4.0*cub(cent[0]))*int_x(pcrds[0],pcrds[1],pcrds[2],det); 

        mass_m[3][4] = int_x3y(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)cent[1]*int_x3(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)(3.0*cent[0])*int_x2y(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(3.0*cent[0]*cent[1])*int_x2(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(3.0*sqr(cent[0]))*int_xy(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)(3.0*sqr(cent[0])*cent[1])*int_x(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)cub(cent[0])*int_y(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(cub(cent[0])*cent[1])*mass_m[0][0];

        mass_m[3][5] = int_x2y2(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(2.0*cent[1])*int_x2y(pcrds[0],pcrds[1],pcrds[2],det)+
                       (long double)(sqr(cent[1]))*int_x2(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(2.0*cent[0])*int_xy2(pcrds[0],pcrds[1],pcrds[2],det)+
                       (long double)(4.0*cent[0]*cent[1])*int_xy(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(2.0*cent[0]*sqr(cent[1]))*int_x(pcrds[0],pcrds[1],pcrds[2],det)+
                       (long double)sqr(cent[0])*int_y2(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(2.0*cent[1]*sqr(cent[0]))*int_y(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(sqr(cent[0])*sqr(cent[1]))*mass_m[0][0];
        for(i = 6; i< 10; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 3);
            mass_m[3][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[4][0] = mass_m[0][4];
        mass_m[4][1] = mass_m[1][4];
        mass_m[4][2] = mass_m[2][4];
        mass_m[4][3] = mass_m[3][4];
        mass_m[4][4] = mass_m[3][5];
        mass_m[4][5] = int_xy3(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)(3.0*cent[1])*int_xy2(pcrds[0],pcrds[1],pcrds[2],det)+
                       (long double)(3.0*sqr(cent[1]))*int_xy(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(cub(cent[1]))*int_x(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(cent[0])*int_y3(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(3.0*cent[0]*cent[1])*int_y2(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(3.0*cent[0]*sqr(cent[1]))*int_y(pcrds[0],pcrds[1],pcrds[2],det)+
                       (long double)(cent[0]*cub(cent[1]))*mass_m[0][0];
        // mass_m[4][6] = mass_m[3][7];
        // mass_m[4][7] = mass_m[3][8];
        // mass_m[4][8] = mass_m[3][9];
        for(i = 6; i< 10; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 4);
            mass_m[4][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[5][0] = mass_m[0][5];
        mass_m[5][1] = mass_m[1][5];
        mass_m[5][2] = mass_m[2][5];
        mass_m[5][3] = mass_m[3][5];
        mass_m[5][4] = mass_m[4][5];
        mass_m[5][5] = int_y4(pcrds[0],pcrds[1],pcrds[2],det) +
                   (long double)(sqr(cent[1])*sqr(cent[1]))*mass_m[0][0] +
                   (long double)(6.0*sqr(cent[1]))*int_y2(pcrds[0],pcrds[1],pcrds[2],det) -
                   (long double)(4.0*cent[1])*int_y3(pcrds[0],pcrds[1],pcrds[2],det) -
                   (long double)(4.0*cub(cent[1]))*int_y(pcrds[0],pcrds[1],pcrds[2],det);
        // mass_m[5][6] = mass_m[3][8];
        // mass_m[5][7] = mass_m[3][9];
        for(i = 6; i< 10; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 5);
            mass_m[5][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[6][0] = mass_m[0][6];
        mass_m[6][1] = mass_m[1][6];
        mass_m[6][2] = mass_m[2][6];
        mass_m[6][3] = mass_m[3][6];
        mass_m[6][4] = mass_m[4][6];
        mass_m[6][5] = mass_m[5][6];
        for(i = 6; i< 10; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 6);
            mass_m[6][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[7][0] = mass_m[0][7];
        mass_m[7][1] = mass_m[1][7];
        mass_m[7][2] = mass_m[2][7];
        mass_m[7][3] = mass_m[3][7];
        mass_m[7][4] = mass_m[4][7];
        mass_m[7][5] = mass_m[5][7];
        mass_m[7][6] = mass_m[6][7];
        for(i = 7; i< 10; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 7);
            mass_m[7][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }
        mass_m[8][0] = mass_m[0][8];
        mass_m[8][1] = mass_m[1][8];
        mass_m[8][2] = mass_m[2][8];
        mass_m[8][3] = mass_m[3][8];
        mass_m[8][4] = mass_m[4][8];
        mass_m[8][5] = mass_m[5][8];
        mass_m[8][6] = mass_m[6][8];
        mass_m[8][7] = mass_m[7][8];
        for(i = 8; i< 10; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 8);
            mass_m[8][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }
        mass_m[9][0] = mass_m[0][9];
        mass_m[9][1] = mass_m[1][9];
        mass_m[9][2] = mass_m[2][9];
        mass_m[9][3] = mass_m[3][9];
        mass_m[9][4] = mass_m[4][9];
        mass_m[9][5] = mass_m[5][9];
        mass_m[9][6] = mass_m[6][9];
        mass_m[9][7] = mass_m[7][9];
        mass_m[9][8] = mass_m[8][9];
        for(i = 9; i< 10; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 9);
            mass_m[9][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }
} 

LOCAL void comp_mass_matrix_p4(
        int     n_coeff,
        TRI     *tri,
        int     dim,
        double   **mass_m)
{
        int     i, j;
        double   *cent = fg_centroid(tri);
        POINT   *p[3];
        float   *pcrds[3]; 
        long double   det;
        float   crds[13][2]; 
        long double   tmpans[13];
        long double w1 =-0.149570044467670, w2 = 0.053347235608839,
                    w3 = 0.175615257433204,  w4 = 0.077113760890257;
        long double   area;

        area = fg_area(tri);
        
        for(i = 0; i < 3; i++)
        {
            p[i] = Point_of_tri(tri)[i];
            pcrds[i] = Coords(p[i]);
        }

        tri_quadrature_13_pts(pcrds[0], pcrds[1], pcrds[2], crds);

        det = (long double)(pcrds[1][0]-pcrds[0][0])*(pcrds[2][1]-pcrds[0][1]) - 
              (long double)(pcrds[2][0]-pcrds[0][0])*(pcrds[1][1]-pcrds[0][1]);

        mass_m[0][0] = det*0.5;

        mass_m[0][1] = int_x(pcrds[0],pcrds[1],pcrds[2],det) - (long double)cent[0]*mass_m[0][0];

        mass_m[0][2] = int_y(pcrds[0],pcrds[1],pcrds[2],det) - (long double)cent[1]*mass_m[0][0]; 

        mass_m[0][3] = int_x2(pcrds[0],pcrds[1],pcrds[2],det) - (long double)(2.0*cent[0])*
                       int_x(pcrds[0],pcrds[1],pcrds[2],det) + (long double)sqr(cent[0])*mass_m[0][0];

        mass_m[0][4] = int_xy(pcrds[0],pcrds[1],pcrds[2],det) - 
                       (long double)cent[1]*int_x(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)cent[0]*int_y(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(cent[0]*cent[1])*mass_m[0][0];

        mass_m[0][5] = int_y2(pcrds[0],pcrds[1],pcrds[2],det) - (long double)(2.0*cent[1])*
                       int_y(pcrds[0],pcrds[1],pcrds[2],det) + (long double)sqr(cent[1])*mass_m[0][0];

        for(i = 6; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i);
            mass_m[0][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        // (x-x_i)*all bases functions
        mass_m[1][0] = mass_m[0][1];
        mass_m[1][1] = mass_m[0][3];
        mass_m[1][2] = mass_m[0][4];
        mass_m[1][3] = int_x3(pcrds[0],pcrds[1],pcrds[2],det) - (long double)cub(cent[0])*mass_m[0][0] -
                        (long double)3.0*cent[0]*int_x2(pcrds[0],pcrds[1],pcrds[2],det) + 
                        (long double)3.0*sqr(cent[0])*int_x(pcrds[0],pcrds[1],pcrds[2],det);

        mass_m[1][4] = int_x2y(pcrds[0],pcrds[1],pcrds[2],det) - 
                       (long double)cent[1]*int_x2(pcrds[0],pcrds[1],pcrds[2],det) - 
                       (long double)(2.0*cent[0])*int_xy(pcrds[0],pcrds[1],pcrds[2],det) + 
                       (long double)(2.0*cent[0]*cent[1])*int_x(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)sqr(cent[0])*int_y(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)(sqr(cent[0])*cent[1])*mass_m[0][0];

        mass_m[1][5] = int_xy2(pcrds[0],pcrds[1],pcrds[2],det) - 
                       (long double)(2.0*cent[1])*int_xy(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(sqr(cent[1]))*int_x(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)cent[0]*int_y2(pcrds[0],pcrds[1],pcrds[2],det) + 
                       (long double)(2.0*cent[0]*cent[1])*int_y(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)(sqr(cent[1])*cent[0])*mass_m[0][0];  
        
        for(i = 6; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 1);
            mass_m[1][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[2][0] = mass_m[0][2];
        mass_m[2][1] = mass_m[0][4];
        mass_m[2][2] = mass_m[0][5];
        mass_m[2][3] = mass_m[1][4];
        mass_m[2][4] = mass_m[1][5];      
        mass_m[2][5] = int_y3(pcrds[0],pcrds[1],pcrds[2],det) - 
                       (long double)cub(cent[1])*mass_m[0][0] - 
                       (long double)(3.0*cent[1])*int_y2(pcrds[0],pcrds[1],pcrds[2],det) + 
                       (long double)(3.0*sqr(cent[1]))*int_y(pcrds[0],pcrds[1],pcrds[2],det);
        mass_m[2][6] = mass_m[1][7];
        mass_m[2][7] = mass_m[0][12];
        mass_m[2][8] = mass_m[0][13];
        mass_m[2][9] = mass_m[0][14];
        for(i = 10; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 2);
            mass_m[2][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[3][0] = mass_m[0][3];
        mass_m[3][1] = mass_m[1][3];
        mass_m[3][2] = mass_m[2][3];
        mass_m[3][3] = int_x4(pcrds[0],pcrds[1],pcrds[2],det) + 
                   (long double)(sqr(cent[0])*sqr(cent[0]))*mass_m[0][0] +
                   (long double)(6.0*sqr(cent[0]))*int_x2(pcrds[0],pcrds[1],pcrds[2],det) -
                   (long double)(4.0*cent[0])*int_x3(pcrds[0],pcrds[1],pcrds[2],det) - 
                   (long double)(4.0*cub(cent[0]))*int_x(pcrds[0],pcrds[1],pcrds[2],det); 

        mass_m[3][4] = int_x3y(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)cent[1]*int_x3(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)(3.0*cent[0])*int_x2y(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(3.0*cent[0]*cent[1])*int_x2(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(3.0*sqr(cent[0]))*int_xy(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)(3.0*sqr(cent[0])*cent[1])*int_x(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)cub(cent[0])*int_y(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(cub(cent[0])*cent[1])*mass_m[0][0];

        mass_m[3][5] = int_x2y2(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(2.0*cent[1])*int_x2y(pcrds[0],pcrds[1],pcrds[2],det)+
                       (long double)(sqr(cent[1]))*int_x2(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(2.0*cent[0])*int_xy2(pcrds[0],pcrds[1],pcrds[2],det)+
                       (long double)(4.0*cent[0]*cent[1])*int_xy(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(2.0*cent[0]*sqr(cent[1]))*int_x(pcrds[0],pcrds[1],pcrds[2],det)+
                       (long double)sqr(cent[0])*int_y2(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(2.0*cent[1]*sqr(cent[0]))*int_y(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(sqr(cent[0])*sqr(cent[1]))*mass_m[0][0];
        mass_m[3][6] = mass_m[1][10];
        mass_m[3][7] = mass_m[2][10];
        mass_m[3][8] = mass_m[2][11];
        mass_m[3][9] = mass_m[1][13];
        for(i = 10; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 3);
            mass_m[3][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[4][0] = mass_m[0][4];
        mass_m[4][1] = mass_m[1][4];
        mass_m[4][2] = mass_m[2][4];
        mass_m[4][3] = mass_m[3][4];
        mass_m[4][4] = mass_m[3][5];
        mass_m[4][5] = int_xy3(pcrds[0],pcrds[1],pcrds[2],det) -
                       (long double)(3.0*cent[1])*int_xy2(pcrds[0],pcrds[1],pcrds[2],det)+
                       (long double)(3.0*sqr(cent[1]))*int_xy(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(cub(cent[1]))*int_x(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(cent[0])*int_y3(pcrds[0],pcrds[1],pcrds[2],det) +
                       (long double)(3.0*cent[0]*cent[1])*int_y2(pcrds[0],pcrds[1],pcrds[2],det)-
                       (long double)(3.0*cent[0]*sqr(cent[1]))*int_y(pcrds[0],pcrds[1],pcrds[2],det)+
                       (long double)(cent[0]*cub(cent[1]))*mass_m[0][0];
        mass_m[4][6] = mass_m[2][10];
        mass_m[4][7] = mass_m[1][12];
        mass_m[4][8] = mass_m[1][13];
        mass_m[4][9] = mass_m[1][14];
        mass_m[4][10] = mass_m[3][11];
        mass_m[4][11] = mass_m[3][12];
        mass_m[4][12] = mass_m[3][13];
        for(i = 13; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 4);
            mass_m[4][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[5][0] = mass_m[0][5];
        mass_m[5][1] = mass_m[1][5];
        mass_m[5][2] = mass_m[2][5];
        mass_m[5][3] = mass_m[3][5];
        mass_m[5][4] = mass_m[4][5];
        mass_m[5][5] = int_y4(pcrds[0],pcrds[1],pcrds[2],det) +
                   (long double)(sqr(cent[1])*sqr(cent[1]))*mass_m[0][0] +
                   (long double)(6.0*sqr(cent[1]))*int_y2(pcrds[0],pcrds[1],pcrds[2],det) -
                   (long double)(4.0*cent[1])*int_y3(pcrds[0],pcrds[1],pcrds[2],det) -
                   (long double)(4.0*cub(cent[1]))*int_y(pcrds[0],pcrds[1],pcrds[2],det);
        mass_m[5][6] = mass_m[1][13];
        mass_m[5][7] = mass_m[3][9];
        mass_m[5][8] = mass_m[1][14];
        mass_m[5][9] = mass_m[2][14];
        mass_m[5][10] = mass_m[4][11];
        mass_m[5][11] = mass_m[4][12];
        mass_m[5][12] = mass_m[4][13];
        mass_m[5][13] = mass_m[4][14];
        for(i = 14; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 5);
            mass_m[5][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[6][0] = mass_m[0][6];
        mass_m[6][1] = mass_m[1][6];
        mass_m[6][2] = mass_m[2][6];
        mass_m[6][3] = mass_m[3][6];
        mass_m[6][4] = mass_m[4][6];
        mass_m[6][5] = mass_m[5][6];
        for(i = 6; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 6);
            mass_m[6][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[7][0] = mass_m[0][7];
        mass_m[7][1] = mass_m[1][7];
        mass_m[7][2] = mass_m[2][7];
        mass_m[7][3] = mass_m[3][7];
        mass_m[7][4] = mass_m[4][7];
        mass_m[7][5] = mass_m[5][7];
        mass_m[7][6] = mass_m[6][7];
        for(i = 7; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 7);
            mass_m[7][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }
        mass_m[8][0] = mass_m[0][8];
        mass_m[8][1] = mass_m[1][8];
        mass_m[8][2] = mass_m[2][8];
        mass_m[8][3] = mass_m[3][8];
        mass_m[8][4] = mass_m[4][8];
        mass_m[8][5] = mass_m[5][8];
        mass_m[8][6] = mass_m[6][8];
        mass_m[8][7] = mass_m[7][8];
        for(i = 8; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 8);
            mass_m[8][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }
        mass_m[9][0] = mass_m[0][9];
        mass_m[9][1] = mass_m[1][9];
        mass_m[9][2] = mass_m[2][9];
        mass_m[9][3] = mass_m[3][9];
        mass_m[9][4] = mass_m[4][9];
        mass_m[9][5] = mass_m[5][9];
        mass_m[9][6] = mass_m[6][9];
        mass_m[9][7] = mass_m[7][9];
        mass_m[9][8] = mass_m[8][9];
        for(i = 9; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 9);
            mass_m[9][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }
        mass_m[10][0] = mass_m[0][10];
        mass_m[10][1] = mass_m[1][10];
        mass_m[10][2] = mass_m[2][10];
        mass_m[10][3] = mass_m[3][10];
        mass_m[10][4] = mass_m[4][10];
        mass_m[10][5] = mass_m[5][10];
        mass_m[10][6] = mass_m[6][10];
        mass_m[10][7] = mass_m[7][10];
        mass_m[10][8] = mass_m[8][10];
        mass_m[10][9] = mass_m[9][10];
        for(i = 10; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 9);
            mass_m[10][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[11][0] = mass_m[0][11];
        mass_m[11][1] = mass_m[1][11];
        mass_m[11][2] = mass_m[2][11];
        mass_m[11][3] = mass_m[3][11];
        mass_m[11][4] = mass_m[4][11];
        mass_m[11][5] = mass_m[5][11];
        mass_m[11][6] = mass_m[6][11];
        mass_m[11][7] = mass_m[7][11];
        mass_m[11][8] = mass_m[8][11];
        mass_m[11][9] = mass_m[9][11];
        mass_m[11][10] = mass_m[10][11];
        for(i = 11; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 9);
            mass_m[11][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[12][0] = mass_m[0][12];
        mass_m[12][1] = mass_m[1][12];
        mass_m[12][2] = mass_m[2][12];
        mass_m[12][3] = mass_m[3][12];
        mass_m[12][4] = mass_m[4][12];
        mass_m[12][5] = mass_m[5][12];
        mass_m[12][6] = mass_m[6][12];
        mass_m[12][7] = mass_m[7][12];
        mass_m[12][8] = mass_m[8][12];
        mass_m[12][9] = mass_m[9][12];
        mass_m[12][10] = mass_m[10][12];
        mass_m[12][11] = mass_m[11][12];
        for(i = 12; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 9);
            mass_m[12][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[13][0] = mass_m[0][13];
        mass_m[13][1] = mass_m[1][13];
        mass_m[13][2] = mass_m[2][13];
        mass_m[13][3] = mass_m[3][13];
        mass_m[13][4] = mass_m[4][13];
        mass_m[13][5] = mass_m[5][13];
        mass_m[13][6] = mass_m[6][13];
        mass_m[13][7] = mass_m[7][13];
        mass_m[13][8] = mass_m[8][13];
        mass_m[13][9] = mass_m[9][13];
        mass_m[13][10] = mass_m[10][13];
        mass_m[13][11] = mass_m[11][13];
        mass_m[13][12] = mass_m[12][13];
        for(i = 13; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 9);
            mass_m[13][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }

        mass_m[14][0] = mass_m[0][14];
        mass_m[14][1] = mass_m[1][14];
        mass_m[14][2] = mass_m[2][14];
        mass_m[14][3] = mass_m[3][14];
        mass_m[14][4] = mass_m[4][14];
        mass_m[14][5] = mass_m[5][14];
        mass_m[14][6] = mass_m[6][14];
        mass_m[14][7] = mass_m[7][14];
        mass_m[14][8] = mass_m[8][14];
        mass_m[14][9] = mass_m[9][14];
        mass_m[14][10] = mass_m[10][14];
        mass_m[14][11] = mass_m[11][14];
        mass_m[14][12] = mass_m[12][14];
        mass_m[14][13] = mass_m[13][14];
        for(i = 14; i< 15; i++)
        {
            for(j = 0; j < 13; j++)
                tmpans[j] = B_val(crds, cent, j, i)*B_val(crds, cent, j, 9);
            mass_m[14][i] =  (w1*tmpans[0] + w2*(tmpans[1] + tmpans[2] + tmpans[3]) +
                             w3*(tmpans[4] + tmpans[5] + tmpans[6]) +
                             w4*(tmpans[7] + tmpans[8] + tmpans[9] +
                                 tmpans[10] + tmpans[11] + tmpans[12]))*area;
        }
} 


/*
*       comp_mass_matrix_1st_row():
*
*       Dispatches on the value of MAX_N_COEF and forwards every argument,
*       unchanged, to the matching first-row routine:
*               6  -> comp_mass_matrix_1st_row_p2
*               10 -> comp_mass_matrix_1st_row_p3
*               15 -> comp_mass_matrix_1st_row_p4
*       Any other value prints an error message and calls clean_up(ERROR).
*/
EXPORT void comp_mass_matrix_1st_row(
        int     n_coeff,
        TRI     *tri,
        int     dim,
        double   *cent,
        double   **mass_m)
{
        switch (MAX_N_COEF)
        {
        case 6:
            comp_mass_matrix_1st_row_p2(n_coeff, tri, dim, cent, mass_m);
            break;
        case 10:
            comp_mass_matrix_1st_row_p3(n_coeff, tri, dim, cent, mass_m);
            break;
        case 15:
            comp_mass_matrix_1st_row_p4(n_coeff, tri, dim, cent, mass_m);
            break;
        default:
            /* No first-row routine exists for this basis size. */
            printf("ERROR: implement comp_mass_matrix_1st_row for MAX_N_COEF = %d\n",
                MAX_N_COEF);
            clean_up(ERROR);
            break;
        }
}

/*
*       comp_mass_matrix_1st_row_p2():
*
*       Writes mass_m[0][0..5] for the triangle tri.  With (cx,cy) = cent
*       and the raw moments returned by int_x, int_y, int_x2, int_xy and
*       int_y2, the six entries are the expansions of
*               1, (x-cx), (y-cy), (x-cx)^2, (x-cx)(y-cy), (y-cy)^2
*       in those raw moments; entry 0 is det/2, where det is the 2x2
*       determinant built from the vertex coordinate differences.
*
*       n_coeff and dim are not read by this routine.
*/
LOCAL void comp_mass_matrix_1st_row_p2(
        int     n_coeff,
        TRI     *tri,
        int     dim,
        double   *cent,
        double   **mass_m)
{
        float         *v[3];            /* coordinates of the 3 vertices */
        double        *row = mass_m[0]; /* the row being filled          */
        long double   det;
        long double   ix, iy, ix2, ixy, iy2;
        int           k;

        for (k = 0; k < 3; k++)
            v[k] = Coords(Point_of_tri(tri)[k]);

        det = (long double)(v[1][0]-v[0][0])*(v[2][1]-v[0][1]) -
              (long double)(v[2][0]-v[0][0])*(v[1][1]-v[0][1]);

        /* Raw moments, each evaluated once and reused below. */
        ix  = int_x(v[0],v[1],v[2],det);
        iy  = int_y(v[0],v[1],v[2],det);
        ix2 = int_x2(v[0],v[1],v[2],det);
        ixy = int_xy(v[0],v[1],v[2],det);
        iy2 = int_y2(v[0],v[1],v[2],det);

        /* row[0] is stored (as double) first: later entries read it back. */
        row[0] = det*0.5;

        row[1] = ix - (long double)cent[0]*row[0];

        row[2] = iy - (long double)cent[1]*row[0];

        row[3] = ix2 - (long double)2.0*cent[0]*ix +
                 (long double)sqr(cent[0])*row[0];

        row[4] = ixy -
                 (long double)cent[1]*ix-
                 (long double)cent[0]*iy +
                 (long double)cent[0]*cent[1]*row[0];

        row[5] = iy2 - (long double)2.0*cent[1]*iy +
                 (long double)sqr(cent[1])*row[0];
}

/*
*       comp_mass_matrix_1st_row_p3():
*
*       Writes mass_m[0][0..9] for the triangle tri.
*
*       Entries 0..5 are closed-form: with (cx,cy) = cent they are the
*       expansions of 1, (x-cx), (y-cy), (x-cx)^2, (x-cx)(y-cy), (y-cy)^2
*       in the raw moments returned by the int_* routines.
*
*       Entries 6..9 come from a 13-point rule: B_val(pts, cent, q, k) is
*       sampled at each of the 13 points filled in by tri_quadrature_13_pts,
*       the samples are combined with four weights (one for point 0, one for
*       points 1-3, one for points 4-6, one for points 7-12) and the sum is
*       scaled by fg_area(tri).  Presumably B_val returns basis function k
*       at quadrature point q -- confirm against its definition.
*
*       n_coeff and dim are not read by this routine.
*/
LOCAL void comp_mass_matrix_1st_row_p3(
        int     n_coeff,
        TRI     *tri,
        int     dim,
        double   *cent,
        double   **mass_m)
{
        /* Quadrature weights, named after the point indices they multiply. */
        static long double wt0    =-0.149570044467670,
                           wt1_3  = 0.053347235608839,
                           wt4_6  = 0.175615257433204,
                           wt7_12 = 0.077113760890257;
        float         *v[3];            /* coordinates of the 3 vertices */
        float         qpt[13][2];       /* quadrature point coordinates  */
        double        *row = mass_m[0]; /* the row being filled          */
        long double   area, det;
        long double   ix, iy, ix2, ixy, iy2;
        int           k, q;

        area = fg_area(tri);

        for (k = 0; k < 3; k++)
            v[k] = Coords(Point_of_tri(tri)[k]);

        tri_quadrature_13_pts(v[0], v[1], v[2], qpt);

        det = (long double)(v[1][0]-v[0][0])*(v[2][1]-v[0][1]) -
              (long double)(v[2][0]-v[0][0])*(v[1][1]-v[0][1]);

        /* Raw moments, each evaluated once and reused below. */
        ix  = int_x(v[0],v[1],v[2],det);
        iy  = int_y(v[0],v[1],v[2],det);
        ix2 = int_x2(v[0],v[1],v[2],det);
        ixy = int_xy(v[0],v[1],v[2],det);
        iy2 = int_y2(v[0],v[1],v[2],det);

        /* row[0] is stored (as double) first: later entries read it back. */
        row[0] = det*0.5;

        row[1] = ix - (long double)cent[0]*row[0];

        row[2] = iy - (long double)cent[1]*row[0];

        row[3] = ix2 - (long double)2.0*cent[0]*ix +
                 (long double)sqr(cent[0])*row[0];

        row[4] = ixy -
                 (long double)cent[1]*ix-
                 (long double)cent[0]*iy +
                 (long double)cent[0]*cent[1]*row[0];

        row[5] = iy2 - (long double)2.0*cent[1]*iy +
                 (long double)sqr(cent[1])*row[0];

        /* Remaining entries by quadrature. */
        for (k = 6; k < 10; k++)
        {
            long double f[13], s1_3, s4_6, s7_12;

            for (q = 0; q < 13; q++)
                f[q] = B_val(qpt, cent, q, k);

            /* Sum the samples that share a weight. */
            s1_3  = f[1] + f[2] + f[3];
            s4_6  = f[4] + f[5] + f[6];
            s7_12 = f[7] + f[8] + f[9] + f[10] + f[11] + f[12];

            row[k] = (wt0*f[0] + wt1_3*s1_3 + wt4_6*s4_6 + wt7_12*s7_12)*area;
        }
}

/*
*       comp_mass_matrix_1st_row_p4():
*
*       Writes mass_m[0][0..14] for the triangle tri.
*
*       Entries 0..5 are closed-form: with (cx,cy) = cent they are the
*       expansions of 1, (x-cx), (y-cy), (x-cx)^2, (x-cx)(y-cy), (y-cy)^2
*       in the raw moments returned by the int_* routines.
*
*       Entries 6..14 come from a 13-point rule: B_val(pts, cent, q, k) is
*       sampled at each of the 13 points filled in by tri_quadrature_13_pts,
*       the samples are combined with four weights (one for point 0, one for
*       points 1-3, one for points 4-6, one for points 7-12) and the sum is
*       scaled by fg_area(tri).  Presumably B_val returns basis function k
*       at quadrature point q -- confirm against its definition.
*
*       n_coeff and dim are not read by this routine.
*/
LOCAL void comp_mass_matrix_1st_row_p4(
        int     n_coeff,
        TRI     *tri,
        int     dim,
        double   *cent,
        double   **mass_m)
{
        /* Quadrature weights, named after the point indices they multiply. */
        long double   wt0    =-0.149570044467670;
        long double   wt1_3  = 0.053347235608839;
        long double   wt4_6  = 0.175615257433204;
        long double   wt7_12 = 0.077113760890257;
        float         *v[3];            /* coordinates of the 3 vertices */
        float         qpt[13][2];       /* quadrature point coordinates  */
        double        *row = mass_m[0]; /* the row being filled          */
        long double   area, det;
        long double   ix, iy, ix2, ixy, iy2;
        int           k, q;

        area = fg_area(tri);

        for (k = 0; k < 3; k++)
            v[k] = Coords(Point_of_tri(tri)[k]);

        tri_quadrature_13_pts(v[0], v[1], v[2], qpt);

        det = (long double)(v[1][0]-v[0][0])*(v[2][1]-v[0][1]) -
              (long double)(v[2][0]-v[0][0])*(v[1][1]-v[0][1]);

        /* Raw moments, each evaluated once and reused below. */
        ix  = int_x(v[0],v[1],v[2],det);
        iy  = int_y(v[0],v[1],v[2],det);
        ix2 = int_x2(v[0],v[1],v[2],det);
        ixy = int_xy(v[0],v[1],v[2],det);
        iy2 = int_y2(v[0],v[1],v[2],det);

        /* row[0] is stored (as double) first: later entries read it back. */
        row[0] = det*0.5;

        row[1] = ix - (long double)cent[0]*row[0];

        row[2] = iy - (long double)cent[1]*row[0];

        row[3] = ix2 - (long double)2.0*cent[0]*ix +
                 (long double)sqr(cent[0])*row[0];

        row[4] = ixy -
                 (long double)cent[1]*ix-
                 (long double)cent[0]*iy +
                 (long double)cent[0]*cent[1]*row[0];

        row[5] = iy2 - (long double)2.0*cent[1]*iy +
                 (long double)sqr(cent[1])*row[0];

        /* Remaining entries by quadrature. */
        for (k = 6; k < 15; k++)
        {
            long double f[13], s1_3, s4_6, s7_12;

            for (q = 0; q < 13; q++)
                f[q] = B_val(qpt, cent, q, k);

            /* Sum the samples that share a weight. */
            s1_3  = f[1] + f[2] + f[3];
            s4_6  = f[4] + f[5] + f[6];
            s7_12 = f[7] + f[8] + f[9] + f[10] + f[11] + f[12];

            row[k] = (wt0*f[0] + wt1_3*s1_3 + wt4_6*s4_6 + wt7_12*s7_12)*area;
        }
}


/*
*       int_x():
*
*       Returns det*( a*I(1,0) + b*I(0,1) + c/2 ) with
*               a = crds1[0]-crds0[0], b = crds2[0]-crds0[0], c = crds0[0]
*       and I(i,j) = integral_eta_xi(i,j).  Presumably I(i,j) is the moment
*       of eta^i*xi^j over the reference triangle, which would make this the
*       integral of x over the triangle (crds0,crds1,crds2) when det is the
*       determinant of the affine map -- confirm against integral_eta_xi.
*/
EXPORT long double int_x(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const long double i10 = integral_eta_xi(1,0);
      const long double i01 = integral_eta_xi(0,1);

      return det*((long double)(crds1[0]-crds0[0])*i10 +
                  (long double)(crds2[0]-crds0[0])*i01 +
                  (long double)crds0[0]*0.5);
}

/*
*       int_y():
*
*       Returns det*( d*I(1,0) + e*I(0,1) + f/2 ) with
*               d = crds1[1]-crds0[1], e = crds2[1]-crds0[1], f = crds0[1]
*       and I(i,j) = integral_eta_xi(i,j).  Presumably I(i,j) is the moment
*       of eta^i*xi^j over the reference triangle, which would make this the
*       integral of y over the triangle (crds0,crds1,crds2) when det is the
*       determinant of the affine map -- confirm against integral_eta_xi.
*/
EXPORT long double int_y(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const long double i10 = integral_eta_xi(1,0);
      const long double i01 = integral_eta_xi(0,1);

      return det*((long double)(crds1[1]-crds0[1])*i10 +
                  (long double)(crds2[1]-crds0[1])*i01 +
                  (long double)crds0[1]*0.5);
}

/*
*       int_x2():
*
*       Returns det times the expansion of (a*eta + b*xi + c)^2, where
*               a = crds1[0]-crds0[0], b = crds2[0]-crds0[0], c = crds0[0],
*       each eta^i*xi^j is replaced by integral_eta_xi(i,j) and the constant
*       term is weighted by 0.5:
*               a^2 I20 + b^2 I02 + c^2/2 + 2ab I11 + 2ac I10 + 2bc I01.
*       Presumably this is the integral of x^2 over the triangle -- confirm
*       against integral_eta_xi.
*/
EXPORT long double int_x2(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const long double i20 = integral_eta_xi(2,0);
      const long double i02 = integral_eta_xi(0,2);
      const long double i11 = integral_eta_xi(1,1);
      const long double i10 = integral_eta_xi(1,0);
      const long double i01 = integral_eta_xi(0,1);

      return det*((long double)sqr(crds1[0]-crds0[0])*i20 +
                  (long double)sqr(crds2[0]-crds0[0])*i02 +
                  (long double)sqr(crds0[0])*0.5 +
                  (long double)2.0*(crds1[0]-crds0[0])*(crds2[0]-crds0[0])*i11 +
                  (long double)2.0*(crds1[0]-crds0[0])*crds0[0]*i10 +
                  (long double)2.0*(crds2[0]-crds0[0])*crds0[0]*i01);
}

/*
*       int_y2():
*
*       Returns det times the expansion of (d*eta + e*xi + f)^2, where
*               d = crds1[1]-crds0[1], e = crds2[1]-crds0[1], f = crds0[1],
*       each eta^i*xi^j is replaced by integral_eta_xi(i,j) and the constant
*       term is weighted by 0.5:
*               d^2 I20 + e^2 I02 + f^2/2 + 2de I11 + 2df I10 + 2ef I01.
*       Presumably this is the integral of y^2 over the triangle -- confirm
*       against integral_eta_xi.
*/
EXPORT long double int_y2(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const long double i20 = integral_eta_xi(2,0);
      const long double i02 = integral_eta_xi(0,2);
      const long double i11 = integral_eta_xi(1,1);
      const long double i10 = integral_eta_xi(1,0);
      const long double i01 = integral_eta_xi(0,1);

      return det*((long double)sqr(crds1[1]-crds0[1])*i20 +
                  (long double)sqr(crds2[1]-crds0[1])*i02 +
                  (long double)sqr(crds0[1])*0.5 +
                  (long double)2.0*(crds1[1]-crds0[1])*(crds2[1]-crds0[1])*i11 +
                  (long double)2.0*(crds1[1]-crds0[1])*crds0[1]*i10 +
                  (long double)2.0*(crds2[1]-crds0[1])*crds0[1]*i01);
}

/*
*       int_xy():
*
*       Returns det times the expansion of
*               (a*eta + b*xi + c)*(d*eta + e*xi + f),
*       where a = crds1[0]-crds0[0], b = crds2[0]-crds0[0], c = crds0[0],
*             d = crds1[1]-crds0[1], e = crds2[1]-crds0[1], f = crds0[1],
*       each eta^i*xi^j is replaced by integral_eta_xi(i,j) and the constant
*       term c*f is weighted by 0.5.  Presumably this is the integral of x*y
*       over the triangle -- confirm against integral_eta_xi.
*/
EXPORT long double int_xy(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const long double i20 = integral_eta_xi(2,0);
      const long double i11 = integral_eta_xi(1,1);
      const long double i02 = integral_eta_xi(0,2);
      const long double i10 = integral_eta_xi(1,0);
      const long double i01 = integral_eta_xi(0,1);

      return det*(
                 /* a*eta times (d*eta + e*xi + f) */
                 (long double)(crds1[0]-crds0[0])*(crds1[1]-crds0[1])*i20 +
                 (long double)(crds1[0]-crds0[0])*(crds2[1]-crds0[1])*i11 +
                 (long double)crds0[1]*(crds1[0]-crds0[0])*i10 +
                 /* b*xi times (d*eta + e*xi + f) */
                 (long double)(crds2[0]-crds0[0])*(crds1[1]-crds0[1])*i11 +
                 (long double)(crds2[0]-crds0[0])*(crds2[1]-crds0[1])*i02 +
                 (long double)crds0[1]*(crds2[0]-crds0[0])*i01 +
                 /* c times (d*eta + e*xi + f) */
                 (long double)crds0[0]*(crds1[1]-crds0[1])*i10 +
                 (long double)crds0[0]*(crds2[1]-crds0[1])*i01 +
                 (long double)crds0[0]*crds0[1]*0.5
                );
}

/*
*       int_x3():
*
*       Returns det times the expansion of (a*eta + b*xi + c)^3, where
*               a = crds1[0]-crds0[0], b = crds2[0]-crds0[0], c = crds0[0],
*       each eta^i*xi^j is replaced by integral_eta_xi(i,j) and the constant
*       term c^3 is weighted by 0.5.  The expansion is written as
*       (a*eta + b*xi + c) times the six-term square, so mixed terms appear
*       split into a "1x" and a "2x" piece (and a*b*c as three "2x" pieces)
*       rather than with their combined coefficients 3 and 6.
*       Presumably this is the integral of x^3 over the triangle -- confirm
*       against integral_eta_xi.
*/
EXPORT long double int_x3(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const long double i30 = integral_eta_xi(3,0);
      const long double i21 = integral_eta_xi(2,1);
      const long double i12 = integral_eta_xi(1,2);
      const long double i03 = integral_eta_xi(0,3);
      const long double i20 = integral_eta_xi(2,0);
      const long double i11 = integral_eta_xi(1,1);
      const long double i02 = integral_eta_xi(0,2);
      const long double i10 = integral_eta_xi(1,0);
      const long double i01 = integral_eta_xi(0,1);

      return det*( (long double)cub(crds1[0]-crds0[0])*i30 +
                   (long double)sqr(crds1[0]-crds0[0])*(crds2[0]-crds0[0])*i21 +
                   (long double)sqr(crds1[0]-crds0[0])*crds0[0]*i20 +
                   (long double)sqr(crds2[0]-crds0[0])*(crds1[0]-crds0[0])*i12 +
                   (long double)cub(crds2[0]-crds0[0])*i03 +
                   (long double)crds0[0]*sqr(crds2[0]-crds0[0])*i02 +
                   (long double)sqr(crds0[0])*(crds1[0]-crds0[0])*i10 +
                   (long double)sqr(crds0[0])*(crds2[0]-crds0[0])*i01 +
                   (long double)cub(crds0[0])*0.5 +
                   /* pieces carrying the factor 2 of the cross terms */
                   (long double)2.0*sqr(crds1[0]-crds0[0])*(crds2[0]-crds0[0])*i21 +
                   (long double)2.0*sqr(crds2[0]-crds0[0])*(crds1[0]-crds0[0])*i12 +
                   (long double)2.0*crds0[0]*(crds1[0]-crds0[0])*(crds2[0]-crds0[0])*i11 +
                   (long double)2.0*sqr(crds1[0]-crds0[0])*crds0[0]*i20 +
                   (long double)2.0*crds0[0]*(crds1[0]-crds0[0])*(crds2[0]-crds0[0])*i11 +
                   (long double)2.0*(crds1[0]-crds0[0])*sqr(crds0[0])*i10 +
                   (long double)2.0*crds0[0]*(crds1[0]-crds0[0])*(crds2[0]-crds0[0])*i11 +
                   (long double)2.0*crds0[0]*sqr(crds2[0]-crds0[0])*i02 +
                   (long double)2.0*sqr(crds0[0])*(crds2[0]-crds0[0])*i01
                 );
}

/*
*       int_y3():
*
*       Returns det times the expansion of (d*eta + e*xi + f)^3, where
*               d = crds1[1]-crds0[1], e = crds2[1]-crds0[1], f = crds0[1],
*       each eta^i*xi^j is replaced by integral_eta_xi(i,j) and the constant
*       term f^3 is weighted by 0.5.  The expansion is written as
*       (d*eta + e*xi + f) times the six-term square, so mixed terms appear
*       split into a "1x" and a "2x" piece (and d*e*f as three "2x" pieces)
*       rather than with their combined coefficients 3 and 6.
*       Presumably this is the integral of y^3 over the triangle -- confirm
*       against integral_eta_xi.
*
*       Every coefficient product is widened to long double at its first
*       factor, exactly as in int_x3, so the multiplications are carried out
*       in long double and the x and y cubic moments round the same way.
*       (Casting the finished product instead would evaluate it in the
*       operands' own narrower type and widen only the already rounded
*       result.)
*/
EXPORT long double int_y3(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const long double i30 = integral_eta_xi(3,0);
      const long double i21 = integral_eta_xi(2,1);
      const long double i12 = integral_eta_xi(1,2);
      const long double i03 = integral_eta_xi(0,3);
      const long double i20 = integral_eta_xi(2,0);
      const long double i11 = integral_eta_xi(1,1);
      const long double i02 = integral_eta_xi(0,2);
      const long double i10 = integral_eta_xi(1,0);
      const long double i01 = integral_eta_xi(0,1);

      return det*( (long double)cub(crds1[1]-crds0[1])*i30 +
                   (long double)sqr(crds1[1]-crds0[1])*(crds2[1]-crds0[1])*i21 +
                   (long double)sqr(crds1[1]-crds0[1])*crds0[1]*i20 +
                   (long double)sqr(crds2[1]-crds0[1])*(crds1[1]-crds0[1])*i12 +
                   (long double)cub(crds2[1]-crds0[1])*i03 +
                   (long double)crds0[1]*sqr(crds2[1]-crds0[1])*i02 +
                   (long double)sqr(crds0[1])*(crds1[1]-crds0[1])*i10 +
                   (long double)sqr(crds0[1])*(crds2[1]-crds0[1])*i01 +
                   (long double)cub(crds0[1])*0.5 +
                   /* pieces carrying the factor 2 of the cross terms */
                   (long double)2.0*sqr(crds1[1]-crds0[1])*(crds2[1]-crds0[1])*i21 +
                   (long double)2.0*sqr(crds2[1]-crds0[1])*(crds1[1]-crds0[1])*i12 +
                   (long double)2.0*crds0[1]*(crds1[1]-crds0[1])*(crds2[1]-crds0[1])*i11 +
                   (long double)2.0*sqr(crds1[1]-crds0[1])*crds0[1]*i20 +
                   (long double)2.0*crds0[1]*(crds1[1]-crds0[1])*(crds2[1]-crds0[1])*i11 +
                   (long double)2.0*(crds1[1]-crds0[1])*sqr(crds0[1])*i10 +
                   (long double)2.0*crds0[1]*(crds1[1]-crds0[1])*(crds2[1]-crds0[1])*i11 +
                   (long double)2.0*crds0[1]*sqr(crds2[1]-crds0[1])*i02 +
                   (long double)2.0*sqr(crds0[1])*(crds2[1]-crds0[1])*i01
                 );
}

/*
*       int_x2y():
*
*       Returns det times the expansion of
*               (a*eta + b*xi + c)^2 * (d*eta + e*xi + f),
*       where a = crds1[0]-crds0[0], b = crds2[0]-crds0[0], c = crds0[0],
*             d = crds1[1]-crds0[1], e = crds2[1]-crds0[1], f = crds0[1],
*       each eta^i*xi^j is replaced by integral_eta_xi(i,j) and the constant
*       term c^2*f is weighted by 0.5.  The 18 terms are the six terms of
*       the square, each multiplied in turn by d*eta, e*xi and f.
*       Presumably this is the integral of x^2*y over the triangle --
*       confirm against integral_eta_xi.
*/
EXPORT long double int_x2y(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const long double i30 = integral_eta_xi(3,0);
      const long double i21 = integral_eta_xi(2,1);
      const long double i12 = integral_eta_xi(1,2);
      const long double i03 = integral_eta_xi(0,3);
      const long double i20 = integral_eta_xi(2,0);
      const long double i11 = integral_eta_xi(1,1);
      const long double i02 = integral_eta_xi(0,2);
      const long double i10 = integral_eta_xi(1,0);
      const long double i01 = integral_eta_xi(0,1);

      return det*(
           /* a^2 eta^2 times (d*eta + e*xi + f) */
           (long double)sqr(crds1[0]-crds0[0])*(crds1[1]-crds0[1])*i30 +
           (long double)sqr(crds1[0]-crds0[0])*(crds2[1]-crds0[1])*i21 +
           (long double)sqr(crds1[0]-crds0[0])*crds0[1]*i20 +
           /* 2ab eta xi times (d*eta + e*xi + f) */
           (long double)2.0*(crds1[0]-crds0[0])*(crds2[0]-crds0[0])*(crds1[1]-crds0[1])*i21 +
           (long double)2.0*(crds1[0]-crds0[0])*(crds2[0]-crds0[0])*(crds2[1]-crds0[1])*i12 +
           (long double)2.0*(crds1[0]-crds0[0])*(crds2[0]-crds0[0])*crds0[1]*i11+
           /* b^2 xi^2 times (d*eta + e*xi + f) */
           (long double)sqr(crds2[0]-crds0[0])*(crds1[1]-crds0[1])*i12 +
           (long double)sqr(crds2[0]-crds0[0])*(crds2[1]-crds0[1])*i03+
           (long double)sqr(crds2[0]-crds0[0])*crds0[1]*i02+
           /* 2ac eta times (d*eta + e*xi + f) */
           (long double)2.0*(crds1[0]-crds0[0])*(crds1[1]-crds0[1])*crds0[0]*i20+
           (long double)2.0*(crds1[0]-crds0[0])*(crds2[1]-crds0[1])*crds0[0]*i11+
           (long double)2.0*crds0[0]*(crds1[0]-crds0[0])*crds0[1]*i10+
           /* 2bc xi times (d*eta + e*xi + f) */
           (long double)2.0*crds0[0]*(crds2[0]-crds0[0])*(crds1[1]-crds0[1])*i11+
           (long double)2.0*crds0[0]*(crds2[0]-crds0[0])*(crds2[1]-crds0[1])*i02+
           (long double)2.0*crds0[0]*crds0[1]*(crds2[0]-crds0[0])*i01+
           /* c^2 times (d*eta + e*xi + f) */
           (long double)sqr(crds0[0])*(crds1[1]-crds0[1])*i10+
           (long double)sqr(crds0[0])*(crds2[1]-crds0[1])*i01+
           (long double)sqr(crds0[0])*crds0[1]*0.5
            );
}

/*
*       int_xy2():
*
*       Returns det times the expansion of
*               (a*eta + b*xi + c) * (d*eta + e*xi + f)^2,
*       where a = crds1[0]-crds0[0], b = crds2[0]-crds0[0], c = crds0[0],
*             d = crds1[1]-crds0[1], e = crds2[1]-crds0[1], f = crds0[1],
*       each eta^i*xi^j is replaced by integral_eta_xi(i,j) and the constant
*       term c*f^2 is weighted by 0.5.  The 18 terms are a*eta, b*xi and c,
*       each multiplied in turn by the six terms of the square.
*       Presumably this is the integral of x*y^2 over the triangle --
*       confirm against integral_eta_xi.
*
*       The placement of the long double casts differs from term to term;
*       it is kept as is because it decides in which type each product is
*       evaluated.
*/
EXPORT long double int_xy2(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const long double i30 = integral_eta_xi(3,0);
      const long double i21 = integral_eta_xi(2,1);
      const long double i12 = integral_eta_xi(1,2);
      const long double i03 = integral_eta_xi(0,3);
      const long double i20 = integral_eta_xi(2,0);
      const long double i11 = integral_eta_xi(1,1);
      const long double i02 = integral_eta_xi(0,2);
      const long double i10 = integral_eta_xi(1,0);
      const long double i01 = integral_eta_xi(0,1);

      return det*(
           /* a*eta times the square */
           (long double)((long double)sqr(crds1[1]-crds0[1])*(long double)(crds1[0]-crds0[0]))*i30 +
           (long double)((long double)sqr(crds2[1]-crds0[1])*(long double)(crds1[0]-crds0[0]))*i12 +
           (long double)(crds1[0]-crds0[0])*(long double)sqr(crds0[1])*i10 +
           (long double)(2.0*(crds1[0]-crds0[0])*(long double)(crds2[1]-crds0[1])*(crds1[1]-crds0[1]))*i21 +
           (long double)2.0*(crds1[0]-crds0[0])*(long double)crds0[1]*(long double)(crds1[1]-crds0[1])*i20 +
           (long double)2.0*(crds1[0]-crds0[0])*(long double)(crds2[1]-crds0[1])*(long double)crds0[1]*i11+
           /* b*xi times the square */
           (long double)sqr(crds1[1]-crds0[1])*(long double)(crds2[0]-crds0[0])*i21 +
           (long double)sqr(crds2[1]-crds0[1])*(long double)(crds2[0]-crds0[0])*i03+
           (long double)sqr(crds0[1])*(long double)(crds2[0]-crds0[0])*i01+
           (long double)2.0*(crds2[0]-crds0[0])*(long double)(crds1[1]-crds0[1])*(long double)(crds2[1]-crds0[1])*i12+
           (long double)2.0*(crds2[0]-crds0[0])*(crds1[1]-crds0[1])*crds0[1]*i11+
           (long double)2.0*crds0[1]*(crds2[0]-crds0[0])*(crds2[1]-crds0[1])*i02+
           /* c times the square */
           (long double)crds0[0]*sqr(crds1[1]-crds0[1])*i20+
           (long double)crds0[0]*sqr(crds2[1]-crds0[1])*i02+
           (long double)crds0[0]*sqr(crds0[1])*0.5+
           (long double)2.0*crds0[0]*(crds1[1]-crds0[1])*(crds2[1]-crds0[1])*i11+
           (long double)2.0*crds0[0]*(crds1[1]-crds0[1])*crds0[1]*i10+
           (long double)2.0*crds0[0]*crds0[1]*(crds2[1]-crds0[1])*i01
            );
}

/*
*       int_x4():
*
*       Returns det times the multinomial expansion of
*               (a*eta + b*xi + c)^4,
*       where a = crds1[0]-crds0[0], b = crds2[0]-crds0[0], c = crds0[0],
*       each eta^i*xi^j is replaced by integral_eta_xi(i,j) and the constant
*       term c^4 is weighted by 0.5.  The terms are grouped by multinomial
*       coefficient: 1 (pure fourth powers), 6 (two squares), 4 (cube times
*       first power) and 12 (one square times two first powers).
*       Presumably this is the integral of x^4 over the triangle -- confirm
*       against integral_eta_xi.
*/
EXPORT long double int_x4(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const long double i40 = integral_eta_xi(4,0);
      const long double i04 = integral_eta_xi(0,4);
      const long double i31 = integral_eta_xi(3,1);
      const long double i22 = integral_eta_xi(2,2);
      const long double i13 = integral_eta_xi(1,3);
      const long double i30 = integral_eta_xi(3,0);
      const long double i21 = integral_eta_xi(2,1);
      const long double i12 = integral_eta_xi(1,2);
      const long double i03 = integral_eta_xi(0,3);
      const long double i20 = integral_eta_xi(2,0);
      const long double i11 = integral_eta_xi(1,1);
      const long double i02 = integral_eta_xi(0,2);
      const long double i10 = integral_eta_xi(1,0);
      const long double i01 = integral_eta_xi(0,1);

      return det*(
           /* coefficient 1 */
           (long double)sqr(crds1[0]-crds0[0])*sqr(crds1[0]-crds0[0])*i40 +
           (long double)sqr(crds2[0]-crds0[0])*sqr(crds2[0]-crds0[0])*i04 +
           (long double)sqr(crds0[0])*sqr(crds0[0])*0.5+
           /* coefficient 6 */
           (long double)6.0*sqr(crds1[0]-crds0[0])*sqr(crds2[0]-crds0[0])*i22 +
           (long double)6.0*sqr(crds1[0]-crds0[0])*sqr(crds0[0])*i20 +
           (long double)6.0*sqr(crds2[0]-crds0[0])*sqr(crds0[0])*i02 +
           /* coefficient 4 */
           (long double)4.0*cub(crds1[0]-crds0[0])*(crds2[0]-crds0[0])*i31 +
           (long double)4.0*cub(crds1[0]-crds0[0])*(crds0[0])*i30 +
           (long double)4.0*(crds1[0]-crds0[0])*cub(crds2[0]-crds0[0])*i13 +
           (long double)4.0*cub(crds2[0]-crds0[0])*crds0[0]*i03 +
           (long double)4.0*(crds1[0]-crds0[0])*cub(crds0[0])*i10 +
           (long double)4.0*(crds2[0]-crds0[0])*cub(crds0[0])*i01 +
           /* coefficient 12 */
           (long double)12.0*(crds1[0]-crds0[0])*(crds2[0]-crds0[0])*sqr(crds0[0])*i11 +
           (long double)12.0*sqr(crds1[0]-crds0[0])*(crds2[0]-crds0[0])*crds0[0]*i21 +
           (long double)12.0*(crds1[0]-crds0[0])*sqr(crds2[0]-crds0[0])*crds0[0]*i12
                );
}

/*
*			int_y4():
*
*	Returns det times the integral of y^4 over the reference triangle,
*	where y = oy + uy*xi + vy*eta is the affine interpolant of the y
*	coordinates of the three vertices (crds0 is the origin vertex, crds1
*	pairs with xi, crds2 pairs with eta).  Same 15-term expansion as the
*	x^4 case, applied to component [1]; 0.5 is the integral of 1.
*
*	det is presumably the Jacobian determinant of the map from the
*	reference triangle to the physical one -- confirm against callers.
*/
EXPORT long double int_y4(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const float oy = crds0[1];            /* y at the origin vertex */
      const float uy = crds1[1]-crds0[1];   /* y edge component paired with xi */
      const float vy = crds2[1]-crds0[1];   /* y edge component paired with eta */
      long double sum;

      /* pure fourth powers */
      sum  = (long double)sqr(uy)*sqr(uy)*integral_eta_xi(4,0);
      sum += (long double)sqr(vy)*sqr(vy)*integral_eta_xi(0,4);
      sum += (long double)sqr(oy)*sqr(oy)*0.5;

      /* 6 * (square) * (square) */
      sum += (long double)6.0*sqr(uy)*sqr(vy)*integral_eta_xi(2,2);
      sum += (long double)6.0*sqr(uy)*sqr(oy)*integral_eta_xi(2,0);
      sum += (long double)6.0*sqr(vy)*sqr(oy)*integral_eta_xi(0,2);

      /* 4 * (cube) * (linear) */
      sum += (long double)4.0*cub(uy)*vy*integral_eta_xi(3,1);
      sum += (long double)4.0*cub(uy)*oy*integral_eta_xi(3,0);
      sum += (long double)4.0*uy*cub(vy)*integral_eta_xi(1,3);
      sum += (long double)4.0*cub(vy)*oy*integral_eta_xi(0,3);
      sum += (long double)4.0*uy*cub(oy)*integral_eta_xi(1,0);
      sum += (long double)4.0*vy*cub(oy)*integral_eta_xi(0,1);

      /* 12 * (square) * (linear) * (linear) */
      sum += (long double)12.0*uy*vy*sqr(oy)*integral_eta_xi(1,1);
      sum += (long double)12.0*sqr(uy)*vy*oy*integral_eta_xi(2,1);
      sum += (long double)12.0*uy*sqr(vy)*oy*integral_eta_xi(1,2);

      return det*sum;
}

/*
*			int_x3y():
*
*	Returns det times the integral of x^3*y over the reference triangle,
*	with x = ox + ux*xi + vx*eta and y = oy + uy*xi + vy*eta built from
*	the vertex coordinates (crds0 is the origin vertex, crds1 pairs with
*	xi, crds2 pairs with eta).  Each of the ten monomials of x^3 is
*	multiplied by the three terms of y, giving 30 products that are
*	weighted by integral_eta_xi(); 0.5 is the integral of 1.
*
*	det is presumably the Jacobian determinant of the map from the
*	reference triangle to the physical one -- confirm against callers.
*/
EXPORT long double int_x3y(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const float ox = crds0[0],          oy = crds0[1];
      const float ux = crds1[0]-crds0[0], uy = crds1[1]-crds0[1];
      const float vx = crds2[0]-crds0[0], vy = crds2[1]-crds0[1];
      long double sum;

      /* ux^3 * y */
      sum  = (long double)cub(ux)*uy*integral_eta_xi(4,0);
      sum += (long double)cub(ux)*vy*integral_eta_xi(3,1);
      sum += (long double)cub(ux)*oy*integral_eta_xi(3,0);
      /* vx^3 * y */
      sum += (long double)cub(vx)*uy*integral_eta_xi(1,3);
      sum += (long double)cub(vx)*vy*integral_eta_xi(0,4);
      sum += (long double)cub(vx)*oy*integral_eta_xi(0,3);
      /* ox^3 * y */
      sum += (long double)cub(ox)*uy*integral_eta_xi(1,0);
      sum += (long double)cub(ox)*vy*integral_eta_xi(0,1);
      sum += (long double)cub(ox)*oy*0.5;

      /* 3*ux^2*vx * y */
      sum += (long double)3.0*sqr(ux)*vx*uy*integral_eta_xi(3,1);
      sum += (long double)3.0*sqr(ux)*vx*vy*integral_eta_xi(2,2);
      sum += (long double)3.0*sqr(ux)*vx*oy*integral_eta_xi(2,1);

      /* 3*ux^2*ox * y */
      sum += (long double)3.0*sqr(ux)*ox*uy*integral_eta_xi(3,0);
      sum += (long double)3.0*sqr(ux)*ox*vy*integral_eta_xi(2,1);
      sum += (long double)3.0*sqr(ux)*ox*oy*integral_eta_xi(2,0);

      /* 3*ux*vx^2 * y */
      sum += (long double)3.0*ux*sqr(vx)*uy*integral_eta_xi(2,2);
      sum += (long double)3.0*ux*sqr(vx)*vy*integral_eta_xi(1,3);
      sum += (long double)3.0*ux*sqr(vx)*oy*integral_eta_xi(1,2);

      /* 3*vx^2*ox * y */
      sum += (long double)3.0*sqr(vx)*ox*uy*integral_eta_xi(1,2);
      sum += (long double)3.0*sqr(vx)*ox*vy*integral_eta_xi(0,3);
      sum += (long double)3.0*sqr(vx)*ox*oy*integral_eta_xi(0,2);

      /* 3*ux*ox^2 * y */
      sum += (long double)3.0*ux*sqr(ox)*uy*integral_eta_xi(2,0);
      sum += (long double)3.0*ux*sqr(ox)*vy*integral_eta_xi(1,1);
      sum += (long double)3.0*ux*sqr(ox)*oy*integral_eta_xi(1,0);

      /* 3*vx*ox^2 * y */
      sum += (long double)3.0*vx*sqr(ox)*uy*integral_eta_xi(1,1);
      sum += (long double)3.0*vx*sqr(ox)*vy*integral_eta_xi(0,2);
      sum += (long double)3.0*vx*sqr(ox)*oy*integral_eta_xi(0,1);

      /* 6*ux*vx*ox * y */
      sum += (long double)6.0*ux*vx*ox*uy*integral_eta_xi(2,1);
      sum += (long double)6.0*ux*vx*ox*vy*integral_eta_xi(1,2);
      sum += (long double)6.0*ux*vx*ox*oy*integral_eta_xi(1,1);

      return det*sum;
}


/*
*	Helper for int_x2y2(): integral over the reference triangle of
*	xi^p * eta^q * y^2, with y = oy + uy*xi + vy*eta, obtained by
*	expanding y^2 into its six monomials.  integral_eta_xi(0,0)
*	evaluates to exactly 0.5, the integral of 1.
*/
LOCAL long double x2y2_ysq_moment(
      float oy,
      float uy,
      float vy,
      int   p,
      int   q)
{
      long double part;

      part  = (long double)sqr(uy)*integral_eta_xi(p+2,q);
      part += (long double)sqr(vy)*integral_eta_xi(p,q+2);
      part += (long double)sqr(oy)*integral_eta_xi(p,q);
      part += (long double)2.0*uy*vy*integral_eta_xi(p+1,q+1);
      part += (long double)2.0*uy*oy*integral_eta_xi(p+1,q);
      part += (long double)2.0*vy*oy*integral_eta_xi(p,q+1);
      return part;
}

/*
*			int_x2y2():
*
*	Returns det times the integral of x^2*y^2 over the reference
*	triangle, with x = ox + ux*xi + vx*eta and y = oy + uy*xi + vy*eta
*	built from the vertex coordinates (crds0 is the origin vertex, crds1
*	pairs with xi, crds2 pairs with eta).  x^2 is expanded into six
*	monomials xi^p*eta^q; each coefficient multiplies the corresponding
*	y^2 moment.
*
*	det is presumably the Jacobian determinant of the map from the
*	reference triangle to the physical one -- confirm against callers.
*/
EXPORT long double int_x2y2(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const float ox = crds0[0],          oy = crds0[1];
      const float ux = crds1[0]-crds0[0], uy = crds1[1]-crds0[1];
      const float vx = crds2[0]-crds0[0], vy = crds2[1]-crds0[1];
      long double sum;

      /* ux^2 xi^2, vx^2 eta^2, ox^2 */
      sum  = (long double)sqr(ux)*x2y2_ysq_moment(oy,uy,vy,2,0);
      sum += (long double)sqr(vx)*x2y2_ysq_moment(oy,uy,vy,0,2);
      sum += (long double)sqr(ox)*x2y2_ysq_moment(oy,uy,vy,0,0);

      /* cross terms 2*ux*vx xi*eta, 2*ux*ox xi, 2*vx*ox eta */
      sum += (long double)2.0*ux*vx*x2y2_ysq_moment(oy,uy,vy,1,1);
      sum += (long double)2.0*ux*ox*x2y2_ysq_moment(oy,uy,vy,1,0);
      sum += (long double)2.0*vx*ox*x2y2_ysq_moment(oy,uy,vy,0,1);

      return det*sum;
}

/*
*			int_xy3():
*
*	Returns det times the integral of x*y^3 over the reference triangle,
*	with x = ox + ux*xi + vx*eta and y = oy + uy*xi + vy*eta built from
*	the vertex coordinates (crds0 is the origin vertex, crds1 pairs with
*	xi, crds2 pairs with eta).  Each of the ten monomials of y^3 is
*	multiplied by the three terms of x; the y-only coefficient of each
*	triple is formed once in k.  0.5 is the integral of 1.
*
*	det is presumably the Jacobian determinant of the map from the
*	reference triangle to the physical one -- confirm against callers.
*/
EXPORT long double int_xy3(
      float *crds0,
      float *crds1,
      float *crds2,
      long double det)
{
      const float ox = crds0[0],          oy = crds0[1];
      const float ux = crds1[0]-crds0[0], uy = crds1[1]-crds0[1];
      const float vx = crds2[0]-crds0[0], vy = crds2[1]-crds0[1];
      long double k;      /* y-only coefficient shared by three terms */
      long double sum;

      /* uy^3 * x */
      k = (long double)cub(uy);
      sum  = (long double)(k*ux)*integral_eta_xi(4,0);
      sum += (long double)(k*vx)*integral_eta_xi(3,1);
      sum += (long double)(k*ox)*integral_eta_xi(3,0);

      /* vy^3 * x */
      k = (long double)cub(vy);
      sum += (long double)(k*ux)*integral_eta_xi(1,3);
      sum += (long double)(k*vx)*integral_eta_xi(0,4);
      sum += (long double)(k*ox)*integral_eta_xi(0,3);

      /* oy^3 * x */
      k = (long double)cub(oy);
      sum += (long double)(k*ux)*integral_eta_xi(1,0);
      sum += (long double)(k*vx)*integral_eta_xi(0,1);
      sum += (long double)(k*ox*0.5);

      /* 3*uy^2*vy * x */
      k = 3.0*(long double)sqr(uy)*vy;
      sum += (long double)(k*ux)*integral_eta_xi(3,1);
      sum += (long double)(k*vx)*integral_eta_xi(2,2);
      sum += (long double)(k*ox)*integral_eta_xi(2,1);

      /* 3*uy^2*oy * x */
      k = 3.0*(long double)sqr(uy)*oy;
      sum += (long double)(k*ux)*integral_eta_xi(3,0);
      sum += (long double)(k*vx)*integral_eta_xi(2,1);
      sum += (long double)(k*ox)*integral_eta_xi(2,0);

      /* 3*uy*vy^2 * x */
      k = 3.0*uy*(long double)sqr(vy);
      sum += (long double)(k*ux)*integral_eta_xi(2,2);
      sum += (long double)(k*vx)*integral_eta_xi(1,3);
      sum += (long double)(k*ox)*integral_eta_xi(1,2);

      /* 3*vy^2*oy * x */
      k = 3.0*(long double)sqr(vy)*oy;
      sum += (long double)(k*ux)*integral_eta_xi(1,2);
      sum += (long double)(k*vx)*integral_eta_xi(0,3);
      sum += (long double)(k*ox)*integral_eta_xi(0,2);

      /* 3*uy*oy^2 * x */
      k = 3.0*uy*(long double)sqr(oy);
      sum += (long double)(k*ux)*integral_eta_xi(2,0);
      sum += (long double)(k*vx)*integral_eta_xi(1,1);
      sum += (long double)(k*ox)*integral_eta_xi(1,0);

      /* 3*vy*oy^2 * x */
      k = 3.0*vy*(long double)sqr(oy);
      sum += (long double)(k*ux)*integral_eta_xi(1,1);
      sum += (long double)(k*vx)*integral_eta_xi(0,2);
      sum += (long double)(k*ox)*integral_eta_xi(0,1);

      /* 6*uy*vy*oy * x: the product is formed before the widening cast */
      sum += (long double)(6.0*uy*vy*oy*ux)*integral_eta_xi(2,1);
      sum += (long double)(6.0*uy*vy*oy*vx)*integral_eta_xi(1,2);
      sum += (long double)(6.0*uy*vy*oy*ox)*integral_eta_xi(1,1);

      return det*sum;
}

/*
*			integral_eta_xi():
*
*	Returns
*
*	    sum_{l=0}^{m} (-1)^l C(m,l) / (n_xi+l+1),   divided by m,
*
*	with m = n_eta+1.  This is the binomial expansion of the integral of
*	xi^n_xi * (1-xi)^m / m over [0,1], i.e. the integral of
*	xi^n_xi * eta^n_eta over the reference triangle
*	0 <= xi <= 1, 0 <= eta <= 1-xi.
*
*	The binomial coefficient C(m,l) is advanced incrementally instead of
*	being formed from int factorials, which overflow once m exceeds 12.
*	Every intermediate value of the recurrence is an exactly
*	representable integer for the small orders used in this file, so the
*	results are unchanged there.
*/
LOCAL long double integral_eta_xi(
        int n_xi,
        int n_eta)
{
        const int   m = n_eta+1;
        int         l;
        int         sign;
        long double binom = 1.0;    /* C(m,l), starting from C(m,0) */
        long double ans = 0.0;

        for(l = 0; l <= m; l++)
        {
            sign = (l%2 == 0? 1:-1);
            ans += binom*sign/(n_xi+l+1);
            /* C(m,l+1) = C(m,l)*(m-l)/(l+1); the quotient is an integer */
            binom = binom*(m-l)/(l+1);
        }
        return ans/m;
}

/*
*	Returns n! for n >= 2 and 1 for every n <= 1.  Negative arguments
*	used to recurse without bound; they now yield 1.  The result is an
*	int, so it is only meaningful while n! fits in an int.
*/
LOCAL int factorial(int n)
{
    int result = 1;
    int k;

    for(k = 2; k <= n; k++)
        result *= k;
    return result;
}

/*
*			matrix_inv():
*
*	Inverse of the size x size matrix mat, written to inv, using the
*	LAPACK routines dgetrf (LU factorization) followed by dgetri
*	(inverse from the LU factors).  mat is copied into a column-major
*	work array first and is not modified.
*
*	The work arrays are static and are reallocated only when size
*	differs from the previous call.  They are double precision because
*	dgetrf/dgetri are declared in this file as taking double*.
*
*	A nonzero info from either routine prints an error and calls
*	clean_up(ERROR).
*/
EXPORT void  matrix_inv(
	double     **mat,
        int        size,
        double     **inv)
{
        int       i, j;
        static int size_ = 0;
        static int *IPIV = NULL;
        int       info;
                  /*IPIV: (output) INTEGER array, dimension (min(M,N))
                     The pivot indices; for 1 <= i <= min(M,N), row i of the
                     matrix was interchanged with row IPIV(i).
                   info: = 0 --- successful exit
                         < 0:  if INFO = -i, the i-th argument had an illegal value
                         > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
                          has been completed, but the factor U is exactly
                          singular, and division by zero will occur if it is used
                          to solve a system of equations.
                   */
        static double    *A = NULL, *work = NULL;

        if(size_ != size)
        {
            size_ = size;
            if(IPIV != NULL)
                free_these(3,A, work, IPIV);

            vector(&A,size*size,sizeof(double));
            vector(&work,size*size,sizeof(double));
            vector(&IPIV, size,sizeof(int));
        }

        /* column-major copy: element (i,j) lives at A[j*size+i] */
        for(j = 0; j < size; j++)
        {
            for(i = 0; i < size; i++)
            {
                A[j*size+i] = mat[i][j];
            }
        }

        FORTRAN_NAME(dgetrf)(&size_, &size_, A, &size_, IPIV, &info);
        if(info != 0)
        {
            printf("ERROR: matrix_inv, dgetrf failed, info = %d\n", info);
            clean_up(ERROR);
        }
        /* the workspace length handed to dgetri is size_ */
        FORTRAN_NAME(dgetri)(&size_, A, &size_, IPIV, work, &size_, &info);
        if(info != 0)
        {
            printf("ERROR: matrix_inv, dgetri failed, info = %d\n", info);
            clean_up(ERROR);
        }

        for(j = 0; j < size; j++)
        {
            for(i = 0; i < size; i++)
            {
                inv[i][j] = A[j*size+i];
            }
        }

        /* A, work and IPIV are kept for reuse by the next call */
}

/*
*			inverse_matrix():
*
*	Inverse of the size x size matrix mat (0-based, mat[row][col]),
*	written to inv.  The work is delegated to inverse_matrix_gj().
*
*	Everything after the early return is a disabled alternative: LU
*	decomposition (ludcmp/lubksb on 1-based work arrays) with one step
*	of iterative refinement per column.  It is unreachable and is kept
*	only so it can be re-enabled by removing the delegation.
*/
EXPORT void  inverse_matrix(
        double      **mat,
        int        size,
        double      **inv)
{

        int        i, j, ii, jj;
        static int size_ = 0, *indx;
        static long double *col = NULL, **a, *colcp, *r;
        long double d, sdp;

        /* Active path.  The call and the return are separate statements
         * because a void function may not return an expression in C. */
        inverse_matrix_gj(mat, size, inv);
        return;

        /* ---- disabled LU path, unreachable ---- */

        if(size != size_)
        {
            if(col != NULL)
            {
                free(col);
                free(a);
                free(indx);
                free(colcp);
                free(r);
            }
            vector(&col, (size+1), sizeof(long double));
            vector(&colcp, (size+1), sizeof(long double));
            vector(&r, (size+1), sizeof(long double));
            vector(&indx, (size+1), sizeof(int));
            matrix(&a, (size+1), (size+1), sizeof(long double));
            size_ = size;
        }
        /* copy into the 1-based work matrix */
        for(j = 0; j < size; j++)
        {
            for(i = 0; i < size; i++)
                a[i+1][j+1] = mat[i][j];
        }

        ludcmp(a, size, indx, &d);

        for(j = 1; j <= size; j++)
        {
            /* right-hand side: j-th unit vector */
            for(i = 1; i <= size; i++)
            {
                colcp[i] = col[i] = 0.0;
            }
            colcp[j] = col[j] = 1.0;

            lubksb(a,size,indx,col);

            /* improve the soln: residual r = mat*col - e_j, solve for the
             * correction and subtract it */
            for(ii = 1; ii <= size; ii++)
            {
                sdp = -colcp[ii];
                for(jj = 1; jj <= size; jj++)
                    sdp += mat[ii-1][jj-1]*col[jj];
                r[ii] = sdp;
            }
            lubksb(a,size,indx,r);
            for(i = 1; i <= size; i++)
                inv[i-1][j-1] = (col[i] - r[i]);
            /* END improve the soln */
        }
}

/*
*			lubksb():
*
*	Solves the linear system whose LU factors are stored in a (unit
*	lower factor below the diagonal, upper factor on and above it) with
*	row permutation indx.  b holds the right-hand side on entry and the
*	solution on return.  All arrays are 1-based: entries 1..n are used.
*	The layout appears to follow the Numerical Recipes routine of the
*	same name.
*/
LOCAL void lubksb(
	long double **a,
        int         n,
        int         *indx,
        long double *b)
{
        int          row, col, perm;
        int          first_nz = 0;  /* first row with nonzero rhs; 0 = none yet */
        long double  acc;

        /* forward substitution, undoing the row permutation on the fly */
        for(row = 1; row <= n; row++)
        {
            perm = indx[row];
            acc = b[perm];
            b[perm] = b[row];
            if(first_nz != 0)
            {
                for(col = first_nz; col < row; col++)
                    acc -= a[row][col]*b[col];
            }
            else if(acc != 0.0)
            {
                /* leading zeros of b contribute nothing; start here */
                first_nz = row;
            }
            b[row] = acc;
        }

        /* back substitution */
        for(row = n; row >= 1; row--)
        {
            acc = b[row];
            for(col = row+1; col <= n; col++)
                acc -= a[row][col]*b[col];
            b[row] = acc/a[row][row];
        }
}

/*
*			ludcmp():
*
*	In-place LU decomposition with implicit row scaling and partial
*	pivoting of the n x n matrix a (1-based, entries 1..n used).  On
*	return a holds the factors, indx[j] the row swapped into position j,
*	and *d is +1 or -1 according to the parity of the row swaps.  The
*	layout appears to follow the Numerical Recipes routine of the same
*	name.
*
*	A zero row or a zero pivot prints an error and calls
*	clean_up(ERROR).  The zero-pivot check replaces a statement that
*	assigned 0.0 to an already-zero pivot and then divided by it.
*
*	The scaling vector vv is static and is reallocated only when n
*	differs from the previous call.
*/
LOCAL void ludcmp(
	long double **a,
        int         n,
        int         *indx,
        long double *d)
{
        int i, imax, j, k;
        long double big, dum, sum, temp;
        static long double *vv = NULL;
        static int       size_ = 0;

        if(n != size_)
        {
            if(vv != NULL)
            {
                free(vv);
            }
            vector(&vv, (n+1), sizeof(long double));
            size_ = n;
        }

        *d = 1.0;
        /* vv[i] = 1/(largest magnitude in row i), used to scale pivots */
        for(i = 1; i <= n; i++)
        {
            big = 0.0;
            for(j = 1; j <= n; j++)
            {
                if( (temp = fabsl(a[i][j])) > big)
                    big = temp;
            }
            if(big == 0.0)
            {
                printf("ERROR: Singular matrix in ludcmp\n");
                clean_up(ERROR);
            }
            vv[i] = 1.0/big;
        }

        for(j = 1; j <= n; j++)
        {
            /* entries above the diagonal of column j */
            for(i = 1; i < j; i++)
            {
                sum = a[i][j];
                for(k = 1; k < i; k++)
                    sum -= a[i][k]*a[k][j];
                a[i][j] = sum;
            }
            /* entries on and below the diagonal; track the best pivot */
            big = 0.0;
            imax = j;   /* defined even if no comparison below succeeds */
            for(i = j; i <= n; i++)
            {
                sum = a[i][j];
                for(k = 1; k < j; k++)
                    sum -= a[i][k]*a[k][j];
                a[i][j] = sum;
                if( (dum = vv[i]*fabsl(sum)) >= big )
                {
                    big = dum;
                    imax = i;
                }
            }
            if(j != imax)
            {
                for(k = 1; k <= n; k++)
                {
                    dum = a[imax][k];
                    a[imax][k] = a[j][k];
                    a[j][k] = dum;
                }
                *d = -(*d);
                vv[imax] = vv[j];
            }
            indx[j] = imax;
            if(a[j][j] == 0.0)
            {
                printf("ERROR: Singular matrix in ludcmp, zero pivot in column %d\n", j);
                clean_up(ERROR);
            }
            if(j != n)
            {
                dum = 1.0/a[j][j];
                for(i = j+1; i <= n; i++)
                    a[i][j] *= dum;
            }
        }
}

/*
*			print_ldb_matrix():
*
*	Prints a rows x cols matrix of doubles to stdout, one matrix row per
*	output line, rows in increasing order.  Output starts and ends with
*	two newlines; title, when not NULL, is printed on its own line
*	first.  format is used as the printf format for every element and
*	must therefore consume exactly one double.
*/
EXPORT  void    print_ldb_matrix(
        const char      *title,
        int             rows,
        int             cols,
        double     **matrix,
        const char      *format)
{
        int             r, c;

        (void) printf("\n\n");
        if (title != NULL)
        {
            (void) printf("%s\n",title);
        }

        for (r = 0; r < rows; r++)
        {
            for (c = 0; c < cols; c++)
            {
                (void) printf(format,matrix[r][c]);
            }
            (void) printf("\n");
        }

        (void) printf("\n\n");
}

/*
*	Compute ans = mat * vec, where mat is row x col, vec has col entries
*	and ans has row entries.  Each ans[i] is zeroed and then accumulated
*	in place.
*/
EXPORT void matrix_vec_mult(
        double    **mat,
        double    *vec,
        int      row,
        int      col,
        double    *ans)
{
        int      r, c;
        double   *out;

        for(r = 0; r < row; r++)
        {
            out = ans + r;
            *out = 0.0;
            for(c = 0; c < col; c++)
                *out += mat[r][c]*vec[c];
        }
}

/*
*	Compute ans = mat * matr, where ans is row x col.  The summation
*	index also runs over col, so mat is read as row x col and matr as
*	col x col.  Each ans[i][j] is zeroed and then accumulated in place.
*/
EXPORT void matrix_matrix_mult(
        double    **mat,
        double    **matr,
        int      row,
        int      col,
        double    **ans)
{
        int      r, c, m;
        double   *cell;

        for(r = 0; r < row; r++)
        {
            for(c = 0; c < col; c++)
            {
                cell = &ans[r][c];
                *cell = 0.0;
                for(m = 0; m < col; m++)
                    *cell += mat[r][m]*matr[m][c];
            }
        }
}

/*
*	Transpose: tra[j][i] = mat[i][j] for 0 <= i < row, 0 <= j < col.
*	mat is row x col and tra must be at least col x row.
*/
EXPORT void trans_matrix(
            double **mat,
            int  row,
            int  col,
            double **tra)
{
            int r = 0;

            while(r < row)
            {
                int c = 0;

                while(c < col)
                {
                    tra[c][r] = mat[r][c];
                    c++;
                }
                r++;
            }
}

/*
*	Transpose of a double matrix: writes mat[i][j] to tra[j][i] for every
*	0 <= i < row and 0 <= j < col.  mat is row x col and tra must be at
*	least col x row.
*/
EXPORT void d_trans_matrix(
            double **mat,
            int  row,
            int  col,
            double **tra)
{
            int src_row, src_col;

            for(src_row = 0; src_row < row; src_row++)
                for(src_col = 0; src_col < col; src_col++)
                    tra[src_col][src_row] = mat[src_row][src_col];
}


/*
*			vh_val():
*
*	Value at the point crds of the indx-th monomial in the offsets
*	dx = crds[0]-cent[0] and dy = crds[1]-cent[1]:
*
*	    indx : 0  1   2   3     4      5     6     7        8        9
*	    value: 1  dx  dy  dx^2  dx*dy  dy^2  dx^3  dx^2*dy  dx*dy^2  dy^3
*
*	Any other indx prints an error and calls clean_up(ERROR).  ans is
*	initialised so that the return value is defined (0) should
*	clean_up() ever return.
*/
EXPORT float vh_val(
        float *crds,
        double *cent,
        int   indx)
{
        float ans = 0.0;
        switch(indx)
        {
        case 0:
            ans = 1.0;
        break;
        case 1:
            ans = crds[0]-cent[0];
        break;
        case 2:
            ans = crds[1]-cent[1];
        break;
        case 3:
            ans = sqr(crds[0]-cent[0]);
        break;
        case 4:
            ans = (crds[0]-cent[0])*(crds[1]-cent[1]);
        break;
        case 5:
            ans = sqr(crds[1]-cent[1]);
        break;
        case 6:
            ans = cub(crds[0]-cent[0]);
        break;
        case 7:
            ans = sqr(crds[0]-cent[0])*(crds[1]-cent[1]);
        break;
        case 8:
            ans = (crds[0]-cent[0])*sqr(crds[1]-cent[1]);
        break;
        case 9:
            ans = cub(crds[1]-cent[1]);
        break;
        default:
            printf("ERROR vh_val, implement 2D degree %d\n", indx);
            clean_up(ERROR);
        }
        return ans;
}

/*
*			find_corres_tri():
*
*	Linear search over every triangle of every surface of mesh for one
*	whose fg_centroid() lies within tol of cn in both x and y, where
*	tol = 0.0001 * min(h[0],h[1]) and h is the spacing array of the
*	mesh's computational grid.  Returns the first match, or NULL when no
*	triangle matches.
*/
EXPORT TRI *find_corres_tri(
        float     *cn,
        INTERFACE *mesh)
{
        SURFACE  **s = mesh->surfaces;
        float    *h = computational_grid(mesh)->h;
        TRI      *t;
        double    *c;
        double    tol;

        for(; s && *s; s++)
        {
            t = first_tri(*s);
            while(!at_end_of_tri_list(t,*s))
            {
                c = fg_centroid(t);
                tol = min(h[0],h[1])*0.0001;
                if(fabs(c[0]-cn[0]) < tol && fabs(c[1]-cn[1]) < tol)
                    return t;
                t = t->next;
            }
        }

        return NULL;
}

// Gauss-Jordan elimination with full pivoting, done in place.
//
// a is an n x n matrix and b an n x m block of right-hand sides; both are
// 1-based (entries 1..n, resp. 1..m, are used).  a is overwritten by the
// elimination result, which inverse_matrix_gj() reads back as the inverse;
// the rows of b receive the same row operations.  A zero pivot prints an
// error and calls clean_up(ERROR).
//
// The bookkeeping arrays are static and reallocated only when n differs
// from the previous call.  The layout appears to follow the Numerical
// Recipes routine of the same name.
LOCAL void gaussj(
	long double **a,
        int         n,
        long double **b,
        int         m)
{
	static int size_ = 0,  *indxc = NULL, *indxr, *ipiv;
	int   i, icol, irow, j, k , l, ll;
        long double big, dum, pivinv, temp;  /* temp is used by SWAP */

        if(n != size_)
        {
            if(indxc != NULL)
            {
                free(indxc); free(indxr); free(ipiv);
            }
            vector(&indxc, (n+1),  sizeof(int));
            vector(&indxr, (n+1),  sizeof(int));
            vector(&ipiv, (n+1),  sizeof(int));
            size_ = n;
        }

        /* ipiv[k] counts how often column k has been used as a pivot */
        for(i = 1; i <= n; i++) ipiv[i] = 0;
        for(i = 1; i <= n; i++)
        {
            /* search the rows and columns not yet pivoted for the entry
             * of largest magnitude */
            /* NOTE(review): irow/icol stay unset if no entry passes the
             * >= test (e.g. NaN entries) -- confirm inputs are finite */
            big = 0.0;
            for(j = 1; j <= n; j++)
            {
                if(ipiv[j] != 1)
                {
                    for(k =1; k<=n; k++)
                    {
                        if(ipiv[k] == 0)
                        {
                            if(fabsl(a[j][k]) >= big)
                            {
                                big = fabsl(a[j][k]);
                                irow = j;
                                icol = k;
                            }  
                        }
                    } 
                }
            }
            ++(ipiv[icol]);

            /* move the pivot onto the diagonal by swapping rows */
            if(irow != icol)
            {
                for(l = 1; l <= n; l++) SWAP(a[irow][l], a[icol][l]);
                for(l = 1; l <= m; l++) SWAP(b[irow][l], b[icol][l]);
            } 
            /* remember the swap so the columns can be unscrambled later */
            indxr[i] = irow;
            indxc[i] = icol;
            if(a[icol][icol] == 0.0)
            {
                printf("ERROR() gaussj, main ele = 0.0\n");
                clean_up(ERROR);
            }
            /* scale the pivot row; the pivot slot is set to 1 first so it
             * ends up holding 1/pivot */
            pivinv = 1.0/a[icol][icol];
            a[icol][icol] = 1.0;
            for(l = 1; l <= n; l++) a[icol][l] *= pivinv;
            for(l = 1; l <= m; l++) b[icol][l] *= pivinv;
            /* eliminate the pivot column from every other row */
            for(ll= 1; ll <= n; ll++)
            {
                if(ll != icol)
                {
                    dum = a[ll][icol];
                    a[ll][icol] = 0.0;
                    for(l = 1; l <= n; l++) a[ll][l] -= a[icol][l]*dum;
                    for(l = 1; l <= m; l++) b[ll][l] -= b[icol][l]*dum;
                }
            }
        }

        /* undo the row swaps, in reverse order, as column swaps of a */
        for(l =n; l >=1; l--)
        {
            if(indxr[l] != indxc[l])
            {
                for(k =1; k <= n; k++)
                    SWAP(a[k][indxr[l]], a[k][indxc[l]]);
            }
        } 
}

// Inverse by Gauss-Jordan elimination.
//
// Copies the size x size matrix mat (0-based) into a 1-based long double
// work matrix, runs gaussj() on it and copies the result back into inv.
// mat itself is not modified.
//
// gaussj() also takes a block of right-hand sides; only the inverse is
// needed here, so b merely carries up to two unit columns.  The column
// count is clamped to size so that a 1 x 1 system does not index past the
// (size+1) x (size+1) allocation of b.
//
// The work matrices are static and reallocated only when size differs
// from the previous call.
LOCAL void  inverse_matrix_gj(
        double      **mat,
        int        size,
        double      **inv)
{
	static int size_ = 0;
        static long double **b = NULL, **a;
        int i, j;
        int nrhs;

        if(size != size_)
        {
            if(b != NULL)
            {
                free(a); free(b);
            }
            size_ = size;
            matrix(&a, (size+1), (size+1), sizeof(long double));
            matrix(&b, (size+1), (size+1), sizeof(long double));
        }
        for(j = 0; j < size; j++)
        {
            for(i = 0; i < size; i++)
                a[i+1][j+1] = mat[i][j];
        }

        /* dummy right-hand sides: the first nrhs unit vectors */
        nrhs = (size < 2) ? size : 2;
        for(j = 1; j <= nrhs; j++)
        {
            for(i = 1; i <= size; i++)
                b[i][j] = 0.0;
            b[j][j] = 1.0;
        }

        gaussj(a,size,b,nrhs);

        for(j = 0; j < size; j++)
        {
            for(i = 0; i < size; i++)
                inv[i][j] = a[i+1][j+1];
        }
}

/*
*			tri_quadrature_13_pts():
*
*	Fills crds[0..12] with 13 points inside the triangle with vertices
*	pcrds0, pcrds1, pcrds2.  Every point is a weighted combination of
*	the vertices:
*
*	    crds[0]      equal weights 1/3
*	    crds[1..3]   weight b on one vertex, a on the other two
*	    crds[4..6]   weight g on one vertex, f on the other two
*	    crds[7..12]  the six orderings of weights (c, d, e)
*
*	Only the point locations are produced; the quadrature weights are
*	not part of this routine.  crds is assumed not to overlap the input
*	vertex arrays.
*/
EXPORT void tri_quadrature_13_pts(
	float       *pcrds0,
        float       *pcrds1,
        float       *pcrds2,
        float       crds[][2])
{
        const float a = 0.065130102902216, b = 0.869739794195568;
        const float c = 0.312865496004875, d = 0.638444188569809;
        const float e = 0.048690315425316, f = 0.260345966079038;
        const float g = 0.479308067841923;
        const float third = 1.0/3.0;
        int         k;

        /* k = 0 handles the x components, k = 1 the y components */
        for(k = 0; k < 2; k++)
        {
            const float p0 = pcrds0[k];
            const float p1 = pcrds1[k];
            const float p2 = pcrds2[k];

            crds[0][k] = (p0 + p1 + p2) * third;

            crds[1][k] = b*p0 + a*(p1 + p2);
            crds[2][k] = b*p1 + a*(p0 + p2);
            crds[3][k] = b*p2 + a*(p0 + p1);

            crds[4][k] = g*p0 + f*(p1 + p2);
            crds[5][k] = g*p1 + f*(p0 + p2);
            crds[6][k] = g*p2 + f*(p1 + p0);

            crds[7][k]  = c*p0 + d*p1 + e*p2;
            crds[8][k]  = d*p0 + c*p1 + e*p2;
            crds[9][k]  = d*p0 + e*p1 + c*p2;
            crds[10][k] = e*p0 + d*p1 + c*p2;
            crds[11][k] = e*p0 + c*p1 + d*p2;
            crds[12][k] = c*p0 + e*p1 + d*p2;
        }
}

/*
*                       tri_quadrature_7_pts():
*
*       Fills crds[0..6] with seven sample points of the triangle whose
*       vertices are pcrds0, pcrds1, pcrds2.  Point order:
*
*           crds[0..2]   the three vertices, in argument order;
*           crds[3..5]   the midpoints of edges 0-1, 1-2 and 2-0;
*           crds[6]      the point cent passed in by the caller
*                        (the centroid, per the original comment).
*
*       Only the first two components of every input array are read.
*/
EXPORT void tri_quadrature_7_pts(
        float       *pcrds0,
        float       *pcrds1,
        float       *pcrds2,
        double *cent,
        float       crds[][2])
{
        float       *vtx[3];
        int         dim, k;

        vtx[0] = pcrds0;
        vtx[1] = pcrds1;
        vtx[2] = pcrds2;

        /* Vertices: slot k receives vertex k. */
        for (dim = 0; dim < 2; dim++)
        {
            for (k = 0; k < 3; k++)
            {
                crds[k][dim] = vtx[k][dim];
            }
        }

        /* Edge midpoints: slot 3+k is halfway between vertex k and the */
        /* next vertex, wrapping from vertex 2 back to vertex 0.        */
        for (dim = 0; dim < 2; dim++)
        {
            for (k = 0; k < 3; k++)
            {
                crds[3 + k][dim] = 0.5*(vtx[k][dim] + vtx[(k + 1) % 3][dim]);
            }
        }

        /* Last slot is the caller-supplied point (narrowed to float). */
        crds[6][0] = cent[0];
        crds[6][1] = cent[1];
}

/*
*                       print_tri_crds():
*
*       Debug printout for one triangle: a header line with tri->id and
*       the first two components of fg_centroid(tri), followed by the
*       2D coordinates of each of the three points of the triangle.
*/
EXPORT void print_tri_crds(
        TRI *tri)
{
        float   *cent = fg_centroid(tri);
        int     k;

        printf("Triangle(%d) ceontriod (%g, %g)\n", tri->id,
                cent[0], cent[1]);

        /* The first two points end with a newline; the last one closes */
        /* the record with ";" and a blank line.                        */
        for (k = 0; k < 2; k++)
        {
            print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[k]), 2, "\n");
        }
        print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[2]), 2, ";\n\n");
}

/*
*                       tri_on_phy_bdry():
*
*       Returns YES when tri->BC_type is one of NEUMANN, IN_FLOW,
*       OUT_FLOW or CONST_P, and NO for any other value.
*       NOTE(review): going by the name, these four types are presumably
*       the "physical boundary" conditions -- confirm against the
*       definition of BC_type.
*/
EXPORT int tri_on_phy_bdry(
        TRI      *tri)
{
        if (tri->BC_type == NEUMANN)
            return YES;
        if (tri->BC_type == IN_FLOW)
            return YES;
        if (tri->BC_type == OUT_FLOW)
            return YES;
        if (tri->BC_type == CONST_P)
            return YES;

        return NO;
}

/*
*                       B_val():
*
*       Evaluates one monomial in the offsets
*
*               dx = crds[pos][0] - cent[0],
*               dy = crds[pos][1] - cent[1].
*
*       indx selects the monomial (assuming sqr()/cub() are the
*       project's square and cube helpers):
*
*            0: 1
*            1: dx        2: dy
*            3: dx^2      4: dx*dy      5: dy^2
*            6: dx^3      7: dx^2*dy    8: dx*dy^2     9: dy^3
*           10: dx^4     11: dx^3*dy   12: dx^2*dy^2  13: dx*dy^3
*           14: dy^4
*
*       Any other indx prints "ERROR: B_val" and calls clean_up(ERROR).
*       clean_up() is presumably non-returning; the explicit return that
*       follows it only guarantees that this function never falls off
*       its end without a value should that assumption not hold.
*/
LOCAL long double B_val(
        float crds[][2],
        double   *cent,
        int   pos,
        int   indx)
{
        long double dx, dy;

        /* The constant term does not need any coordinates. */
        if (indx == 0)
            return 1.0;

        /* Reject an unknown index before touching crds or cent. */
        if (indx < 0 || indx > 14)
        {
            printf("ERROR: B_val\n");
            clean_up(ERROR);
            return 0.0;
        }

        dx = crds[pos][0] - cent[0];
        dy = crds[pos][1] - cent[1];

        switch (indx)
        {
        case 1:
            return dx;
        case 2:
            return dy;
        case 3:
            return sqr(dx);
        case 4:
            return dx*dy;
        case 5:
            return sqr(dy);
        case 6:
            return cub(dx);
        case 7:
            return (sqr(dx)*dy);
        case 8:
            return (dx*sqr(dy));
        case 9:
            return cub(dy);
        case 10:
            return sqr(dx)*sqr(dx);
        case 11:
            return cub(dx)*dy;
        case 12:
            return sqr(dx)*sqr(dy);
        case 13:
            return (dx)*cub(dy);
        case 14:
            return sqr(dy)*sqr(dy);
        default:
            break;
        }

        /* Not reachable after the range check above; present so that */
        /* every path through the function returns a value.           */
        return 0.0;
}

/*
*                       tri_out_rect():
*
*       Tests fg_centroid(tri) against the axis-aligned rectangle with
*       lower corner L and upper corner U.  Returns YES when either of
*       the first two components lies strictly below L or strictly above
*       U in that direction; returns NO otherwise, so a point exactly on
*       the rectangle's edge counts as inside.
*/
EXPORT bool tri_out_rect(
        TRI             *tri,
        float           *L,
        float           *U)
{
        float           *cent = fg_centroid(tri);
        int             dim;

        for (dim = 0; dim < 2; dim++)
        {
            if (cent[dim] < L[dim] || cent[dim] > U[dim])
                return YES;
        }
        return NO;
}

#endif /* #if defined(TWOD) */






