/*
*                               gDG.c
*
*       Copyright 1999 by The University at Stony Brook, All rights reserved.
*/

#include <ghyp/ghyp.h>
#include <gdecs/vecdecs.h>

/*
*       state_id(i) expands to STATE_ID + i + 1.  The argument is fully
*       parenthesised so that an expression argument (e.g. a & b, or a
*       conditional expression) is evaluated as a whole before the +1.
*/
#define state_id(i)     (STATE_ID + ((i)+1))
/*
*       File-scope size, initialised to 0.
*       NOTE(review): the trailing "TOLERANCE" remark does not obviously
*       describe a size_t block size and BLOCK_SIZE is not referenced in the
*       part of the file reviewed here -- confirm its purpose or remove it.
*/
LOCAL size_t BLOCK_SIZE = 0; /*TOLERANCE - TODO: what is a good value*/

// #define NEW_LIMIT

#if defined(TWOD)

/*
*       Forward declarations of file-local (LOCAL) routines.  Everything
*       from here on is compiled only when TWOD is defined.
*
*       The prototypes that are commented out are kept exactly as found.
*       NOTE(review): presumably those routines were moved elsewhere or
*       retired -- confirm before deleting the lines.
*/

// LOCAL void      matrix_vec_mult(double**,float*,int,int,float*);

// LOCAL void      matrix_inv(double**,int,double**);
// LOCAL void      comp_mass_matrix(int,TRI*,int,double**);
// LOCAL void      comp_mass_matrix_1st_row(int,TRI*,int,double**);

LOCAL void      grad_vh(float*,double*,int,float*);
// LOCAL float     vh_val(float*,double*,int);
// LOCAL void      con_u_at_pt(Locstate,float*,double*,float*);
LOCAL void      flux_at_pt(float*,Locstate,float*,double*,float*,float*);
LOCAL float     inter_integr(TRI*,Locstate,int,float*,int);
LOCAL float     inter_integr_13_quad(TRI*,Locstate,int,float*,int);
LOCAL float     inter_integr_center(TRI*,Locstate,int,float*,int);
LOCAL void      edge_integr(TRI*,TRI*,Locstate,Locstate,float,int,int,float*,Front*,float,Mid_soln*,int);
LOCAL void      adv_fw(TRI*,TRI*,float,float,Mid_soln*,int,Front*);
LOCAL int       bdry_tri_adv_fw(TRI*,TRI*,Front*);
LOCAL int       tri_cent_outside(TRI*,float*,float*,int,BDRY_SIDE*);
// LOCAL void      update_buffer(Front*,Mid_soln*,int);
// LOCAL void      update_buffer_x_per_y_ref(Front*,Mid_soln*,int);
// LOCAL void      update_buffer_x_ref(Front*,Mid_soln*,int);

LOCAL void      limiting_soln_with_buffer_tris(Front*,Mid_soln*,int,int,int);
LOCAL void      local_limiting_soln_with_buffer_tris(Front*,Mid_soln*,int);
LOCAL void      redo_limiting_3rd_degreeP3(TRI*,TRI *nbtri[3],Mid_soln*,int);
LOCAL void      limiting_3rd_degreeP3(TRI*,TRI *nbtri[3],Mid_soln*,int,int);
LOCAL void      limiting_2nd_degreeP3(TRI*,TRI *nbtri[3],Mid_soln*,int);
LOCAL void      limiting_2nd_degree(TRI*,TRI *nbtri[3],Mid_soln*,int);
LOCAL void      limiting_1st_degreeP3(TRI*,TRI *nbtri[3],Mid_soln*,int,int);
LOCAL void      limiting_1st_degree(TRI*,TRI *nbtri[3],Mid_soln*,int);

LOCAL float     maxmod(float, float);
LOCAL float     minmod2(float, float);
LOCAL float     TVB_minmod(float,float,float);
LOCAL void      weight_mod(float*,int,float*);
LOCAL int       WENO_mod(float*,float*,float*,int,float,float*);
LOCAL int       WENO_mod_on_3rd(float*,float*,float*,int,float,float*);
LOCAL int       WENO_mod_on_2nd(float*,float*,float*,int,float,float*);
LOCAL int       WENO_mod_1(float*,float*,float*,int,float*);

// LOCAL void      update_coef(TRI*,Mid_soln*,int,size_t);
LOCAL int       find_tri(double*);
LOCAL int       find_tri_from_ver(float*,float*,float*);
// LOCAL void      print_matrix(float**,int,int);
// LOCAL void      matrix_matrix_mult(double**,double**,int,int,double**);
LOCAL void      print_state_coef(Locstate);
LOCAL void      print_mass_matrix_1st_row(double**);
LOCAL float     compute_alpha_of_LF_flux(Locstate,Locstate,float*);
// LOCAL void      inverse_matrix(double**,int,double**);
// LOCAL void      print_ldb_matrix(const char*,int,int,double**,const char*);
LOCAL void      g_exact_R_flux(float*,Locstate,Locstate,float*);

LOCAL void      DG_boundary_state(Locstate,TRI*,float*,Front*,int,float*,float,Mid_soln*,int);
LOCAL void      impose_gas_sine_states(Locstate,TRI*,float*,Front*,int,float*,float,Mid_soln*,int,float);
LOCAL void      DG_NEU_boundary_state(Locstate,TRI*,float*,Front*,int,float*,float,Mid_soln*,int);
LOCAL void      DG_CONST_P_boundary_state(Locstate,TRI*,float*,Front*,int,float*,float,Mid_soln*,int);
LOCAL void      flux_from_st(Locstate,float*,float*);
LOCAL int       solution_from_tri(Locstate,Front*,float*);
LOCAL void      print_bottom_tris_state(Front*);
// LOCAL void      zero_moments(Locstate);
LOCAL void      sample_soln_along_line(const char*,int,Front*);
LOCAL void      show_boundary_tri(const char*,int,Front*);
LOCAL void      attach_buffer(Front*,Mid_soln*,int,TRI*,TRI**,int*);
LOCAL void      attach_buffer_states(Front*,Mid_soln*,int,TRI*);
LOCAL void      shock_vort_attach_buffer_states(Front*,Mid_soln*,int,TRI*);
LOCAL void      db_Mach_attach_buffer_states(Front*,Mid_soln*,int,TRI*);

LOCAL float     compute_glb_alpha_of_LF(Locstate);
LOCAL int       overshoot_state(TRI*,Mid_soln*,int);
LOCAL int       overshoot_state_Shu_V(TRI*,Mid_soln*,int);

LOCAL void      build_1st_order_poly(TRI*,TRI *nbtri[3],Mid_soln*,
                 float*,float nbuave[3][4],int);
LOCAL void      compute_L1(Front*);
LOCAL void      gas_sine_L1(Front*);
LOCAL float     vortex_evo_den_soln(float*,float);
LOCAL void      vortex_evo_init(Locstate,TRI*);
LOCAL float     FM_vort_soln(float*,Front*);
LOCAL void      HLL_flux(float*,float*,float*,Locstate,Locstate,float*,int);
LOCAL void      Burgers_flux(float*,float*,float*,Locstate,Locstate,float*,int);
LOCAL void      con_u_to_state(float*,int,Locstate);
LOCAL void      inverse_2_2_matrix(double**,int,double**);
LOCAL void      d_inverse_2_2_matrix(double**,int,double**);
LOCAL void      LR_matrix_in_dir(float*,Locstate,float**,float**);
LOCAL void      d_matrix_vec_mult(float**,float*,int,int,float*);
LOCAL void      ld_matrix_vec_mult(double**,float*,int,int,float*);
LOCAL void      d_matrix_matrix_mult(float**,float**,int,int,float**);
LOCAL float     gas_sine_den_soln(float*,float);
LOCAL void      compute_tri_geom(float*,double*,double*,double*,float*);
LOCAL int       solution_from_loc_tris(Locstate,TRI**,int,size_t,float*);
LOCAL int       install_local_tris(TRI*,TRI**);
LOCAL int       fsame_sign(float*,int);
LOCAL int       Shu_V_smooth_dect(TRI*,Mid_soln*,int,float*,float nbuave[ ][4],int*);
LOCAL void      extrema_detec(float*,float nbuave[ ][4],int*);

/*
*       File-scope state shared by the routines in this file.  Only the
*       initial values below are established here; how each variable is
*       used must be read from the routines that reference it.
*/
// LOCAL double **Lmass_matrix = NULL, **mass_inv;
LOCAL double    dmach = 0.0;
/* Starts as NO. */
LOCAL int       debug_flag = NO; 
// LOCAL size_t    sizest = 0;
// LOCAL float     glb_alpha = -HUGE_VAL;
/* mass_1st_row starts NULL; mass_1st_rows is an array of three double** */
/* NOTE(review): presumably rows of a mass matrix -- confirm against the users. */
LOCAL double **mass_1st_row = NULL, **mass_1st_rows[3];
LOCAL float       newdt; 
LOCAL TRI         *time_on_tri;

// The following variables specify the edge quadrature.
// Gauss_N is the number of points (N = 4 for P3; N = 3 for P2);
// q[] and qw[] presumably hold the nodes and weights -- TODO confirm.
LOCAL int       Gauss_N = 3; // N = 4 for P3; N = 3 for P2
LOCAL double    q[9], qw[9];

// For the db_Mach (double Mach reflection) problem: Mach 10 shock states.
// NOTE(review): the four entries look like (density, x-velocity,
// y-velocity, pressure) -- confirm the ordering against the code that
// reads these arrays.
LOCAL float     post_sk_st[4] = {8, 8.25, 0.0, 116.5};  
                // As stored, the speed 8.25 is in a single component.
                // In the y-direction the velocity should be
                // v[0] = 8.25*cos(radians(60.0)), v[1] = 8.25*sin(radians(60.0))
LOCAL float     pre_sk_st[4] = {1.4, 0.0, 0.0, 1.0};
LOCAL float     sk_y_pos; // The y coordinate of the shock at x = 0.0.
/*
*       Further forward declarations of file-local (LOCAL) routines.
*       Commented-out prototypes are kept exactly as found.
*/
LOCAL float     burg_init(float);
LOCAL float     burg_sol(float*,float,float);
LOCAL double    deriv_burg_init(double);
LOCAL double    fprime(double);
LOCAL double    fprimeprime(double);
LOCAL void      Burgers_L1(Front*);
// LOCAL void      print_tri_crds(TRI*);
LOCAL int       max_angle(float*,int);
LOCAL void      g_sine_evolution_initializer(TRI*,Locstate,float);
LOCAL float     cond_num(float A[ ][2]);
LOCAL float     comput_loc_alpha(int,Locstate,float*,Locstate,float*,float);
LOCAL int       is_phy_bdry_side(TRI*,int);
LOCAL int       unphysical_state(TRI*,Mid_soln*,int);
LOCAL void      reflect_state_about_y(Locstate,double**);
LOCAL void      local_find_time_step_on_tris(Front*);
LOCAL int       jump_at_quadrature(TRI*,TRI *nbtri[3],Mid_soln*,int,
                  int*,int*,int*,int*,float*,float*,float*,float*,int);
LOCAL void      fix_overshoot(TRI*,Mid_soln*,int,Front*);
LOCAL void      fix_unphysical_st(TRI*,Mid_soln*,int,Front*);
LOCAL void      show_states_at_edge_quadrature(TRI*,Locstate);
LOCAL int       unphysical_st_at_quadrature(TRI*,Locstate);
LOCAL void      p1_L2projection_ver2(TRI*,Locstate,Locstate);
LOCAL void      u_average_indx(TRI*,Locstate,int,float*);
LOCAL void      R_degree3_term_averageP3(TRI*,Locstate,double**,int,float*);
//LOCAL void      R_degree3_term_average_liuP3(TRI*,Locstate,double**,int,float*);
LOCAL int       unify_weight(float*,int,float*);
LOCAL void      R_degree2_above_term_averageP3(TRI*,Locstate,double**,float*);
LOCAL void      Shu_V_sten_coeff(TRI*,Mid_soln*,int,float*,float nbuave[3][4],float*,float*);
LOCAL int       WENO_mod_cand1_P3(float*,float*,int,float,float*);
LOCAL int       WENO_mod_cand1_P2(float*,float*,int,float,float*);
LOCAL void      limit_by_edge_cent_val(float coef[][2],float A[][2],float*);
LOCAL void      harmonic_mod(float*,float*,int,float*);
LOCAL void      least_sqr(float*,float A[][2],float*);
LOCAL void      weno_weight_ls(float coef[][2],float A[][2],float*,float A_edge[][2],int,float*);
LOCAL void      comp_coef_3eqns(float A[][3],float*,float*);
LOCAL void      degree3_term_Int(Locstate,double**,float*);
LOCAL void      update_db_Mach_buffer(Front*,Mid_soln*,int);
LOCAL void      pp_send_interior_fields(int*,int,int,float,Front*,Mid_soln*,int);
LOCAL void      pp_receive_interior_fields(int*,int,int,float,Front*,Mid_soln*,int);
LOCAL void      tmp_set_send_domain(float*,float*,int,int,RECT_GRID*,float);
LOCAL void      tmp_set_receive_domain(float*,float*,int,int,RECT_GRID*,float);
LOCAL int       count_num_of_tris_inside(float*,float*,Front*,TRI***);
LOCAL void      unbundle_single_st(Locstate,size_t,byte*);
LOCAL void      bundle_single_st(Locstate,size_t,byte*);
LOCAL Buf_soln  *find_match_tri(float*,Buf_soln*,int);
LOCAL int       tri_outside_db_Mach_boundary(TRI*);


/*
*                       show_boundary_tri():
*
*       Writes the triangles for which Boundary_tri() is nonzero to a text
*       file in the directory <dname>/matlab_data.  The file name is built
*       from set_ppfname(...,"tri",...), the suffix ".ts" and
*       right_flush(step,7), and is completed by get_list_file_name().
*
*       File layout:
*           number of selected triangles
*           one line "v0 v1 v2 1" per triangle (1-based vertex indices)
*           number of distinct vertices
*           one line "x y" per vertex
*
*       dname   output directory, created if it does not exist
*       step    time step number used in the file name
*       fr      front whose mesh surfaces are scanned
*
*       Side effect: Index_of_point() of every vertex of a selected triangle
*       is overwritten with its 1-based position in the vertex list.
*
*       On any failure (name too long, directory or file cannot be created,
*       buffers cannot be allocated) a warning is printed and the function
*       returns without writing further output.
*/
LOCAL void      show_boundary_tri(
        const char   *dname,
        int          step,
	Front        *fr)
{
        SURFACE       **surf;
        TRI           *tri;
        POINT         *p;
        FILE          *tri_fp;
        static char   *fname = NULL, *ppfname = NULL;
        static size_t fname_len = 0, ppfname_len = 0;
        char          outdir[256], triname[256];
        const char    *nstep;
        float         *pts = NULL, *crds;
        int           *verts = NULL;
        int           num_tri = 0, npts = 0, ntris = 0;
        int           i, j, k, l, len;

        /* Bounded formatting: never write past the fixed-size buffers. */
        len = snprintf(outdir,sizeof(outdir),"%s/%s",dname,"matlab_data");
        if (len < 0 || (size_t)len >= sizeof(outdir))
        {
            (void) printf("WARNING in show_boundary_tri(), directory "
                          "name %s/matlab_data is too long\n",dname);
            return;
        }

        ppfname = set_ppfname(ppfname,"tri",&ppfname_len);
        nstep = right_flush(step,7);
        len = snprintf(triname,sizeof(triname),"%s.ts%s",ppfname,nstep);
        if (len < 0 || (size_t)len >= sizeof(triname))
        {
            (void) printf("WARNING in show_boundary_tri(), file "
                          "name %s.ts%s is too long\n",ppfname,nstep);
            return;
        }

        if (create_directory(dname,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_boundary_tri(), directory "
                          "%s doesn't exist and can't be created\n",dname);
            return;
        }
        if (create_directory(outdir,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_boundary_tri(), directory "
                         "%s doesn't exist and can't be created\n",outdir);
            return;
        }

        fname = get_list_file_name(fname,outdir,triname,&fname_len);
        if ((tri_fp = fopen(fname,"w")) == NULL)
        {
            (void) printf("WARNING in show_boundary_tri(), "
                           "can't open %s\n",fname);
            return;
        }

        /* Pass 1: count the selected triangles and mark their vertices */
        /* as "not yet numbered" (-1).                                  */
        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                if (!Boundary_tri(tri))
                    continue;
                for (k = 0; k < 3; ++k)
                    Index_of_point(Point_of_tri(tri)[k]) = -1;
                num_tri++;
            }
        }

        if (num_tri > 0)
        {
            /* At most 3 distinct vertices per triangle, 2 coordinates */
            /* each; exactly 3 vertex indices per triangle.            */
            vector(&pts,6*num_tri,FLOAT);
            vector(&verts,3*num_tri,INT);
            if (pts == NULL || verts == NULL)
            {
                (void) printf("WARNING in show_boundary_tri(), "
                              "can't allocate output buffers\n");
                if (pts != NULL)
                    free(pts);
                if (verts != NULL)
                    free(verts);
                (void) fclose(tri_fp);
                return;
            }
        }

        (void) fprintf(tri_fp,"%d\n",num_tri);

        /* Pass 2: number each vertex on first encounter (1-based) and */
        /* record the connectivity of every selected triangle.         */
        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                if (!Boundary_tri(tri))
                    continue;
                for (k = 0; k < 3; ++k)
                {
                    p = Point_of_tri(tri)[k];
                    if (Index_of_point(p) == -1)
                    {
                        crds = Coords(p);
                        for (l = 0; l < 2; ++l)
                            pts[2*npts+l] = crds[l];
                        ++npts;
                        Index_of_point(p) = npts;
                    }
                    verts[3*ntris+k] = Index_of_point(p);
                }
                ++ntris;
            }
        }

        for (j = 0; j < ntris; ++j)
        {
            (void) fprintf(tri_fp,"%d %d %d %d\n",
                           verts[3*j],verts[3*j+1],verts[3*j+2], 1);
        }

        (void) fprintf(tri_fp,"%d\n",npts);
        for (i = 0; i < npts; ++i)
            (void) fprintf(tri_fp,"%g %g\n", pts[2*i],pts[2*i+1]);

        (void) fclose(tri_fp);

        if (num_tri > 0)
        {
            free(pts);
            free(verts);
        }
}

/*
*                       show_state_on_tri():
*
*       Diagnostic output driven by debugging() switches.
*
*       1. If "Sod" or "v_evo" is on, sample_soln_along_line() is called.
*          If "v_evo" is on, compute_L1() is called; if "g_sine" is on,
*          gas_sine_L1(); if "Burgers" is on, Burgers_L1().
*       2. Unless "plot_tri_state" is on, the function then returns.
*       3. Otherwise, for every triangle whose centroid lies strictly inside
*          the rectangle [L,U] of fr->rect_grid, it writes under
*          <dname>/matlab_data:
*            - one "tri" file holding the triangle count, one line
*              "v0 v1 v2 1" per triangle (1-based vertex indices), the
*              vertex count and one "x y" line per vertex;
*            - seven per-triangle field files ("dens", "mom0", "mom1",
*              "energy", "press", "u", "v"), each holding the triangle
*              count followed by one value per line.  The values come from
*              Dens(), Mom()[0], Mom()[1], Energy(), pressure(), and
*              Mom()[0]/Dens(), Mom()[1]/Dens() applied to tri->st.
*          File names are built from set_ppfname(), ".ts" and
*          right_flush(step,7), then completed by get_list_file_name().
*
*       dname   output directory, created if it does not exist
*       step    time step number used in the file names
*       fr      front whose mesh surfaces are scanned
*
*       Side effect: Index_of_point() of every vertex of a selected triangle
*       is overwritten with its 1-based position in the vertex list.
*
*       On failure a warning is printed, every file opened so far is closed,
*       and the function returns.
*/
EXPORT void      show_state_on_tri(
        const char   *dname,
        int          step,
	Front        *fr)
{
        enum { NUM_FIELDS = 7 };
        /* Column n of fsts[] is written to the file named field_name[n]. */
        static const char *field_name[NUM_FIELDS] =
            { "dens", "mom0", "mom1", "energy", "press", "u", "v" };

        SURFACE       **surf;
        TRI           *tri;
        POINT         *p;
        Locstate      st;
        RECT_GRID     *gr = fr->rect_grid;
        float         *L = gr->L, *U = gr->U;
        double        *cent;
        FILE          *fp[NUM_FIELDS], *tri_fp;
        static char   *fname = NULL, *ppfname = NULL;
        static size_t fname_len = 0, ppfname_len = 0;
        char          outname[NUM_FIELDS][256], outdir[256], triname[256];
        const char    *nstep;
        float         *pts = NULL, *fsts = NULL, *crds;
        int           *verts = NULL;
        int           num_tri = 0, npts = 0, ntris = 0;
        int           i, j, k, l, n, len;

        if(debugging("Sod") || debugging("v_evo"))
            sample_soln_along_line(dname,step,fr);

        if(debugging("v_evo"))
            compute_L1(fr);
        if(debugging("g_sine"))
            gas_sine_L1(fr);
        if(debugging("Burgers"))
            Burgers_L1(fr);

        if(!debugging("plot_tri_state"))
            return;

        /* Bounded formatting: never write past the fixed-size buffers. */
        len = snprintf(outdir,sizeof(outdir),"%s/%s",dname,"matlab_data");
        if (len < 0 || (size_t)len >= sizeof(outdir))
        {
            (void) printf("WARNING in show_state_on_tri(), directory "
                          "name %s/matlab_data is too long\n",dname);
            return;
        }

        /* right_flush() is re-invoked after every set_ppfname() call, as  */
        /* in the original.  NOTE(review): presumably the two may share a  */
        /* static buffer -- confirm before hoisting the call.              */
        for (n = 0; n < NUM_FIELDS; ++n)
        {
            ppfname = set_ppfname(ppfname,field_name[n],&ppfname_len);
            nstep = right_flush(step,7);
            len = snprintf(outname[n],sizeof(outname[n]),"%s.ts%s",
                           ppfname,nstep);
            if (len < 0 || (size_t)len >= sizeof(outname[n]))
            {
                (void) printf("WARNING in show_state_on_tri(), file "
                              "name %s.ts%s is too long\n",ppfname,nstep);
                return;
            }
        }
        ppfname = set_ppfname(ppfname,"tri",&ppfname_len);
        nstep = right_flush(step,7);
        len = snprintf(triname,sizeof(triname),"%s.ts%s",ppfname,nstep);
        if (len < 0 || (size_t)len >= sizeof(triname))
        {
            (void) printf("WARNING in show_state_on_tri(), file "
                          "name %s.ts%s is too long\n",ppfname,nstep);
            return;
        }

        if (create_directory(dname,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_state_on_tri(), directory "
                          "%s doesn't exist and can't be created\n",dname);
            return;
        }
        if (create_directory(outdir,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_state_on_tri(), directory "
                         "%s doesn't exist and can't be created\n",outdir);
            return;
        }

        /* Open the field files; on failure close the ones already open. */
        for (n = 0; n < NUM_FIELDS; ++n)
        {
            fname = get_list_file_name(fname,outdir,outname[n],&fname_len);
            if ((fp[n] = fopen(fname,"w")) == NULL)
            {
                (void) printf("WARNING in show_state_on_tri(), "
                               "can't open %s\n",fname);
                while (--n >= 0)
                    (void) fclose(fp[n]);
                return;
            }
        }

        fname = get_list_file_name(fname,outdir,triname,&fname_len);
        if ((tri_fp = fopen(fname,"w")) == NULL)
        {
            (void) printf("WARNING in show_state_on_tri(), "
                           "can't open %s\n",fname);
            for (n = 0; n < NUM_FIELDS; ++n)
                (void) fclose(fp[n]);
            return;
        }

        /* Pass 1: count the triangles whose centroid is strictly inside */
        /* [L,U] and mark their vertices as "not yet numbered" (-1).     */
        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                cent = fg_centroid(tri);
                if (!(L[0] < cent[0] && L[1] < cent[1] &&
                      U[1] > cent[1] && U[0] > cent[0]))
                    continue;
                for (k = 0; k < 3; ++k)
                    Index_of_point(Point_of_tri(tri)[k]) = -1;
                num_tri++;
            }
        }

        if (num_tri > 0)
        {
            /* At most 3 distinct vertices per triangle, 2 coordinates */
            /* each; 3 vertex indices and NUM_FIELDS values per        */
            /* triangle.                                               */
            vector(&pts,6*num_tri,FLOAT);
            vector(&verts,3*num_tri,INT);
            vector(&fsts,NUM_FIELDS*num_tri,FLOAT);
            if (pts == NULL || verts == NULL || fsts == NULL)
            {
                (void) printf("WARNING in show_state_on_tri(), "
                              "can't allocate output buffers\n");
                if (pts != NULL)
                    free(pts);
                if (verts != NULL)
                    free(verts);
                if (fsts != NULL)
                    free(fsts);
                for (n = 0; n < NUM_FIELDS; ++n)
                    (void) fclose(fp[n]);
                (void) fclose(tri_fp);
                return;
            }
        }

        (void) fprintf(tri_fp,"%d\n",num_tri);

        /* Pass 2: number each vertex on first encounter (1-based), */
        /* record connectivity and the per-triangle field values.   */
        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                cent = fg_centroid(tri);
                if (!(L[0] < cent[0] && L[1] < cent[1] &&
                      U[1] > cent[1] && U[0] > cent[0]))
                    continue;

                st = tri->st;
                for (k = 0; k < 3; ++k)
                {
                    p = Point_of_tri(tri)[k];
                    if (Index_of_point(p) == -1)
                    {
                        crds = Coords(p);
                        for (l = 0; l < 2; ++l)
                            pts[2*npts+l] = crds[l];
                        ++npts;
                        Index_of_point(p) = npts;
                    }
                    verts[3*ntris+k] = Index_of_point(p);
                }
                fsts[NUM_FIELDS*ntris]   = Dens(st);
                fsts[NUM_FIELDS*ntris+1] = Mom(st)[0];
                fsts[NUM_FIELDS*ntris+2] = Mom(st)[1];
                fsts[NUM_FIELDS*ntris+3] = Energy(st);
                fsts[NUM_FIELDS*ntris+4] = pressure(st);
                fsts[NUM_FIELDS*ntris+5] = Mom(st)[0]/Dens(st);
                fsts[NUM_FIELDS*ntris+6] = Mom(st)[1]/Dens(st);
                ++ntris;
            }
        }

        for (j = 0; j < ntris; ++j)
        {
            (void) fprintf(tri_fp,"%d %d %d %d\n",
                           verts[3*j],verts[3*j+1],verts[3*j+2], 1);
        }

        (void) fprintf(tri_fp,"%d\n",npts);
        for (i = 0; i < npts; ++i)
            (void) fprintf(tri_fp,"%g %g\n", pts[2*i],pts[2*i+1]);

        for (n = 0; n < NUM_FIELDS; ++n)
        {
            (void) fprintf(fp[n],"%d\n",ntris);
            for (i = 0; i < ntris; ++i)
                (void) fprintf(fp[n],"%g\n", fsts[NUM_FIELDS*i+n]);
        }

        for (n = 0; n < NUM_FIELDS; ++n)
            (void) fclose(fp[n]);
        (void) fclose(tri_fp);

        if (num_tri > 0)
        {
            free(pts);
            free(verts);
            free(fsts);
        }
}

EXPORT void      show_state_to_rect(
        const char   *dname,
        int          step,
	Front        *fr)
{
        SURFACE      **surf;
        TRI          *tri;
        RECT_GRID    *gr = fr->rect_grid;
        float        *L = gr->L, *U = gr->U, con_u[4];
        double  *cent;
        int          k, l, j, i, dim = 2;
        FILE         *fp[7], *crdsfp[MAXD];
        static char  *fname = NULL, *ppfname = NULL;
        static size_t fname_len = 0, ppfname_len = 0;
        char         outname[7][256],outdir[256], crdsname[MAXD][256];
        const char   *nstep;
        static Locstate st = NULL;
        float      dx, dy, crds[MAXD], dent[MAXD];
        float      **denst, diam, **xcrds, **ycrds, **press;
        int        Xnumpts = 201, Ynumpts = 801, Nx, Ny, ic[MAXD], ixmin, ixmax, iymin, iymax, ratio;
        static int first = YES;
        int        myid, ICOORDS[MAXD];

        if(debugging("shock_vort"))
        {
            Xnumpts = 201; Ynumpts = 401;
        }
        else if(debugging("twod_riemann"))
        {
            Xnumpts = 501; Ynumpts = 501;
        }
        else if(debugging("db_Mach"))
        {
            int Nnodes;
#if defined(__MPI__)
            /** 
              wired for 2 by 2 partition
            **/
            // Xnumpts = 251; Ynumpts = 1001; 

            // after 062608, use this resolution for 2 by 8 partition
            // the resolution is 800 by  3200, subdomain: 400 
            Nnodes = pp_numnodes();
            Xnumpts = 401; Ynumpts = 3200/(Nnodes/2) + 1;
            // Xnumpts = 26; Ynumpts = 101; // debug 
#else
            Xnumpts = 501; Ynumpts = 2001;
#endif // if defined(_MPI_)
        }
        else
            return;

        if(st == NULL)
            g_alloc_state(&st, fr->sizest);

        sprintf(outdir,"%s/%s",dname,"matlab_data");

        ppfname = set_ppfname(ppfname,"dens",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[0],"%s.ts%s",ppfname,nstep);
        ppfname = set_ppfname(ppfname,"mom0",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[1],"%s.ts%s",ppfname,nstep);
        ppfname = set_ppfname(ppfname,"mom1",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[2],"%s.ts%s",ppfname,nstep);
        ppfname = set_ppfname(ppfname,"energy",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[3],"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"press",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[4],"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"u",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[5],"%s.ts%s",ppfname,nstep);
        ppfname = set_ppfname(ppfname,"v",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[6],"%s.ts%s",ppfname,nstep);


        ppfname = set_ppfname(ppfname,"X_crds",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(crdsname[0],"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"Y_crds",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(crdsname[1],"%s.ts%s",ppfname,nstep);

        if (create_directory(dname,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_state_to_rect(), directory "
                          "%s doesn't exist and can't be created\n",dname);
            return;
        }
        if (create_directory(outdir,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_state_to_rect(), directory "
                         "%s doesn't exist and can't be created\n",outdir);
            return;
        }

        fname = get_list_file_name(fname,outdir,outname[0],&fname_len);
        if ((fp[0] = fopen(fname,"w")) == NULL)
        {
            (void) printf("WARNING in show_state_to_rect(), "
                           "can't open %s\n",fname);
            return;
        }

        if(debugging("db_Mach") || debugging("shock_vort"))
        {
            // pressure
            fname = get_list_file_name(fname,outdir,outname[4],&fname_len);
            if ((fp[4] = fopen(fname,"w")) == NULL)
            {
                (void) printf("WARNING in show_state_to_rect(), "
                           "can't open %s\n",fname);
                return;
            }
        }

        if(first == YES)
        {
            fname = get_list_file_name(fname,outdir,crdsname[0],&fname_len);
            if ((crdsfp[0] = fopen(fname,"w")) == NULL)
            {
                (void) printf("WARNING in show_state_to_rect(), "
                           "can't open %s\n",fname);
                return;
            }
            fname = get_list_file_name(fname,outdir,crdsname[1],&fname_len);
            if ((crdsfp[1] = fopen(fname,"w")) == NULL)
            {
                (void) printf("WARNING in show_state_to_rect(), "
                           "can't open %s\n",fname);
                return;
            }
        }

        // db_Mach, show [0,1]*[0,3] domain
        // printf("db_Mach, show: output data size %d %d\n", Xnumpts, (Ynumpts-1)/4*3+1);
        // fprintf(crdsfp[0],"%d %d\n", Xnumpts, (Ynumpts-1)/4*3+1);
        // fprintf(crdsfp[1],"%d %d\n", Xnumpts, (Ynumpts-1)/4*3+1);
        // fprintf(fp[0],"%d %d\n", Xnumpts, (Ynumpts-1)/4*3+1);

        // Compute crds
        dx = (gr->U[0]-gr->L[0])/(Xnumpts-1);
        dy = (gr->U[1]-gr->L[1])/(Ynumpts-1);
#if defined(__MPI__)
        Nx = Xnumpts;
        Ny = Ynumpts;
#else
        if(debugging("shock_vort") || debugging("twod_riemann"))
        {
            Nx = Xnumpts;
            Ny = Ynumpts;
        }
        else
        {
            Ny = (Ynumpts-1)/4*3+1;
            Nx =  Xnumpts;
        }
#endif // if defined(__MPI__)

        matrix(&denst,Nx,Ny,sizeof(float));
        matrix(&xcrds,Nx,Ny,sizeof(float));
        matrix(&ycrds,Nx,Ny,sizeof(float));
        if(debugging("db_Mach") || debugging("shock_vort"))
            matrix(&press,Nx,Ny,sizeof(float));

        for(j = 0; j < Ny; j++)
        {
            for(i = 0; i < Nx; i++)
                xcrds[i][j] = ycrds[i][j] = -0.00001;
        }

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                diam = fg_length_side(tri)[0];
                break; 
            }
        }
        ratio = 5*((int)(diam/min(dx,dy)) + 1);

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                for(i = 0; i < dim; i++)
                    dent[i] = fg_centroid(tri)[i];
                ic[0] = irint(floor((dent[0]-gr->L[0])/dx));
                ic[1] = irint(floor((dent[1]-gr->L[1])/dy));
                ixmin = ic[0] - ratio; ixmax = ic[0] + ratio;
                iymin = ic[1] - ratio; iymax = ic[1] + ratio;

                for(j = iymin; j <= iymax; j++)
                {
                    for(i = ixmin; i <= ixmax; i++)
                    {
                        if(i < 0 || i >= Nx || j < 0 || j >= Ny)
                            continue;
                        crds[0] = gr->L[0] + dx*i;
                        crds[1] = gr->L[1] + dy*j;
                        if(i == 0)
                            crds[0] += 0.00001*dx;
                        if(j == 0)
                            crds[1] += 0.00001*dy;
                        if(i == Xnumpts-1)
                            crds[0] -= 0.00001*dx;
                        if(j == Ynumpts-1)
                            crds[1] -= 0.00001*dy;
                        if(pt_in_tri(crds, tri) == YES)
                        {
                            con_u_at_pt(tri->st, crds, fg_centroid(tri), con_u);
                            assign(st, tri->st, fr->sizest);
                            Dens(st) = con_u[0];
                            Mom(st)[0] = con_u[1];
                            Mom(st)[1] = con_u[2];
                            Energy(st) = con_u[3];
                            denst[i][j] = Dens(st);
                            if(debugging("db_Mach") || debugging("shock_vort"))
                                press[i][j] = pressure(st);
                            xcrds[i][j] = crds[0];
                            ycrds[i][j] = crds[1];
                        }
                    }
                }
            }
        }

        for(j = 0; j < Ny; j++)
        {
            for(i = 0; i < Nx; i++)
            {
                if(i != (Xnumpts-1))
                {
                    if(first == YES)
                    {
                        fprintf(crdsfp[0],"%g\t", xcrds[i][j]);
                        fprintf(crdsfp[1],"%g\t", ycrds[i][j]);
                    }
                    fprintf(fp[0],"%g\t", denst[i][j]);
                    if(debugging("db_Mach") || debugging("shock_vort"))
                        fprintf(fp[4],"%g\t", press[i][j]);
                }
                else
                {
                    if(first == YES)
                    {
                        fprintf(crdsfp[0],"%g", xcrds[i][j]);
                        fprintf(crdsfp[1],"%g", ycrds[i][j]);
                    }
                    fprintf(fp[0],"%g", denst[i][j]);
                    if(debugging("db_Mach") || debugging("shock_vort"))
                        fprintf(fp[4],"%g", press[i][j]);
                }
            }
            if(first == YES)
            {
                fprintf(crdsfp[0],"\n");
                fprintf(crdsfp[1],"\n");
            }
            fprintf(fp[0],"\n");
            if(debugging("db_Mach") || debugging("shock_vort"))
                fprintf(fp[4],"\n");
        }

        fclose(fp[0]);
        if(debugging("db_Mach") || debugging("shock_vort"))
            fclose(fp[4]);
        // fclose(fp[1]); fclose(fp[2]); fclose(fp[3]); 
        // fclose(fp[4]); fclose(fp[5]); fclose(fp[6]);
        if(first == YES)
        {
            fclose(crdsfp[0]);
            fclose(crdsfp[1]);
        }

        free(denst);
        free(xcrds);
        free(ycrds);
        if(debugging("db_Mach") || debugging("shock_vort"))
            free(press);

        first = NO;
}

/*
 * gDG_tri_vec - advance the DG solution on the triangle mesh from the old
 * front fr to the new front nfr over one time step dt.
 *
 * Original author's note: currently uses the L-F flux with a 3rd-order RK
 * time discretization.
 *
 * First call only:
 *   - fills the file-level Gauss quadrature tables q[]/qw[] for Gauss_N
 *     points (any unlisted Gauss_N falls back to the one-point midpoint rule);
 *   - numbers the triangles of fr and nfr in lock step (tri->id);
 *   - allocates the per-triangle RK stage storage midsoln[].
 *
 * Every call, for each of the RK_STEP stages:
 *   1. adv_fw() is applied to every (new, old) triangle pair;
 *   2. buffer triangles are refreshed for the active debugging() test case;
 *   3. nfr->time is set to the stage time (t, t+dt, t+dt/2) and, when
 *      use_limiter is YES, local_limiting_soln_with_buffer_tris() runs;
 *   4. under __MPI__, after the last stage, the file-level newdt is reduced
 *      to its minimum over all nodes;
 *   5. buffer triangles are refreshed again.
 *
 * iperm, dh, wv, nwv and max_comp are not referenced in this routine.
 */
EXPORT void gDG_tri_vec(
        int        *iperm,
        float      *dh,
        float      dt,
        Wave       *wv,
        Wave       *nwv,
        Front      *fr,
        Front      *nfr,
        COMPONENT  max_comp)
{
        SURFACE           **surf = nfr->mesh->surfaces;
        SURFACE           **osurf = fr->mesh->surfaces;
        TRI               *otri, *tri;
        int               i, j, use_limiter = YES;
        float             max_speed = 0.0;
        static int        first = YES;
        int               total_tri = 0;
        static Mid_soln   *midsoln = NULL;
        int               rk_iter;

        if(debugging("db_Mach"))
        {
            // Exact shock speed and the shock position at x = 0.0 in
            // the y direction; the position is stored in sk_y_pos.
            float spd, y_incr;
            spd = (post_sk_st[0]*post_sk_st[1]-0.0)/(post_sk_st[0]-pre_sk_st[0]);
            y_incr = spd*fr->time/sin(radians(60.0));
            sk_y_pos = 1.0/6.0 -sqrt(3)/3.0*(0.0-1.0) + y_incr;
        }

        if(mass_1st_row == NULL)
        {
            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));
            for(i = 0; i < 3; i++)
                matrix(&mass_1st_rows[i], 1, MAX_N_COEF,sizeof(double));
        }

        if(first == YES)
        {
            dmach = 0.5*DBL_EPSILON;
            first = NO;
            i = 0;

            // Gauss-Legendre nodes q[] and weights qw[] on [-1,1].
            if(Gauss_N == 3)
            {
                q[0] = -sqrt(0.6); q[1] = 0.0; q[2] = sqrt(0.6);
                qw[0] = 5.0/9.0; qw[1] = 8.0/9.0; qw[2] = 5.0/9.0;
            }
            else if(Gauss_N == 4)
            {
                q[0] = -0.86113631159405257522; q[1] = -0.33998104358485626480;
                q[2] =  0.33998104358485626480; q[3] = 0.86113631159405257522;
                qw[0] =  0.34785484513745385737; qw[1] = 0.65214515486254614263;
                qw[2] =  0.65214515486254614263; qw[3] = 0.34785484513745385737;
            }
            else if(Gauss_N == 5)
            {
                // Closed forms instead of 6-digit literals:
                //   x = 0,                          w = 128/225
                //   x = +-sqrt(5 - 2*sqrt(10/7))/3, w = (322 + 13*sqrt(70))/900
                //   x = +-sqrt(5 + 2*sqrt(10/7))/3, w = (322 - 13*sqrt(70))/900
                q[0] = -sqrt(5.0 + 2.0*sqrt(10.0/7.0))/3.0;
                q[1] = -sqrt(5.0 - 2.0*sqrt(10.0/7.0))/3.0;
                q[2] = 0.0;
                q[3] = -q[1];
                q[4] = -q[0];
                qw[0] = (322.0 - 13.0*sqrt(70.0))/900.0;
                qw[1] = (322.0 + 13.0*sqrt(70.0))/900.0;
                qw[2] = 128.0/225.0;
                qw[3] = qw[1];
                qw[4] = qw[0];
            }
            else if(Gauss_N == 8)
            {
                // Tabulated to 16 digits (previously truncated to 7).
                qw[0] = 0.1012285362903763; qw[1] = 0.2223810344533745;
                qw[2] = 0.3137066458778873; qw[3] = 0.3626837833783620;
                qw[4] = 0.3626837833783620; qw[5] = 0.3137066458778873;
                qw[6] = 0.2223810344533745; qw[7] = 0.1012285362903763;
                q[0] = -0.9602898564975363; q[1] = -0.7966664774136267;
                q[2] = -0.5255324099163290; q[3] = -0.1834346424956498;
                q[4] =  0.1834346424956498; q[5] =  0.5255324099163290;
                q[6] =  0.7966664774136267; q[7] =  0.9602898564975363;
            }
            else
            {
                // To test mid point
                q[0] = 0.0;
                qw[0] = 1.0;
            }

            // Give matching triangles of the old and new meshes the same id;
            // the id indexes midsoln[].
            for(; osurf && *osurf;  osurf++, surf++)
            {
                for (otri = first_tri(*osurf), tri = first_tri(*surf);
                    !at_end_of_tri_list(otri,*osurf);
                     otri = otri->next, tri = tri->next)
                {
                    otri->id = i;
                    tri->id = i;
                    i++;
                }
            }
            total_tri = i;
        }

        if(midsoln == NULL)
        {
            // One block of RK_STEP states per triangle; st[j] points into
            // the block.
            vector(&midsoln,total_tri,sizeof(Mid_soln));
            for(i = 0; i < total_tri; i++)
            {
                vector(&(midsoln[i].st), RK_STEP, sizeof(Locstate));
                vector(&(midsoln[i].worksp_st_store),RK_STEP,fr->sizest);
                for(j = 0; j < RK_STEP; j++)
                    midsoln[i].st[j] = midsoln[i].worksp_st_store + j*fr->sizest;
            }
            printf("Report storage_use after alloc RK: %-d K\n", get_vmalloc_storage_use()/1000);
        }

        newdt = HUGE_VAL;

        for(rk_iter = 0; rk_iter < RK_STEP; rk_iter++)
        {
            for(osurf = fr->mesh->surfaces, surf = nfr->mesh->surfaces;
                surf && *surf; surf++, osurf++)
            {
                for (tri = first_tri(*surf), otri = first_tri(*osurf);
                     !at_end_of_tri_list(tri,*surf);
                     tri = tri->next, otri = otri->next)
                {
                    adv_fw(tri,otri,dt,max_speed,midsoln,rk_iter,fr);
                }
            }

#if defined(__MPI__)
            BLOCK_SIZE = GetHypPPBlockSize();
            if(debugging("db_Mach"))
            {
                update_db_Mach_buffer(nfr,midsoln,rk_iter+1);
            }
#endif // if defined(__MPI__)

            // Refresh buffer triangles before limiting.
            if(debugging("Sod") || debugging("Lax"))
            {
                update_buffer_x_ref(nfr,midsoln,rk_iter+1);
            }
            else if(debugging("shock_vort"))
            {
                update_buffer_x_ref(nfr,midsoln,rk_iter+1);
            }
            else if(debugging("v_evo") || debugging("Burgers"))
            {
                update_buffer(nfr,midsoln,rk_iter+1);
            }

            // Limiting the soln; nfr->time carries the stage time.
            if(rk_iter == 0)
                nfr->time = fr->time;
            else if(rk_iter == 1)
                nfr->time = fr->time + dt;
            else
                nfr->time = fr->time + 0.5*dt;
            if(use_limiter == YES)
            {
                local_limiting_soln_with_buffer_tris(nfr,midsoln,rk_iter+1);
            }

#if defined(__MPI__)
            if((rk_iter+1) == RK_STEP)
            {
                // Reduce newdt to the minimum over all nodes.  The gather
                // buffer is sized from the actual node count rather than
                // a fixed 20 entries, which overflowed on larger runs.
                float *newdts = NULL;
                int   numnodes;

                numnodes = pp_numnodes();
                vector(&newdts,numnodes,sizeof(float));
                pp_all_gather((POINTER)&newdt,sizeof(float),
                      (POINTER)newdts,sizeof(float));
                for(i = 0; i < numnodes; i++)
                {
                    if (newdts[i] < newdt)
                        newdt = newdts[i];
                }
                free(newdts);
            }
            if(debugging("db_Mach"))
                update_db_Mach_buffer(nfr,midsoln,rk_iter+1);
#endif // if defined(__MPI__)

            // Refresh buffer triangles after limiting.
            if(debugging("Sod") || debugging("Lax"))
            {
                update_buffer_x_ref(nfr,midsoln,rk_iter+1);
            }
            else if(debugging("shock_vort"))
            {
                update_buffer_x_ref(nfr,midsoln,rk_iter+1);
            }
            else if(debugging("v_evo") || debugging("Burgers"))
            {
                update_buffer(nfr,midsoln,rk_iter+1);
            }

            if(use_limiter == NO && (rk_iter + 1) == RK_STEP)
            {
                local_find_time_step_on_tris(nfr);
            }
        }
}

/*
 * print_bottom_tris_state - debugging dump of selected triangle states.
 *
 * Walks every triangle of every surface of fr->mesh and prints the centroid,
 * id, density, pressure and the two velocity components of each triangle
 * whose centroid satisfies 50.5 < y < 54.5 and lies strictly between
 * fr->rect_grid->L[0] and fr->rect_grid->U[0] in x.
 */
LOCAL void   print_bottom_tris_state(
	Front    *fr)
{
        SURFACE      **s;
        TRI          *t;

        for(s = fr->mesh->surfaces; s && *s; s++)
        {
            t = first_tri(*s);
            while(!at_end_of_tri_list(t,*s))
            {
                const double cx = fg_centroid(t)[0];
                const double cy = fg_centroid(t)[1];
                const int    in_y_band = (cy < 54.5 && cy > 50.5);
                const int    in_x_range = (cx > fr->rect_grid->L[0] &&
                                           cx < fr->rect_grid->U[0]);

                if(in_y_band && in_x_range)
                {
                    printf("cent[%g %g] tri(%d):\n", cx, cy, t->id);
                    printf("den, p, v [%12.11g %12.11g %12.11g %12.11g]\n", Dens(t->st),
                          pressure(t->st), vel(0, t->st), vel(1, t->st));
                }
                t = t->next;
            }
        }
}

/*
 * attach_buffer - attach ghost triangles to the open sides of tri.
 *
 * For every side of tri that has no neighbor (Tri_on_side() is NULL), the
 * caller-supplied ghost[side] is linked in as the neighbor.  Its vertices
 * are set from tri's (the two side end points swapped, the opposite vertex
 * passed through i_reflect_point()), its mass matrix and geometry are
 * recomputed, and its state is filled from tri's state according to the
 * boundary type fg_e_type(tri)[side].
 *
 * Original author's note: the ghost states are all piecewise const.
 * The dg_* coefficients are zeroed here before the boundary type is applied;
 * the "g_sine" test case instead lets g_sine_evolution_initializer() set
 * the ghost state.
 *
 *   fr       front supplying sizest, interf and time
 *   midsoln  RK stage storage, indexed by tri->id
 *   rk_step  stage whose state is the source; RK_STEP selects tri->st
 *   tri      triangle to receive ghost neighbors
 *   ghost    three caller-supplied ghost triangles, one per side
 *   g_flag   out: g_flag[side] is YES where a ghost was attached, else NO
 *
 * Calls clean_up(ERROR) on a boundary type other than CONST_P, NEUMANN or
 * OUT_FLOW.
 */
LOCAL void attach_buffer(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
	TRI      *tri,
        TRI      **ghost,
        int      *g_flag)
{
	TRI       *nbtri[3];
        int       side, i, dim = 2;
        float     nor[3], t[3], u, crds[3], fcent[3];
        POINT     *gp[3], *p[3];
        Locstate  st, gst;
        float     dirx[2] = {1.0, 0.0}, ans;
        size_t    sizest = fr->sizest;
        double   *cent;

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        // Record which sides are open before any neighbor link is changed.
        for(side = 0; side < 3; side++)
        {
            nbtri[side] = Tri_on_side(tri,side);
            if(nbtri[side] == NULL)
                g_flag[side] = YES;
            else
                g_flag[side] = NO;
        }

        for(side = 0; side < 3; side++)
        {
            if(nbtri[side] != NULL)
                continue;

            Tri_on_side(tri,side) = ghost[side];
            gst = ghost[side]->st;

            // nor is the side vector rotated by -90 degrees.
            for(i = 0; i < dim; i++)
                t[i] = fg_side_vector(tri)[side][i];
            nor[0] = t[1];
            nor[1] = -t[0];

            // p[0],p[1] are the end points of this side, p[2] the
            // opposite vertex of tri.
            for(i = 0; i < 3; i++)
            {
                gp[i] = Point_of_tri(ghost[side])[i];
                p[i] = Point_of_tri(tri)[(side+i)%3];
            }

            // Let ghost side 0 be the adjacent side
            for(i = 0; i < dim; i++)
            {
                Coords(gp[0])[i] = Coords(p[1])[i];
                Coords(gp[1])[i] = Coords(p[0])[i];
                Coords(gp[2])[i] = Coords(p[2])[i];
            }

            // crds is a point on the line through p[0] and p[1], used as
            // the base point of the reflection.
            // NOTE(review): u divides the dot product by
            // fg_length_side() once, not by its square, so crds is the
            // foot of the perpendicular from p[2] only if that value is
            // a squared length or equals 1 -- confirm.  crds stays on the
            // side's line for any u.
            u = ((Coords(p[2])[0]-Coords(p[0])[0])*(Coords(p[1])[0]-Coords(p[0])[0]) +
                 (Coords(p[2])[1]-Coords(p[0])[1])*(Coords(p[1])[1]-Coords(p[0])[1]) )
                    /fg_length_side(tri)[side];
            crds[0] = Coords(p[0])[0] + u*(Coords(p[1])[0]-Coords(p[0])[0]);
            crds[1] = Coords(p[0])[1] + u*(Coords(p[1])[1]-Coords(p[0])[1]);

            i_reflect_point(gp[2], crds, nor, fr->interf);

            // Rebuild the ghost's mass matrix, its inverse and its
            // geometry for the new vertex positions.
            comp_mass_matrix(MAX_N_COEF,ghost[side],2,ghost[side]->Lmass_matrix);
            inverse_matrix(ghost[side]->Lmass_matrix,MAX_N_COEF,ghost[side]->mass_inv);

            set_comput_tri_geom_flag(YES);
            set_normal_of_tri(ghost[side]);
            set_comput_tri_geom_flag(NO);
            cent = fg_centroid(ghost[side]);
            for(i = 0; i < dim; i++)
                fcent[i] = cent[i];

            assign(gst, st, sizest);

            if(debugging("g_sine"))
            {
                set_state(gst,GAS_STATE,gst);
                g_sine_evolution_initializer(ghost[side],gst,fr->time);
                continue;
            }

            set_state(gst,TGAS_STATE,gst);
            for(i = 0; i < MAX_N_COEF; i++)
            {
                dg_Dens(gst)[i] = 0.0;
                dg_Mom(gst)[0][i] = 0.0;
                dg_Mom(gst)[1][i] = 0.0;
                dg_Energy(gst)[i] = 0.0;
            }

            if(fg_e_type(tri)[side] == CONST_P)
            {
                // Projection of nor on the x direction.  The second term
                // previously repeated the x component; with
                // dirx = {1,0} the branch outcome is unchanged because
                // each branch also tests nor[0] directly.
                ans = fabs(nor[0]*dirx[0] + nor[1]*dirx[1]);
                if(ans > 0.5 && nor[0] > 0.5)
                {
                    // right side, post shock condition
                    Dens(gst) = post_sk_st[0];
                    Vel(gst)[0] = post_sk_st[1]*cos(radians(60.0));
                    Vel(gst)[1] = post_sk_st[1]*sin(radians(60.0));
                    Press(gst) = post_sk_st[3];
                }
                else if(ans > 0.5 && nor[0] < -0.5)
                {
                    // left side, Mach 10 shock: the ghost centroid
                    // relative to sk_y_pos selects the state
                    if(cent[1] < sk_y_pos)
                    {
                        // post shock condition
                        Dens(gst) = post_sk_st[0];
                        Vel(gst)[0] = post_sk_st[1]*cos(radians(60.0));
                        Vel(gst)[1] = post_sk_st[1]*sin(radians(60.0));
                        Press(gst) = post_sk_st[3];
                    }
                    else
                    {
                        // pre shock condition
                        Dens(gst) = pre_sk_st[0];
                        Vel(gst)[0] = pre_sk_st[1];
                        Vel(gst)[1] = pre_sk_st[2];
                        Press(gst) = pre_sk_st[3];
                    }
                }
                else
                {
                    // bottom side, post shock condition
                    Dens(gst) = post_sk_st[0];
                    Vel(gst)[0] = post_sk_st[1]*cos(radians(60.0));
                    Vel(gst)[1] = post_sk_st[1]*sin(radians(60.0));
                    Press(gst) = post_sk_st[3];
                }
                set_state(gst,GAS_STATE,gst);
            }
            else if(fg_e_type(tri)[side] == NEUMANN)
            {
                reflect_state(gst, fr->interf, fcent, crds, nor);
                set_state(gst,GAS_STATE,gst);
            }
            else if(fg_e_type(tri)[side] == OUT_FLOW)
            {
                set_state(gst,GAS_STATE,gst);
            }
            else
            {
                printf("ERROR attach_buffer %d, implement type %d\n",
                             tri->id, fg_e_type(tri)[side]);
                clean_up(ERROR);
            }
        }

}

/*
 * attach_buffer_states - copy tri's state into its SUBDOMAIN neighbors.
 *
 * The ghost states are set in buffer tris: for each side of tri whose
 * neighbor has BC_type == SUBDOMAIN, the neighbor's state for stage rk_step
 * (its ->st when rk_step == RK_STEP) receives a copy of tri's state for the
 * same stage, is passed through zero_moments(), and is then also copied
 * into the neighbor's stage-0 slot midsoln[..].st[0].
 *
 * All three neighbors are dereferenced, so every side of tri must have a
 * neighbor.
 */
LOCAL void attach_buffer_states(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
	TRI      *tri)
{
        const size_t  nbytes = fr->sizest;
        const int     last_stage = (rk_step == RK_STEP);
        Locstate      src_st, buf_st;
        TRI           *nb;
        int           k;

        src_st = last_stage ? tri->st : midsoln[tri->id].st[rk_step];

        for(k = 0; k < 3; k++)
        {
            nb = Tri_on_side(tri,k);
            if(nb->BC_type != SUBDOMAIN)
                continue;

            buf_st = last_stage ? nb->st : midsoln[nb->id].st[rk_step];

            assign(buf_st, src_st, nbytes);
            zero_moments(buf_st);
            assign(midsoln[nb->id].st[0], buf_st, nbytes);
        }
}

/*
 * shock_vort_attach_buffer_states - copy tri's state into its SUBDOMAIN
 * neighbors across IN_FLOW / OUT_FLOW sides.
 *
 * The ghost states are set in buffer tris: a side is processed only when
 * the neighbor has BC_type == SUBDOMAIN and fg_e_type(tri)[side] is IN_FLOW
 * or OUT_FLOW.  The neighbor's state for stage rk_step (its ->st when
 * rk_step == RK_STEP) receives a copy of tri's state, is passed through
 * zero_moments(), and is then copied into the neighbor's stage-0 slot,
 * which serves as the subdomain zero level state for reconstruction
 * (P1 projection).
 *
 * All three neighbors are dereferenced, so every side of tri must have a
 * neighbor.
 */
LOCAL void shock_vort_attach_buffer_states(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
	TRI      *tri)
{
        const size_t  nbytes = fr->sizest;
        const int     last_stage = (rk_step == RK_STEP);
        Locstate      src_st, buf_st;
        TRI           *nb;
        int           k;

        src_st = last_stage ? tri->st : midsoln[tri->id].st[rk_step];

        for(k = 0; k < 3; k++)
        {
            nb = Tri_on_side(tri,k);
            if(nb->BC_type != SUBDOMAIN)
                continue;
            if(fg_e_type(tri)[k] != IN_FLOW && fg_e_type(tri)[k] != OUT_FLOW)
                continue;

            buf_st = last_stage ? nb->st : midsoln[nb->id].st[rk_step];

            assign(buf_st, src_st, nbytes);
            zero_moments(buf_st);
            assign(midsoln[nb->id].st[0], buf_st, nbytes);
        }
}

/*
 * db_Mach_attach_buffer_states - fill the SUBDOMAIN neighbors of tri for the
 * "db_Mach" test case.
 *
 * The ghost states are set in buffer tris.  For each side of tri whose
 * neighbor has BC_type == SUBDOMAIN:
 *
 *   NEUMANN side: the neighbor's stage state receives a copy of tri's state,
 *     a fixed set of dg coefficients has its sign flipped (indices 1,4 of
 *     density, energy and Mom[1]; indices 0,2,3,5 of Mom[0]), and the cell
 *     averages Dens/Mom/Energy are recomputed from the coefficients using
 *     row 0 of the neighbor's Lmass_matrix.
 *     NOTE(review): only indices up to 5 are flipped although MAX_N_COEF
 *     coefficients enter the average -- confirm this is intended when
 *     MAX_N_COEF > 6.
 *
 *   any other side: the neighbor's stage state receives a copy of tri's
 *     state passed through p1_L2projection_ver2().  Under __MPI__ this is
 *     done only when tri_outside_db_Mach_boundary(neighbor) returns YES.
 *
 * In both cases the result is also copied into the neighbor's stage-0 slot,
 * the subdomain zero level state used for reconstruction (P1 projection).
 */
LOCAL void db_Mach_attach_buffer_states(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
	TRI      *tri)
{
        static const int flip_14[2] = {1, 4};
        static const int flip_xmom[4] = {0, 2, 3, 5};
        const size_t  nbytes = fr->sizest;
        const int     last_stage = (rk_step == RK_STEP);
        Locstate      src_st, buf_st;
        TRI           *nb;
        double        *row0;
        int           k, m, c;

        src_st = last_stage ? tri->st : midsoln[tri->id].st[rk_step];

        for(k = 0; k < 3; k++)
        {
            nb = Tri_on_side(tri,k);
            if(nb->BC_type != SUBDOMAIN)
                continue;

            if(fg_e_type(tri)[k] == NEUMANN)
            {
                buf_st = last_stage ? nb->st : midsoln[nb->id].st[rk_step];
                assign(buf_st, src_st, nbytes);
                row0 = nb->Lmass_matrix[0];

                // Sign flips of the selected coefficients.
                for(m = 0; m < 2; m++)
                {
                    dg_Dens(buf_st)[flip_14[m]] *= -1.0;
                    dg_Energy(buf_st)[flip_14[m]] *= -1.0;
                    dg_Mom(buf_st)[1][flip_14[m]] *= -1.0;
                }
                for(m = 0; m < 4; m++)
                    dg_Mom(buf_st)[0][flip_xmom[m]] *= -1.0;

                // Compute average soln
                Dens(buf_st) = 0.0;
                Mom(buf_st)[0] = 0.0;
                Mom(buf_st)[1] = 0.0;
                Energy(buf_st) = 0.0;
                for(c = 0; c < MAX_N_COEF; c++)
                {
                    Dens(buf_st) += dg_Dens(buf_st)[c]*row0[c];
                    Mom(buf_st)[0] += dg_Mom(buf_st)[0][c]*row0[c];
                    Mom(buf_st)[1] += dg_Mom(buf_st)[1][c]*row0[c];
                    Energy(buf_st) += dg_Energy(buf_st)[c]*row0[c];
                }
                Dens(buf_st) /= row0[0];
                Mom(buf_st)[0] /= row0[0];
                Mom(buf_st)[1] /= row0[0];
                Energy(buf_st) /= row0[0];

                assign(midsoln[nb->id].st[0], buf_st, nbytes);
                continue;
            }

#if defined(__MPI__)
            if(YES != tri_outside_db_Mach_boundary(nb))
                continue;
#endif // if defined(__MPI__)

            buf_st = last_stage ? nb->st : midsoln[nb->id].st[rk_step];

            assign(buf_st, src_st, nbytes);
            p1_L2projection_ver2(tri,buf_st,buf_st);
            assign(midsoln[nb->id].st[0], buf_st, nbytes);
        }
}

/*
 * limiting_soln_with_buffer_tris - limit the DG solution in the interior.
 *
 * Pass 1 (every triangle that is not a SUBDOMAIN buffer tri):
 *   - boundary triangles (Boundary_tri() or tri_on_phy_bdry()):
 *       "twod_riemann": attach_buffer_states() seeds the buffer neighbors
 *                       and the triangle is then limited like the rest;
 *       otherwise:      the stage state is copied to midsoln[id].st[0] and
 *                       the triangle is not limited;
 *   - all remaining triangles go through the limiters chosen by MAX_N_COEF
 *     (10: 3rd, 2nd, 1st degree P3 limiters; 6: 2nd, 1st degree limiters).
 *
 * Between the passes the buffer triangles are refreshed at stage slot 0 for
 * the active debugging() test case.
 *
 * Pass 2 (same triangle set):
 *   - boundary triangles, except under "twod_riemann": st[0] goes through
 *     zero_moments() unless "g_sine", "v_evo" or "Burgers" is active, is
 *     copied back to the stage state, and at the final stage the file-level
 *     newdt / time_on_tri are updated from fr->_time_step_on_tri;
 *   - all remaining triangles: update_coef().
 *
 *   fr           front whose mesh is limited
 *   midsoln      RK stage storage, indexed by tri->id
 *   rk_step      stage being limited; RK_STEP selects tri->st
 *   detect_extr  forwarded to limiting_3rd_degreeP3()
 *   comput_time  forwarded to update_coef()
 */
LOCAL void limiting_soln_with_buffer_tris(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        int      detect_extr,
        int      comput_time)
{
        TRI       *tri, *nbtri[3];
        SURFACE   **surf;
        int       i;
        Locstate  st;
        float     max_dt;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
             !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(debugging("twod_riemann"))
                    {
                        attach_buffer_states(fr,midsoln,rk_step,tri);
                    }
                    else
                    {
                        if(rk_step == RK_STEP)
                            assign(midsoln[tri->id].st[0],tri->st,fr->sizest);
                        else
                            assign(midsoln[tri->id].st[0],midsoln[tri->id].st[rk_step],fr->sizest);
                        continue;
                    }
                }
                for(i = 0; i < 3; i++)
                    nbtri[i] = Tri_on_side(tri,i);
                // Limit from the highest degree coefficients downwards.
                if(MAX_N_COEF == 10)
                {
                    limiting_3rd_degreeP3(tri,nbtri,midsoln,rk_step,detect_extr);
                    limiting_2nd_degreeP3(tri,nbtri,midsoln,rk_step);
                    limiting_1st_degreeP3(tri,nbtri,midsoln,rk_step,NO);
                }
                else if(MAX_N_COEF == 6)
                {
                    limiting_2nd_degree(tri,nbtri,midsoln,rk_step);
                    limiting_1st_degree(tri,nbtri,midsoln,rk_step);
                }
            }
        }

        // NOTE(review): Sod/Lax use the periodic refresh here while
        // gDG_tri_vec uses update_buffer_x_ref for the same cases --
        // confirm which is intended.
        if(debugging("Sod") || debugging("Lax"))
        {
            update_buffer_x_peri(fr,midsoln,0);
        }
        // The key was "shock_vortex" here; every other test for this case
        // in the file uses "shock_vort".
        else if(debugging("shock_vort"))
        {
            update_buffer_x_ref(fr,midsoln,0);
        }
        else if(debugging("v_evo") || debugging("Burgers"))
        {
            update_buffer(fr,midsoln,0);
        }

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(!(debugging("g_sine") || debugging("v_evo")
                         || debugging("Burgers") || debugging("twod_riemann")))
                    {
                        zero_moments(midsoln[tri->id].st[0]);
                    }

                    if(!debugging("twod_riemann"))
                    {
                        if(rk_step == RK_STEP)
                            st = tri->st;
                        else
                            st = midsoln[tri->id].st[rk_step];
                        assign(st,midsoln[tri->id].st[0],fr->sizest);
                        if(rk_step == RK_STEP)
                        {
                            // Track the smallest admissible time step and
                            // the triangle that imposes it.
                            max_dt = (*fr->_time_step_on_tri)(fr, tri);
                            if(max_dt < newdt)
                                   time_on_tri = tri;
                            newdt = min(newdt, max_dt);
                        }
                        continue;
                    }
                }
                // update coefficient
                update_coef(tri,midsoln,rk_step,fr,fr->sizest,comput_time);
            }
        }
}

// Limiting the soln in interior
LOCAL void local_limiting_soln_with_buffer_tris(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step)
{
        TRI       *tri, *crsp_tri, *nbtri[3];
        SURFACE   **surf;
        int       dim = 2, i, j, side;
        double     *cent;
        size_t    sizest = fr->sizest;
        Locstate  st2;
        float     max_dt;
        TRI       **limit_tris, **row_limit_tris[500];
        int       N_alloc = 800, N_row, N_use =0, N;

        TRI       **limit_tris2, **row_limit_tris2[500];
        int       N_alloc2 = 800, N_row2, N_use2 =0, N2;
        int       loop_num = 0, detect_extr = YES, comput_time = NO;

        vector(&limit_tris, N_alloc, sizeof(TRI*));
        row_limit_tris[0] = limit_tris;
        N_row = 1;

        if(! debugging("HRtwice"))
        {
            detect_extr = YES;
            comput_time = YES;
        }

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(debugging("twod_riemann"))
                        attach_buffer_states(fr,midsoln,rk_step,tri);
                    if(debugging("db_Mach"))
                        db_Mach_attach_buffer_states(fr,midsoln,rk_step,tri);
                    if(debugging("shock_vort"))
                        shock_vort_attach_buffer_states(fr,midsoln,rk_step,tri);
                    if(debugging("Sod"))
                        shock_vort_attach_buffer_states(fr,midsoln,rk_step,tri);
                }

                if(overshoot_state_Shu_V(tri,midsoln,rk_step) == YES)
                {
                    limit_tris[N_use] = tri;
                    N_use++;  
                    if(N_use == N_alloc)
                    {
                        if(N_row +1 >= 500)
                        {
                            printf("ERROR: local_limiting_soln_with_buffer_tris, exceed alloc. limit\n");
                            clean_up(ERROR);
                        }
                        vector(&limit_tris, N_alloc, sizeof(TRI*));
                        row_limit_tris[N_row] = limit_tris;
                        N_row++;
                        N_use = 0;
                    }
                }
                else
                {
                    if(rk_step == RK_STEP)
                    {
                        max_dt = (*fr->_time_step_on_tri)(fr, tri);
                        if(max_dt < newdt)
                               time_on_tri = tri;
                        newdt = min(newdt, max_dt);
                        // states are saved there for redo highest degree
                        // if(!Boundary_tri(tri) && !tri_on_phy_bdry(tri))
                        assign(midsoln[tri->id].st[0],tri->st,fr->sizest);
                    }
                    else
                    {
                        // states are saved there for redo highest degree
                        // if(!Boundary_tri(tri) && !tri_on_phy_bdry(tri))
                        assign(midsoln[tri->id].st[0],midsoln[tri->id].st[rk_step],fr->sizest);
                    }
                }
            }
        }

        // TMP
        // printf("Limited size: alloc_row %d, N_alloc %d\n", N_row, N_use);
// DO HR twice
redo_HR:
        loop_num++;

        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1)
                N = N_use;
            else
                N = N_alloc;
            for(j = 0; j < N; j++)
            {
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                if(MAX_N_COEF == 6)
                {
                    limiting_2nd_degree(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                    limiting_1st_degree(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                }
                else if(MAX_N_COEF == 10)
                {
                    limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,detect_extr);
                    limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                    limiting_1st_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,NO);
                }
            }
        }

        // redo highest degree term using the reconstructed
        // polynomial to reduce overshoot
        /**
        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1)
                N = N_use;
            else
                N = N_alloc;
            for(j = 0; j < N; j++)
            {
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                if(MAX_N_COEF == 6)
                {
                }
                else if(MAX_N_COEF == 10)
                    redo_limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step);
            }
        }
        **/

        if(debugging("Sod") || debugging("Lax"))
        {
            // update_buffer_x_peri(fr,midsoln,0);
            update_buffer_x_ref(fr,midsoln,0);
        }
        else if(debugging("shock_vort"))
        {
            update_buffer_x_ref(fr,midsoln,0);
        }
        else if(debugging("v_evo") || debugging("Burgers"))
        {
            update_buffer(fr,midsoln,0);
            // update_buffer_x_per_y_ref(fr,midsoln,0);
            // update_buffer_x_ref(fr,midsoln,0);
            // update_buffer_x_peri(fr,midsoln,0);
        }

        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1)
                N = N_use;
            else
                N = N_alloc;
            for(j = 0; j < N; j++)
            {
                if(row_limit_tris[i][j]->redo_limiting == YES)
                    fix_unphysical_st(row_limit_tris[i][j],midsoln,0,fr);

                // update coefficient
                update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,comput_time);
            }
        }

        if(loop_num == 1 && debugging("HRtwice"))
        {
            detect_extr = YES; comput_time = YES;

#if defined(__MPI__)
            if(debugging("db_Mach"))
                update_db_Mach_buffer(fr,midsoln,rk_step);
#endif // if defined(__MPI__)

            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(Boundary_tri(row_limit_tris[i][j]) ||
                       tri_on_phy_bdry(row_limit_tris[i][j]))
                    {
                        if(debugging("twod_riemann"))
                            attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j]);
                        if(debugging("db_Mach"))
                            db_Mach_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j]);
                        if(debugging("shock_vort"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j]);
                        if(debugging("Sod"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j]);
                    }
                }
            }
            goto redo_HR;
        }

        for(i = 0; i < N_row; i++)
            free(row_limit_tris[i]);
}

/*
*       update_coef():
*
*       Copies the limited coefficients held in midsoln[tri->id].st[0] into
*       the state of the current stage: tri->st when rk_iter == RK_STEP,
*       otherwise midsoln[tri->id].st[rk_iter].
*
*       When N_EQN != 1 the copied state is checked for NaN in density,
*       both momentum components and energy; on a hit the triangle's
*       vertices are printed and clean_up(ERROR) is called.
*
*       When comput_time is not NO and rk_iter == RK_STEP, the file-level
*       newdt / time_on_tri are updated from fr->_time_step_on_tri.
*
*       tri          triangle whose coefficients are updated
*       midsoln      per-triangle intermediate states, indexed by tri->id
*       rk_iter      Runge-Kutta stage
*       fr           front providing the time-step callback
*       l_sizest     number of bytes handed to assign()
*       comput_time  NO to skip the time-step update
*/
EXPORT void update_coef(
	 TRI       *tri,
         Mid_soln  *midsoln,
         int       rk_iter,
         Front     *fr,
         size_t    l_sizest,
         int       comput_time)
{
         Locstate    limited = midsoln[tri->id].st[0];
         Locstate    st = (rk_iter == RK_STEP) ? tri->st
                                               : midsoln[tri->id].st[rk_iter];
         int         dim = 2, vtx;
         float       tri_dt;

         assign(st, limited, l_sizest);

         if(N_EQN != 1 &&
            (isnan(Dens(st)) || isnan(Mom(st)[0]) ||
             isnan(Mom(st)[1]) || isnan(Energy(st))))
         {
             printf("ERROR: update_coef()\n");
             printf("nan state detected\n");
             printf("Average state of TRI (%d) after limiting:\n", tri->id);
             for(vtx = 0; vtx < 3; vtx++)
                 print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[vtx]), dim, "\n");
             clean_up(ERROR);
         }

         // the time step is only refreshed on request, at stage RK_STEP
         if(comput_time != NO && rk_iter == RK_STEP)
         {
             tri_dt = (*fr->_time_step_on_tri)(fr, tri);
             if(tri_dt < newdt)
                 time_on_tri = tri;
             newdt = min(newdt, tri_dt);
         }
}
         

/*
*       R_degree2_term_average():
*
*       Average of the 2nd degree terms of the polynomial.  For each
*       conserved variable the coefficients with index 3, 4, 5 are weighted
*       by entries [0][3], [0][4], [0][5] of tri->Lmass_matrix and the sum
*       is divided by entry [0][0].
*
*       tri  triangle supplying Lmass_matrix
*       st   state holding the coefficients
*       ave  output, 4 entries: density, momentum[0], momentum[1], energy
*/
EXPORT void R_degree2_term_average(
         TRI       *tri,
         Locstate  st,
         float     *ave)
{
         double     *row0 = tri->Lmass_matrix[0];
         double     m3 = row0[3], m4 = row0[4], m5 = row0[5];
         float      area = row0[0];

         ave[0] = (dg_Dens(st)[3]*m3 +
                   dg_Dens(st)[4]*m4 +
                   dg_Dens(st)[5]*m5)/area;
         ave[1] = (dg_Mom(st)[0][3]*m3 +
                   dg_Mom(st)[0][4]*m4 +
                   dg_Mom(st)[0][5]*m5)/area;
         ave[2] = (dg_Mom(st)[1][3]*m3 +
                   dg_Mom(st)[1][4]*m4 +
                   dg_Mom(st)[1][5]*m5)/area;
         ave[3] = (dg_Energy(st)[3]*m3 +
                   dg_Energy(st)[4]*m4 +
                   dg_Energy(st)[5]*m5)/area;
}

/*
*       R_degree2_term_average_Liu():
*
*       Same average of the 2nd degree terms as R_degree2_term_average(),
*       but the weights come from the caller-supplied mass_1st_row instead
*       of the triangle: entries [0][3], [0][4], [0][5] weight the
*       coefficients with index 3, 4, 5 and entry [0][0] is the divisor.
*
*       tri           not used by this routine
*       st            state holding the coefficients
*       mass_1st_row  matrix whose row 0 supplies weights and divisor
*       ave           output, 4 entries: density, momentum[0], momentum[1],
*                     energy
*/
EXPORT void R_degree2_term_average_Liu(
         TRI       *tri,
         Locstate  st,
         double **mass_1st_row,
         float     *ave)
{
         double     *row0 = mass_1st_row[0];
         double     m3 = row0[3], m4 = row0[4], m5 = row0[5];
         float      area = row0[0];

         ave[0] = (dg_Dens(st)[3]*m3 +
                   dg_Dens(st)[4]*m4 +
                   dg_Dens(st)[5]*m5)/area;
         ave[1] = (dg_Mom(st)[0][3]*m3 +
                   dg_Mom(st)[0][4]*m4 +
                   dg_Mom(st)[0][5]*m5)/area;
         ave[2] = (dg_Mom(st)[1][3]*m3 +
                   dg_Mom(st)[1][4]*m4 +
                   dg_Mom(st)[1][5]*m5)/area;
         ave[3] = (dg_Energy(st)[3]*m3 +
                   dg_Energy(st)[4]*m4 +
                   dg_Energy(st)[5]*m5)/area;
}

/*
*       u_x_average():
*
*       Average of the d_x u polynomial, taken as the coefficient with
*       index 1 of each conserved variable.
*
*       An earlier variant, now disabled, also added the coefficients with
*       index 3 (times 2) and 4, weighted by tri->Lmass_matrix[0][1] and
*       [0][2] over Lmass_matrix[0][0].  tri is therefore unused here.
*
*       tri    not used by this routine
*       st     state holding the coefficients
*       uxave  output, 4 entries: density, momentum[0], momentum[1], energy
*/
EXPORT void u_x_average(
         TRI       *tri,
         Locstate  st,
         float     *uxave)
{
         const int ix = 1;      // index of the x term in a coefficient array

         uxave[0] = dg_Dens(st)[ix];
         uxave[1] = dg_Mom(st)[0][ix];
         uxave[2] = dg_Mom(st)[1][ix];
         uxave[3] = dg_Energy(st)[ix];
}

/*
*       u_y_average():
*
*       Average of the d_y u polynomial, taken as the coefficient with
*       index 2 of each conserved variable.
*
*       An earlier variant, now disabled, also added the coefficients with
*       index 4 and 5 (times 2), weighted by tri->Lmass_matrix[0][1] and
*       [0][2] over Lmass_matrix[0][0].  tri is therefore unused here.
*
*       tri    not used by this routine
*       st     state holding the coefficients
*       uyave  output, 4 entries: density, momentum[0], momentum[1], energy
*/
EXPORT void u_y_average(
         TRI       *tri,
         Locstate  st,
         float     *uyave)
{
         const int iy = 2;      // index of the y term in a coefficient array

         uyave[0] = dg_Dens(st)[iy];
         uyave[1] = dg_Mom(st)[0][iy];
         uyave[2] = dg_Mom(st)[1][iy];
         uyave[3] = dg_Energy(st)[iy];
}


// Reconstructed coeffs. are stored in RK_STEP[0]
LOCAL void limiting_3rd_degreeP3(
	 TRI       *tri,
         TRI       *nbtri[3], 
         Mid_soln  *midsoln,
         int       rk_iter,
         int       detect_extr)
{
         Locstate st, nbst[3], st2;
         float    uxxave[4], nbuxxave[3][4];
         float    uxyave[4], nbuxyave[3][4];
         float    uyyave[4], nbuyyave[3][4];
         int      i, dim = 2, k;
         double   *cent, *nbcent[3];
         float    rside[3], Axx[3][2][2], Axy[3][2][2], Ayy[3][2][2], 
                     rside2[3]; 
         float    coefxx[6][2], coefxy[6][2], coefyy[6][2];
         float    u6, u7, u8, u9, u7_0, u7_1, u8_0, u8_1, 
                   avg3, avg4, avg5, arrya[9], arryb[9], w[9];
         double **Lmass_matrix = tri->Lmass_matrix;
         float    eps = 0.9;
         float    dirx[3], diry[3], theta[3]; // dirx, diry: cos of the angle
         float    ud[3][2]; // soln uxx, uxy, uyy computed using one line
         int      idirx, idiry;
         float    c_num_xx[6], c_num_xy[6], c_num_yy[6]; // condition number of stencils
         float    diam, sqr_diam, sqr_sqr_diam, sv_coef[6];
         int      is_bad_stenxx[4], is_bad_stenxy[4], is_bad_stenyy[4];
         float    A_edgexx[3][2], A_edgexy[3][2], A_edgeyy[3][2], ls_soln1[3], ls_Axx[3][2],
                  ls_Axy[3][2], ls_Ayy[3][2], mid[3][2];
         float    wxx[3], wxy[3], wyy[3];

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             nbst[0] = nbtri[0]->st;
             nbst[1] = nbtri[1]->st;
             nbst[2] = nbtri[2]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
                 nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }

         // TMP
         /*
         if(YES == find_tri(fg_centroid(tri)))
         {
	     printf("\n\n---------*******************--------------\n");
             printf("TRI[%d] entered limiting_3rd_degreeP3\n",tri->id);

             printf("print neighbr [%d] (%g %g) states:", nbtri[0]->id,
                    fg_centroid(nbtri[0])[0], fg_centroid(nbtri[0])[1]);
             verbose_print_state("state:",nbst[0]); 
             printf("print neighbr [%d] (%g %g) states:", nbtri[1]->id,
                    fg_centroid(nbtri[1])[0], fg_centroid(nbtri[1])[1]);
             verbose_print_state("state:",nbst[1]); 
             printf("print neighbr [%d] (%g %g) states:", nbtri[2]->id,
                    fg_centroid(nbtri[2])[0], fg_centroid(nbtri[2])[1]);
             verbose_print_state("state:",nbst[2]); 
         }
         */

         st2 = midsoln[tri->id].st[0];
         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         diam = fg_diam(tri);
         sqr_diam = sqr(diam);
         sqr_sqr_diam = sqr(sqr_diam);

         cent = fg_centroid(tri);
         for(i = 0; i < 3; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         u_average_indx(tri,st,3,uxxave);
         u_average_indx(tri,st,4,uxyave);
         u_average_indx(tri,st,5,uyyave);
         for(k = 0; k < N_EQN; k++)
         {
             uxxave[k] *= 2.0;
             uyyave[k] *= 2.0;
         }
         
         for(i = 0; i < 3; i++)
         {
             u_average_indx(nbtri[i],nbst[i],3,nbuxxave[i]);
             u_average_indx(nbtri[i],nbst[i],4,nbuxyave[i]);
             u_average_indx(nbtri[i],nbst[i],5,nbuyyave[i]);
             for(k = 0; k < N_EQN; k++)
             {
                 nbuxxave[i][k] *= 2.0;
                 nbuyyave[i][k] *= 2.0;
             }
         }

         if(detect_extr == YES)
         {
             extrema_detec(uxxave,nbuxxave,is_bad_stenxx);
             extrema_detec(uxyave,nbuxyave,is_bad_stenxy);
             extrema_detec(uyyave,nbuyyave,is_bad_stenyy);
         }
         else
         {
             for(k = 0; k < N_EQN; k++)
                 is_bad_stenxx[k] = is_bad_stenxy[k] = is_bad_stenyy[k] = NO;
         }

         for(i = 0; i < 3; i++)
         {
             Axx[i][0][0] = 6.0*(nbcent[i][0]-cent[0]);
             Axx[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axx[i][1][0] = 6.0*(nbcent[(i+1)%3][0]-cent[0]);
             Axx[i][1][1] = 2.0*(nbcent[(i+1)%3][1]-cent[1]);
             // ls_Axx[i][0] = 6.0*(nbcent[i][0]-cent[0]);
             // ls_Axx[i][1] = 2.0*(nbcent[i][1]-cent[1]);
             c_num_xx[i] = cond_num(Axx[i]); 

             Axy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Axy[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axy[i][1][0] = 2.0*(nbcent[(i+1)%3][0]-cent[0]);
             Axy[i][1][1] = 2.0*(nbcent[(i+1)%3][1]-cent[1]);
             // ls_Axy[i][0] = 2.0*(nbcent[i][0]-cent[0]);
             // ls_Axy[i][1] = 2.0*(nbcent[i][1]-cent[1]);
             c_num_xy[i] = cond_num(Axy[i]); 

             Ayy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Ayy[i][0][1] = 6.0*(nbcent[i][1]-cent[1]);
             Ayy[i][1][0] = 2.0*(nbcent[(i+1)%3][0]-cent[0]);
             Ayy[i][1][1] = 6.0*(nbcent[(i+1)%3][1]-cent[1]);
             // ls_Ayy[i][0] = 2.0*(nbcent[i][0]-cent[0]);
             // ls_Ayy[i][1] = 6.0*(nbcent[i][1]-cent[1]);
             c_num_yy[i] = cond_num(Ayy[i]); 
         }
         if(debugging("3rd_cent_bias"))
         {
             unify_weight(c_num_xx, 3, wxx); // For center biased limiting
             unify_weight(c_num_xy, 3, wxy); // For center biased limiting
             unify_weight(c_num_yy, 3, wyy); // For center biased limiting
         }
         /**
         for(i = 0; i < 3; i++)
         {
             mid[i][0] = (Coords(Point_of_tri(tri)[i])[0] + Coords(Point_of_tri(tri)[(i+1)%3])[0])*0.5;
             mid[i][1] = (Coords(Point_of_tri(tri)[i])[1] + Coords(Point_of_tri(tri)[(i+1)%3])[1])*0.5;
             A_edgexx[i][0] = 6.0*(mid[i][0] - cent[0]);
             A_edgexx[i][1] = 2.0*(mid[i][1] - cent[1]);
             A_edgexy[i][0] = 2.0*(mid[i][0] - cent[0]);
             A_edgexy[i][1] = 2.0*(mid[i][1] - cent[1]);
             A_edgeyy[i][0] = 2.0*(mid[i][0] - cent[0]);
             A_edgeyy[i][1] = 6.0*(mid[i][1] - cent[1]);
         }
         **/

         ////// TMP, remove effect of condition number
         // for(i = 0; i < 3; i++)
             // c_num_xx[i] = c_num_xy[i] = c_num_yy[i] = w[i] = 1.0/3.0;

         for(k = 0; k < N_EQN; k++)
         {
             // tri, nb0, nb1 // tri, nb1, nb2 // tri, nb2, nb0
             for(i = 0; i < 3; i++)
             {
                 rside[0] = nbuxxave[i][k] - uxxave[k];
                 rside[1] = nbuxxave[(i+1)%3][k] - uxxave[k];
                 comp_coef(Axx[i],rside,coefxx[i]);

                 rside[0] = nbuxyave[i][k] - uxyave[k];
                 rside[1] = nbuxyave[(i+1)%3][k] - uxyave[k];
                 comp_coef(Axy[i],rside,coefxy[i]);

                 rside[0] = nbuyyave[i][k] - uyyave[k];
                 rside[1] = nbuyyave[(i+1)%3][k] - uyyave[k];
                 comp_coef(Ayy[i],rside,coefyy[i]);
             }

             /////// limit by edge center values
             /** fail to keep accuracy
             limit_by_edge_cent_val(coefxx,A_edgexx,rside);
             u6 = rside[0]; u7_0 = rside[1];
             limit_by_edge_cent_val(coefxy,A_edgexy,rside);
             u7_1 = rside[0]; u8_0 = rside[1];
             limit_by_edge_cent_val(coefyy,A_edgeyy,rside);
             u8_1 = rside[0]; u9 = rside[1];
             u7 = minmod((1+0.05)*minmod(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod((1+0.05)*minmod(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             */
             /////// END of limit by edge center values

             /////////////////// Least_square + WENO weight
             /** it seems that negative weight can not be avoided.
             for(i = 0; i < 3; i++)
                 rside2[i] = nbuxxave[i][k] - uxxave[k];
             weno_weight_ls(coefxx, ls_Axx, rside2, A_edgexx, 3, ls_soln1);
             u6 = ls_soln1[0]; u7_0 = ls_soln1[1];

             for(i = 0; i < 3; i++)
                 rside2[i] = nbuxyave[i][k] - uxyave[k];
             weno_weight_ls(coefxy, ls_Axy, rside2, A_edgexy, 3, ls_soln1);
             u7_1 = ls_soln1[0]; u8_0 = ls_soln1[1];

             for(i = 0; i < 3; i++)
                 rside2[i] = nbuyyave[i][k] - uyyave[k];
             weno_weight_ls(coefyy, ls_Ayy, rside2, A_edgeyy, 3, ls_soln1);
             u8_1 = ls_soln1[0]; u9 = ls_soln1[1];

             u7 = minmod((1+0.05)*minmod(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod((1+0.05)*minmod(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             **/
             /////////////////// End  Least_square + WENO weight
             //////////////// harmonic weight
             /***
             ///// u_xx polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefxx[i][0];
                 arryb[i] = coefxx[i][1];
             }
             // harmonic_mod(arrya,c_num_xx,3,w);
             u6   = w[0]*coefxx[0][0] + w[1]*coefxx[1][0] + w[2]*coefxx[2][0];
             // harmonic_mod(arryb,c_num_xx,3,w);
             u7_0 = w[0]*coefxx[0][1] + w[1]*coefxx[1][1] + w[2]*coefxx[2][1];
             if(is_bad_stenxx[k] == YES)
             {
                 u6 = 0.0; u7_0 = 0.0;
             }
             ///// u_xy polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefxy[i][0];
                 arryb[i] = coefxy[i][1];
             }
             // harmonic_mod(arrya,c_num_xy,3,w);
             u7_1 = w[0]*coefxy[0][0] + w[1]*coefxy[1][0] + w[2]*coefxy[2][0];
             // harmonic_mod(arryb,c_num_xy,3,w);
             u8_0 = w[0]*coefxy[0][1] + w[1]*coefxy[1][1] + w[2]*coefxy[2][1];
             if(is_bad_stenxy[k] == YES)
             {
                 u7_1 = 0.0; u8_0 = 0.0;
             }
             ///// u_yy polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefyy[i][0];
                 arryb[i] = coefyy[i][1];
             }
             // harmonic_mod(arrya,c_num_yy,3,w);
             u8_1 = w[0]*coefyy[0][0] + w[1]*coefyy[1][0] + w[2]*coefyy[2][0];
             // harmonic_mod(arryb,c_num_yy,3,w);
             u9   = w[0]*coefyy[0][1] + w[1]*coefyy[1][1] + w[2]*coefyy[2][1];
             if(is_bad_stenyy[k] == YES)
             {
                 u8_1 = 0.0; u9 = 0.0;
             }
             u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             *****/ 
             /////////// End harmonic weight
             /////////////////// WENO
             ///// u_xx polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefxx[i][0];
                 arryb[i] = coefxx[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_xx,3,diam,w);
             // WENO_mod_on_3rd(arrya,arryb,c_num_xx,3,1.0,w);
             // WENO_mod_cand1_P3(arrya,c_num_xx,3,sqr_sqr_diam,w);
             u6   = w[0]*coefxx[0][0] + w[1]*coefxx[1][0] + w[2]*coefxx[2][0];
             // WENO_mod_cand1_P3(arryb,c_num_xx,3,sqr_sqr_diam,w);
             u7_0 = w[0]*coefxx[0][1] + w[1]*coefxx[1][1] + w[2]*coefxx[2][1];

             if(is_bad_stenxx[k] == YES && detect_extr == YES)
             {
                 u6 = 0.0; u7_0 = 0.0;
             }

             ///// u_xy polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefxy[i][0];
                 arryb[i] = coefxy[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_xy,3,diam,w);
             // WENO_mod_on_3rd(arrya,arryb,c_num_xy,3,1.0,w);
             // WENO_mod_cand1_P3(arrya,c_num_xy,3,sqr_sqr_diam,w);
             u7_1 = w[0]*coefxy[0][0] + w[1]*coefxy[1][0] + w[2]*coefxy[2][0];
             // WENO_mod_cand1_P3(arryb,c_num_xy,3,sqr_sqr_diam,w);
             u8_0 = w[0]*coefxy[0][1] + w[1]*coefxy[1][1] + w[2]*coefxy[2][1];

             if(is_bad_stenxy[k] == YES && detect_extr == YES)
             {
                 u7_1 = 0.0; u8_0 = 0.0;
             }
             ///// u_yy polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefyy[i][0];
                 arryb[i] = coefyy[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_yy,3,diam,w);
             // WENO_mod(arrya,arryb,c_num_yy,3,1.0,w);
             // WENO_mod_cand1_P3(arrya,c_num_yy,3,sqr_sqr_diam,w);
             u8_1 = w[0]*coefyy[0][0] + w[1]*coefyy[1][0] + w[2]*coefyy[2][0];
             // WENO_mod_cand1_P3(arryb,c_num_yy,3,sqr_sqr_diam,w);
             u9   = w[0]*coefyy[0][1] + w[1]*coefyy[1][1] + w[2]*coefyy[2][1];

             if(is_bad_stenyy[k] == YES && detect_extr == YES)
             {
                 u8_1 = 0.0; u9 = 0.0;
             }

             u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             // sv_coef[0] = u6; sv_coef[1] = u7;  
             // sv_coef[2] = u8; sv_coef[3] = u9;  
             /////////////////// End WENO
             ///////////////////// center_biased
             if(debugging("3rd_cent_bias"))
             {
                 avg3 = (wxx[0]*coefxx[0][0] + wxx[1]*coefxx[1][0] + wxx[2]*coefxx[2][0]);
                 u6 = minmod(coefxx[0][0],coefxx[1][0]);
                 u6 = minmod(coefxx[2][0],u6);
                 u6 = minmod(((1+eps)*u6), avg3);
                 avg3 = (wxx[0]*coefxx[0][1] + wxx[1]*coefxx[1][1] + wxx[2]*coefxx[2][1]);
                 u7_0 = minmod(coefxx[0][1],coefxx[1][1]);
                 u7_0 = minmod(coefxx[2][1],u7_0);
                 u7_0 = minmod(((1+eps)*u7_0), avg3);

                 avg3 = (wxy[0]*coefxy[0][0] + wxy[1]*coefxy[1][0] + wxy[2]*coefxy[2][0]);
                 u7_1 = minmod(coefxy[0][0],coefxy[1][0]);
                 u7_1 = minmod(coefxy[2][0],u7_1);
                 u7_1 = minmod(((1+eps)*u7_1), avg3);
                 avg3 = (wxy[0]*coefxy[0][1] + wxy[1]*coefxy[1][1] + wxy[2]*coefxy[2][1]);
                 u8_0 = minmod(coefxy[0][1],coefxy[1][1]);
                 u8_0 = minmod(coefxy[2][1],u8_0);
                 u8_0 = minmod(((1+eps)*u8_0), avg3);

                 avg3 = (wyy[0]*coefyy[0][0] + wyy[1]*coefyy[1][0] + wyy[2]*coefyy[2][0]);
                 u8_1 = minmod(coefyy[0][0],coefyy[1][0]);
                 u8_1 = minmod(coefyy[2][0],u8_1);
                 u8_1 = minmod(((1+eps)*u8_1), avg3);

                 avg3 = (wyy[0]*coefyy[0][1] + wyy[1]*coefyy[1][1] + wyy[2]*coefyy[2][1]);
                 u9 = minmod(coefyy[0][1],coefyy[1][1]);
                 u9 = minmod(coefyy[2][1],u9);
                 u9 = minmod(((1+eps)*u9), avg3);
                 u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
                 u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             }
             //////////////////// END center_biased
             ///////////// WENO, weight is added to each component of the polynomials
             /**
             ///// u_xx polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefxx[i][0];
                 arryb[i] = coefxx[i][1];
             }
             for(i = 3; i < 6; i++)
             {
                 arryb[i] = coefxy[i-3][0];
                 c_num_xx[i] = c_num_xy[i-3];
             }
             WENO_mod_cand1_P3(arrya,c_num_xx,3,sqr_sqr_diam,w);
             u6   = w[0]*coefxx[0][0] + w[1]*coefxx[1][0] + w[2]*coefxx[2][0];
             // u6   = (coefxx[0][0] + coefxx[1][0] + coefxx[2][0])/3.0;
             
             WENO_mod_cand1_P3(arryb,c_num_xx,6,sqr_sqr_diam,w);
             u7 = w[0]*coefxx[0][1] + w[1]*coefxx[1][1] + w[2]*coefxx[2][1] +
                  w[4]*coefxy[0][0] + w[5]*coefxy[1][0] + w[6]*coefxy[2][0];
             // u7 = (coefxx[0][1] + coefxx[1][1] + coefxx[2][1] +
             //       coefxy[0][0] + coefxy[1][0] + coefxy[2][0])/6.0;

             ///// u_xy polynomial
             for(i = 0; i < 3; i++)
             {
                 arryb[i] = coefxy[i][1];
             }
             for(i = 3; i < 6; i++)
             {
                 arryb[i] = coefyy[i-3][0];
                 c_num_xy[i] = c_num_yy[i-3]; 
             }
             WENO_mod_cand1_P3(arryb,c_num_xy,6,sqr_sqr_diam,w);
             u8 = w[0]*coefxy[0][1] + w[1]*coefxy[1][1] + w[2]*coefxy[2][1] +
                  w[3]*coefyy[0][0] + w[4]*coefyy[1][0] + w[5]*coefyy[2][0];
             // u8 = (coefxy[0][1] + coefxy[1][1] + coefxy[2][1] +
             //       coefyy[0][0] + coefyy[1][0] + coefyy[2][0])/6.0;

             ///// u_yy polynomial
             for(i = 0; i < 3; i++)
             {
                 arryb[i] = coefyy[i][1];
             }
             WENO_mod_cand1_P3(arryb,c_num_yy,3,sqr_sqr_diam,w);
             u9   = w[0]*coefyy[0][1] + w[1]*coefyy[1][1] + w[2]*coefyy[2][1];
             // u9   = (coefyy[0][1] + coefyy[1][1] + coefyy[2][1])/3.0;
             **/
             /////////// End WENO, weight is added to each component of the polynomials
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[6] = u6;
                 dg_Dens(st2)[7] = u7;
                 dg_Dens(st2)[8] = u8;
                 dg_Dens(st2)[9] = u9;
             break;
             case 1:
                 dg_Mom(st2)[0][6] = u6;
                 dg_Mom(st2)[0][7] = u7;
                 dg_Mom(st2)[0][8] = u8;
                 dg_Mom(st2)[0][9] = u9;
             break;
             case 2:
                 dg_Mom(st2)[1][6] = u6;
                 dg_Mom(st2)[1][7] = u7;
                 dg_Mom(st2)[1][8] = u8;
                 dg_Mom(st2)[1][9] = u9;
             break;
             case 3:
                 dg_Energy(st2)[6] = u6;
                 dg_Energy(st2)[7] = u7;
                 dg_Energy(st2)[8] = u8;
                 dg_Energy(st2)[9] = u9;
             break;
             }
         }
}

// limiting_2nd_degreeP3():
//
// Reconstructs the 2nd degree coefficients of the polynomial on tri, i.e.
// slots 3, 4 and 5 of dg_Dens, dg_Mom[0], dg_Mom[1] and dg_Energy (labelled
// u_xx, u_xy and u_yy in the original debugging output), and stores them in
// midsoln[tri->id].st[0].
//
// Input states: tri->st / nbtri[i]->st when rk_iter == RK_STEP, otherwise
// the stage rk_iter entries of midsoln.  A neighbour with a negative id is
// read from nbtri[i]->st, the same convention limiting_2nd_degree() and
// build_1st_order_poly() use for attached buffer triangles.
//
// Outline:
//   1. For tri and each neighbour, take the slot 1 / slot 2 coefficient of
//      the input state and add R_degree3_term_averageP3() of that state.
//   2. Subtract R_degree3_term_averageP3() of the output state st2 (its
//      slots 6..9), using tri->Lmass_matrix on tri and mass_1st_rows[i]
//      (filled by comp_mass_matrix_1st_row() for nbtri[i] with the centroid
//      of tri) on the neighbours.
//   3. Every pair of consecutive neighbours gives a 2x2 system in the
//      centroid offsets; comp_coef() solves it for candidate coefficients.
//   4. When debugging("weno_w") is on, the three candidates are blended
//      with WENO_mod_on_2nd() weights and the two u_xy estimates are merged
//      with minmod2().
//
// NOTE(review): the only active blending path is the "weno_w" one (an
// edge-centre limiter and a centre-biased minmod limiter exist only as
// disabled experiments).  Previously, with "weno_w" off, u3/u4/u5 were
// stored without ever being assigned.  They now default to 0.0, i.e. the
// 2nd degree terms are dropped -- confirm this is the wanted fallback.
LOCAL void limiting_2nd_degreeP3(
        TRI       *tri,
        TRI       *nbtri[3],
        Mid_soln  *midsoln,
        int       rk_iter)
{
        Locstate st, nbst[3], st2;
        float    uxave[4], nbuxave[3][4], uyave[4], nbuyave[3][4];
        float    Rxave[4], nbRxave[3][4], Ryave[4], nbRyave[3][4];
        float    Lxave[4], nbLxave[3][4], Lyave[4], nbLyave[3][4];
        int      i, dim = 2, k;
        double   *cent, *nbcent[3];
        float    rside[3], Ax[3][2][2], Ay[3][2][2];
        float    coefx[3][2], coefy[3][2];
        float    u3, u4, u5, u4_0, u4_1;
        float    wx[9], wy[9], arrya[9], arryb[9], w[9];
        float    c_num_x[5], c_num_y[5];

        // Select the states the reconstruction reads from.
        if(rk_iter == RK_STEP)
        {
            st = tri->st;
            for(i = 0; i < 3; i++)
                nbst[i] = nbtri[i]->st;
        }
        else
        {
            st = midsoln[tri->id].st[rk_iter];
            for(i = 0; i < 3; i++)
            {
                if(nbtri[i]->id >= 0)
                    nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
                else
                    nbst[i] = nbtri[i]->st; // no midsoln slot for a negative id
            }
        }

        // Output state; its slots 6..9 are read below, slots 3..5 are written.
        st2 = midsoln[tri->id].st[0];

        // Step 1 on tri: slot 1/2 coefficient plus the degree 3 contribution
        // of the input state.
        u_average_indx(tri,st,1,uxave);
        u_average_indx(tri,st,2,uyave);
        R_degree3_term_averageP3(tri,st,tri->Lmass_matrix,0,Rxave);
        R_degree3_term_averageP3(tri,st,tri->Lmass_matrix,1,Ryave);
        for(k = 0; k < N_EQN; k++)
        {
            uxave[k] += Rxave[k];
            uyave[k] += Ryave[k];
        }
        // Step 2 on tri: degree 3 contribution of the output state.
        R_degree3_term_averageP3(tri,st2,tri->Lmass_matrix,0,Rxave);
        R_degree3_term_averageP3(tri,st2,tri->Lmass_matrix,1,Ryave);

        // Steps 1 and 2 on the three neighbours.
        for(i = 0; i < 3; i++)
        {
            u_average_indx(nbtri[i],nbst[i],1,nbuxave[i]);
            u_average_indx(nbtri[i],nbst[i],2,nbuyave[i]);
            R_degree3_term_averageP3(nbtri[i],nbst[i],nbtri[i]->Lmass_matrix,0,nbRxave[i]);
            R_degree3_term_averageP3(nbtri[i],nbst[i],nbtri[i]->Lmass_matrix,1,nbRyave[i]);
            for(k = 0; k < N_EQN; k++)
            {
                nbuxave[i][k] += nbRxave[i][k];
                nbuyave[i][k] += nbRyave[i][k];
            }

            // The polynomial of tri (st2) is evaluated over the neighbour,
            // hence the neighbour row taken about the centroid of tri.
            comp_mass_matrix_1st_row(MAX_N_COEF,nbtri[i],dim,fg_centroid(tri),mass_1st_rows[i]);
            R_degree3_term_averageP3(nbtri[i],st2,mass_1st_rows[i],0,nbRxave[i]);
            R_degree3_term_averageP3(nbtri[i],st2,mass_1st_rows[i],1,nbRyave[i]);
        }

        // Right hand side data with the degree 3 part of st2 removed.
        for(k = 0; k < N_EQN; k++)
        {
            Lxave[k] = uxave[k]-Rxave[k];
            Lyave[k] = uyave[k]-Ryave[k];
            for(i = 0; i < 3; i++)
            {
                nbLxave[i][k] = nbuxave[i][k]-nbRxave[i][k];
                nbLyave[i][k] = nbuyave[i][k]-nbRyave[i][k];
            }
        }

        cent = fg_centroid(tri);
        for(i = 0; i < 3; i++)
            nbcent[i] = fg_centroid(nbtri[i]);

        // Step 3 matrices: stencil i uses neighbours i and (i+1)%3.  The
        // factor 2.0 sits on the x offset for the d_x data and on the y
        // offset for the d_y data.
        for(i = 0; i < 3; i++)
        {
            Ax[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
            Ax[i][0][1] =     (nbcent[i][1]-cent[1]);
            Ax[i][1][0] = 2.0*(nbcent[(i+1)%3][0]-cent[0]);
            Ax[i][1][1] =     (nbcent[(i+1)%3][1]-cent[1]);
            c_num_x[i] = cond_num(Ax[i]);
            Ay[i][0][0] =     (nbcent[i][0]-cent[0]);
            Ay[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
            Ay[i][1][0] =     (nbcent[(i+1)%3][0]-cent[0]);
            Ay[i][1][1] = 2.0*(nbcent[(i+1)%3][1]-cent[1]);
            c_num_y[i] = cond_num(Ay[i]);
        }

        // wx/wy are only consumed by the (currently disabled) centre-biased
        // limiter; the calls are kept so enabling "cent_bias" behaves as before.
        if(debugging("cent_bias"))
        {
            unify_weight(c_num_x, 3, wx);
            unify_weight(c_num_y, 3, wy);
        }

        for(k = 0; k < N_EQN; k++)
        {
            // Defined fallback when no blending path below is enabled.
            u3 = 0.0;
            u4 = 0.0;
            u5 = 0.0;

            // coefx[i] = {u_xx, u_xy} candidate, coefy[i] = {u_xy, u_yy} candidate.
            for(i = 0; i < 3; i++)
            {
                rside[0] = nbLxave[i][k] - Lxave[k];
                rside[1] = nbLxave[(i+1)%3][k] - Lxave[k];
                comp_coef(Ax[i],rside,coefx[i]);

                rside[0] = nbLyave[i][k] - Lyave[k];
                rside[1] = nbLyave[(i+1)%3][k] - Lyave[k];
                comp_coef(Ay[i],rside,coefy[i]);
            }

            // Step 4: WENO blending of the three candidates.
            if(debugging("weno_w"))
            {
                for(i = 0; i < 3; i++)
                {
                    arrya[i] = coefx[i][0];
                    arryb[i] = coefx[i][1];
                }
                WENO_mod_on_2nd(arrya,arryb,c_num_x,3,1.0,w);
                u3   = w[0]*coefx[0][0] + w[1]*coefx[1][0] + w[2]*coefx[2][0];
                u4_0 = w[0]*coefx[0][1] + w[1]*coefx[1][1] + w[2]*coefx[2][1];

                for(i = 0; i < 3; i++)
                {
                    arrya[i] = coefy[i][0];
                    arryb[i] = coefy[i][1];
                }
                WENO_mod_on_2nd(arrya,arryb,c_num_y,3,1.0,w);
                u4_1 = w[0]*coefy[0][0] + w[1]*coefy[1][0] + w[2]*coefy[2][0];
                u5   = w[0]*coefy[0][1] + w[1]*coefy[1][1] + w[2]*coefy[2][1];

                // Merge the two u_xy estimates (0.05 is the tuning value in use;
                // the original notes 0.01 as an alternative).
                u4 = minmod2((1+0.05)*minmod2(u4_0,u4_1), 0.5*(u4_0 + u4_1));
            }

            switch(k)
            {
            case 0:
                dg_Dens(st2)[3] = u3;
                dg_Dens(st2)[4] = u4;
                dg_Dens(st2)[5] = u5;
            break;
            case 1:
                dg_Mom(st2)[0][3] = u3;
                dg_Mom(st2)[0][4] = u4;
                dg_Mom(st2)[0][5] = u5;
            break;
            case 2:
                dg_Mom(st2)[1][3] = u3;
                dg_Mom(st2)[1][4] = u4;
                dg_Mom(st2)[1][5] = u5;
            break;
            case 3:
                dg_Energy(st2)[3] = u3;
                dg_Energy(st2)[4] = u4;
                dg_Energy(st2)[5] = u5;
            break;
            default:
            break;
            }
        }
}

// average of 2nd degree terms of polynomial
/**
LOCAL void R_degree3_term_average_liuP3(
         TRI       *tri,
         Locstate  st,
         double **mass_1st_row,
         int       diff, // diff = 0, w.r.t. x; diff = 1, w.r.t. y.
         float     *ave)
{
         float      area;

         area = mass_1st_row[0][0];
         if(diff == 0)
         {
             ave[0] =(3.0*dg_Dens(st)[6]*mass_1st_row[0][3] +
                  2.0*dg_Dens(st)[7]*mass_1st_row[0][4] +
                  dg_Dens(st)[8]*mass_1st_row[0][5])/area;

             ave[1] =(3.0*dg_Mom(st)[0][6]*mass_1st_row[0][3] +
                  2.0*dg_Mom(st)[0][7]*mass_1st_row[0][4] +
                  dg_Mom(st)[0][8]*mass_1st_row[0][5])/area;

             ave[2] =(3.0*dg_Mom(st)[1][6]*mass_1st_row[0][3] +
                  2.0*dg_Mom(st)[1][7]*mass_1st_row[0][4] +
                  dg_Mom(st)[1][8]*mass_1st_row[0][5])/area;

             ave[3] =(3.0*dg_Energy(st)[6]*mass_1st_row[0][3] +
                  2.0*dg_Energy(st)[7]*mass_1st_row[0][4] +
                  dg_Energy(st)[8]*mass_1st_row[0][5])/area;
         }
         else
         {
             ave[0] =(dg_Dens(st)[7]*mass_1st_row[0][3] +
                  2.0*dg_Dens(st)[8]*mass_1st_row[0][4] +
                  3.0*dg_Dens(st)[9]*mass_1st_row[0][5])/area;

             ave[1] =(dg_Mom(st)[0][7]*mass_1st_row[0][3] +
                  2.0*dg_Mom(st)[0][8]*mass_1st_row[0][4] +
                  3.0*dg_Mom(st)[0][9]*mass_1st_row[0][5])/area;

             ave[2] =(dg_Mom(st)[1][7]*mass_1st_row[0][3] +
                  2.0*dg_Mom(st)[1][8]*mass_1st_row[0][4] +
                  3.0*dg_Mom(st)[1][9]*mass_1st_row[0][5])/area;

             ave[3] =(dg_Energy(st)[7]*mass_1st_row[0][3] +
                  2.0*dg_Energy(st)[8]*mass_1st_row[0][4] +
                  3.0*dg_Energy(st)[9]*mass_1st_row[0][5])/area;
         }
}
**/

// R_degree3_term_averageP3():
//
// For every conserved variable, combines the 3rd degree coefficients of st
// (slots 6..9, written c[] below) with entries 3, 4 and 5 of the first row
// of Lmass_matrix (M[0][]) and divides by M[0][0]:
//
//   diff == 0 (w.r.t. x): (3*c[6]*M[0][3] + 2*c[7]*M[0][4] +   c[8]*M[0][5])/M[0][0]
//   otherwise (w.r.t. y): (  c[7]*M[0][3] + 2*c[8]*M[0][4] + 3*c[9]*M[0][5])/M[0][0]
//
// i.e. the 3rd degree terms after differentiation.  Results go to ave[0]
// (density), ave[1], ave[2] (momentum components) and ave[3] (energy);
// only ave[0] is written when N_EQN == 1.  tri is not referenced.
LOCAL void R_degree3_term_averageP3(
        TRI       *tri,
        Locstate  st,
        double **Lmass_matrix,
        int       diff, // diff = 0, w.r.t. x; diff = 1, w.r.t. y.
        float     *ave)
{
        double *row0 = Lmass_matrix[0];
        float  area = row0[0]; // normalisation; presumably the cell area -- TODO confirm

        if(diff == 0)
        {
            ave[0] =(3.0*dg_Dens(st)[6]*row0[3] +
                     2.0*dg_Dens(st)[7]*row0[4] +
                         dg_Dens(st)[8]*row0[5])/area;

            if(N_EQN == 1) return;

            ave[1] =(3.0*dg_Mom(st)[0][6]*row0[3] +
                     2.0*dg_Mom(st)[0][7]*row0[4] +
                         dg_Mom(st)[0][8]*row0[5])/area;

            ave[2] =(3.0*dg_Mom(st)[1][6]*row0[3] +
                     2.0*dg_Mom(st)[1][7]*row0[4] +
                         dg_Mom(st)[1][8]*row0[5])/area;

            ave[3] =(3.0*dg_Energy(st)[6]*row0[3] +
                     2.0*dg_Energy(st)[7]*row0[4] +
                         dg_Energy(st)[8]*row0[5])/area;
            return;
        }

        // Every other value of diff: the y form.
        ave[0] =(    dg_Dens(st)[7]*row0[3] +
                 2.0*dg_Dens(st)[8]*row0[4] +
                 3.0*dg_Dens(st)[9]*row0[5])/area;

        if(N_EQN == 1) return;

        ave[1] =(    dg_Mom(st)[0][7]*row0[3] +
                 2.0*dg_Mom(st)[0][8]*row0[4] +
                 3.0*dg_Mom(st)[0][9]*row0[5])/area;

        ave[2] =(    dg_Mom(st)[1][7]*row0[3] +
                 2.0*dg_Mom(st)[1][8]*row0[4] +
                 3.0*dg_Mom(st)[1][9]*row0[5])/area;

        ave[3] =(    dg_Energy(st)[7]*row0[3] +
                 2.0*dg_Energy(st)[8]*row0[4] +
                 3.0*dg_Energy(st)[9]*row0[5])/area;
}

// u_average_indx():
//
// Copies coefficient slot indx of each conserved variable of st into uave:
// uave[0] density, uave[1] and uave[2] the two momentum components, uave[3]
// energy.  When N_EQN == 1 only uave[0] is written.  tri is not referenced.
LOCAL void u_average_indx(
        TRI       *tri,
        Locstate  st,
        int       indx,
        float     *uave)
{
        uave[0] = dg_Dens(st)[indx];

        if(N_EQN != 1)
        {
            uave[1] = dg_Mom(st)[0][indx];
            uave[2] = dg_Mom(st)[1][indx];
            uave[3] = dg_Energy(st)[indx];
        }
}

// limiting_2nd_degree():
//
// Reconstructs the 2nd degree coefficients (slots 3, 4, 5 of dg_Dens,
// dg_Mom[0], dg_Mom[1], dg_Energy) on tri and stores them in
// midsoln[tri->id].st[0], after copying the params and state type of the
// input state onto that output state.
//
// Input states are tri->st / nbtri[s]->st when rk_iter == RK_STEP, else the
// stage rk_iter entries of midsoln; a neighbour with a negative id (attached
// buffer) is always read from nbtri[s]->st.
//
// Per equation: each pair of consecutive neighbours yields one candidate
// from the u_x_average() data and one from the u_y_average() data via
// comp_coef(); the three candidates are blended with WENO_mod() weights.
// A component flagged by extrema_detec() gets zeros instead, and the two
// mixed-term estimates are merged with minmod().
//
// Side effects kept from the original: four 3x3 scratch matrices are
// allocated on the first call (not otherwise used here) and the global
// debug_flag is reset to NO on exit (marked TMP by the author).
LOCAL void limiting_2nd_degree(
        TRI       *tri,
        TRI       *nbtri[3],
        Mid_soln  *midsoln,
        int       rk_iter)
{
        static double **mat = NULL, **mat_tran, **AA, **inv;
        Locstate  src, nb_src[3], dst;
        float     ux[4], nb_ux[3][4];
        float     uy[4], nb_uy[3][4];
        float     Ax[3][2][2], Ay[3][2][2];
        float     cnd_x[3], cnd_y[3];        // condition numbers of the stencils
        float     rhs[3];
        float     cand_x[3][2], cand_y[3][2];
        float     first[9], second[9], w[9];
        float     uxx, uxy, uyy, uxy_from_x, uxy_from_y;
        float     diam;
        double    *ctr, *nb_ctr[3];
        double    dx_a, dy_a, dx_b, dy_b;
        int       bad_x[4], bad_y[4];
        int       s, nxt, eq;

        if(mat == NULL)
        {
            matrix(&(AA), 3, 3, sizeof(double));
            matrix(&(inv), 3, 3, sizeof(double));
            matrix(&(mat_tran), 3, 3, sizeof(double));
            matrix(&(mat), 3, 3, sizeof(double));
        }

        // Input states.
        src = (rk_iter == RK_STEP) ? tri->st : midsoln[tri->id].st[rk_iter];
        for(s = 0; s < 3; s++)
        {
            if(rk_iter == RK_STEP || nbtri[s]->id < 0)
                nb_src[s] = nbtri[s]->st;
            else
                nb_src[s] = midsoln[nbtri[s]->id].st[rk_iter];
        }

        // Output state.
        dst = midsoln[tri->id].st[0];
        Set_params(dst,src);
        set_type_of_state(dst,state_type(src));

        diam = fg_diam(tri);

        ctr = fg_centroid(tri);
        for(s = 0; s < 3; s++)
            nb_ctr[s] = fg_centroid(nbtri[s]);

        u_x_average(tri,src,ux);
        u_y_average(tri,src,uy);
        for(s = 0; s < 3; s++)
        {
            u_x_average(nbtri[s],nb_src[s],nb_ux[s]);
            u_y_average(nbtri[s],nb_src[s],nb_uy[s]);
        }

        extrema_detec(ux,nb_ux,bad_x);
        extrema_detec(uy,nb_uy,bad_y);

        // Stencil s pairs neighbour s with neighbour (s+1)%3.  The factor 2.0
        // multiplies the x offset for the d_x data, the y offset for the d_y data.
        for(s = 0; s < 3; s++)
        {
            nxt  = (s+1)%3;
            dx_a = nb_ctr[s][0]-ctr[0];
            dy_a = nb_ctr[s][1]-ctr[1];
            dx_b = nb_ctr[nxt][0]-ctr[0];
            dy_b = nb_ctr[nxt][1]-ctr[1];

            Ax[s][0][0] = 2.0*dx_a;
            Ax[s][0][1] = dy_a;
            Ax[s][1][0] = 2.0*dx_b;
            Ax[s][1][1] = dy_b;
            cnd_x[s] = cond_num(Ax[s]);

            Ay[s][0][0] = dx_a;
            Ay[s][0][1] = 2.0*dy_a;
            Ay[s][1][0] = dx_b;
            Ay[s][1][1] = 2.0*dy_b;
            cnd_y[s] = cond_num(Ay[s]);
        }

        for(eq = 0; eq < N_EQN; eq++)
        {
            // Candidates from (tri, nb0, nb1), (tri, nb1, nb2), (tri, nb2, nb0).
            for(s = 0; s < 3; s++)
            {
                nxt = (s+1)%3;

                rhs[0] = nb_ux[s][eq] - ux[eq];
                rhs[1] = nb_ux[nxt][eq] - ux[eq];
                comp_coef(Ax[s],rhs,cand_x[s]);

                rhs[0] = nb_uy[s][eq] - uy[eq];
                rhs[1] = nb_uy[nxt][eq] - uy[eq];
                comp_coef(Ay[s],rhs,cand_y[s]);
            }

            // WENO blend of the d_x candidates.
            for(s = 0; s < 3; s++)
            {
                first[s]  = cand_x[s][0];
                second[s] = cand_x[s][1];
            }
            WENO_mod(first,second,cnd_x,3,diam,w);
            uxx        = w[0]*cand_x[0][0] + w[1]*cand_x[1][0] + w[2]*cand_x[2][0];
            uxy_from_x = w[0]*cand_x[0][1] + w[1]*cand_x[1][1] + w[2]*cand_x[2][1];
            if(bad_x[eq] == YES)
            {
                uxx = 0.0;
                uxy_from_x = 0.0;
            }

            // WENO blend of the d_y candidates.
            for(s = 0; s < 3; s++)
            {
                first[s]  = cand_y[s][0];
                second[s] = cand_y[s][1];
            }
            WENO_mod(first,second,cnd_y,3,diam,w);
            uxy_from_y = w[0]*cand_y[0][0] + w[1]*cand_y[1][0] + w[2]*cand_y[2][0];
            uyy        =   w[0]*cand_y[0][1] + w[1]*cand_y[1][1] + w[2]*cand_y[2][1];
            if(bad_y[eq] == YES)
            {
                uxy_from_y = 0.0;
                uyy = 0.0;
            }

            // One mixed coefficient out of the two estimates.
            uxy = minmod((1+0.05)*minmod(uxy_from_x,uxy_from_y), 0.5*(uxy_from_x + uxy_from_y));

            switch(eq)
            {
            case 0:
                dg_Dens(dst)[3] = uxx;
                dg_Dens(dst)[4] = uxy;
                dg_Dens(dst)[5] = uyy;
                break;
            case 1:
                dg_Mom(dst)[0][3] = uxx;
                dg_Mom(dst)[0][4] = uxy;
                dg_Mom(dst)[0][5] = uyy;
                break;
            case 2:
                dg_Mom(dst)[1][3] = uxx;
                dg_Mom(dst)[1][4] = uxy;
                dg_Mom(dst)[1][5] = uyy;
                break;
            case 3:
                dg_Energy(dst)[3] = uxx;
                dg_Energy(dst)[4] = uxy;
                dg_Energy(dst)[5] = uyy;
                break;
            default:
                break;
            }
        }

        // TMP (author's marker): clear the global debug switch.
        debug_flag = NO;
}


// build_1st_order_poly():
//
// Replaces the polynomial stored in midsoln[tri->id].st[0] by a limited
// linear one built from the supplied averages:
//   slot 0   = uave[k]
//   slot 1,2 = minmod over the three gradient candidates that comp_coef()
//              returns for the neighbour pairs (0,1), (1,2), (2,0), using
//              centroid offsets as matrix rows and nbuave - uave as the
//              right hand side
//   slot 3 .. MAX_N_COEF-1 = 0
// for k = density, x-momentum, y-momentum, energy (always four components).
// Dens, Mom and Energy of the state are then set to the slot 0 values.
//
// rk_iter is only used in the diagnostic message below.
//
// NOTE(review): the trace for triangle id 86 is unconditional debugging
// output left in by the author; it is preserved here unchanged.
LOCAL void build_1st_order_poly(
        TRI       *tri,
        TRI       *nbtri[3],
        Mid_soln  *midsoln,
        float     *uave,
        float     nbuave[3][4],
        int       rk_iter)
{
        Locstate out = midsoln[tri->id].st[0];
        double   *ctr, *nb_ctr[3];
        float    mean[4], nb_mean[3][4];
        float    rhs[2], A[2][2], grad[3][2];
        float    lin[4][3];   // [component][slot 0..2]
        float    slope;
        int      eq, s, nxt, n;

        if(tri->id == 86)
        {
            printf("Entered build_1st_order_poly, iteration = %d for tri(%d)\n",
                 rk_iter, tri->id);
            verbose_print_state("before rebuild", out);
        }

        // Work on private copies of the caller's averages.
        for(eq = 0; eq < 4; eq++)
        {
            mean[eq] = uave[eq];
            for(s = 0; s < 3; s++)
                nb_mean[s][eq] = nbuave[s][eq];
        }

        ctr = fg_centroid(tri);
        for(s = 0; s < 3; s++)
            nb_ctr[s] = fg_centroid(nbtri[s]);

        for(eq = 0; eq < 4; eq++)
        {
            // One gradient candidate per pair of consecutive neighbours.
            for(s = 0; s < 3; s++)
            {
                nxt = (s+1)%3;
                rhs[0] = nb_mean[s][eq] - mean[eq];
                rhs[1] = nb_mean[nxt][eq] - mean[eq];
                A[0][0] = (nb_ctr[s][0]-ctr[0]);
                A[0][1] = (nb_ctr[s][1]-ctr[1]);
                A[1][0] = (nb_ctr[nxt][0]-ctr[0]);
                A[1][1] = (nb_ctr[nxt][1]-ctr[1]);
                comp_coef(A,rhs,grad[s]);
            }

            lin[eq][0] = mean[eq];

            // minmod across the three candidates, one slope at a time.
            slope = minmod(grad[0][0],grad[1][0]);
            slope = minmod(grad[2][0],slope);
            lin[eq][1] = slope;

            slope = minmod(grad[0][1],grad[1][1]);
            slope = minmod(grad[2][1],slope);
            lin[eq][2] = slope;
        }

        // Linear part.
        for(n = 0; n < 3; n++)
        {
            dg_Dens(out)[n] = lin[0][n];
            dg_Mom(out)[0][n] = lin[1][n];
            dg_Mom(out)[1][n] = lin[2][n];
            dg_Energy(out)[n] = lin[3][n];
        }

        // Everything above first degree is removed.
        for(n = 3; n < MAX_N_COEF; n++)
        {
            dg_Dens(out)[n] = 0.0;
            dg_Mom(out)[0][n] = 0.0;
            dg_Mom(out)[1][n] = 0.0;
            dg_Energy(out)[n] = 0.0;
        }

        Dens(out) = dg_Dens(out)[0];
        Mom(out)[0] = dg_Mom(out)[0][0];
        Mom(out)[1] = dg_Mom(out)[1][0];
        Energy(out) = dg_Energy(out)[0];

}

// overshoot_state():
//
// Returns YES when, at any of three points on any side of tri that has a
// neighbour (Tri_on_side() != NULL), some component returned by
// con_u_at_pt() for tri differs from the neighbour's value by more than the
// longest side length of tri; returns NO otherwise.  The three points are
// placed at -sqrt(0.6), 0 and sqrt(0.6) in the side's [-1,1] parametrisation.
// Component 0 alone is compared when N_EQN == 1, components 0..3 otherwise.
//
// States come from tri->st / nbtri->st when rk_iter == RK_STEP, else from
// the stage rk_iter entries of mid_soln.
// NOTE(review): unlike limiting_2nd_degree(), the neighbour id is not
// checked for being negative before indexing mid_soln -- confirm callers
// never reach this with buffer triangles.
LOCAL int overshoot_state(
        TRI        *tri,
        Mid_soln   *mid_soln,
        int        rk_iter)
{
        double     *cent = fg_centroid(tri), *nb_cent;
        double     node[3];
        float      *vtx[3], *a, *b;
        float      u_here[4], u_there[4];
        float      pt[4], diam;
        Locstate   st, nb_st;
        TRI        *nb;
        int        n_nodes = 3, n_comp, dim = 2;
        int        v, side, g, d, c;

        st = (rk_iter == RK_STEP) ? tri->st : mid_soln[tri->id].st[rk_iter];

        for(v = 0; v < 3; v++)
            vtx[v] = Coords(Point_of_tri(tri)[v]);

        node[0] = -sqrt(0.6);
        node[1] = 0.0;
        node[2] = sqrt(0.6);

        // Jump tolerance: the longest side of tri.
        diam = max(fg_length_side(tri)[0], max(fg_length_side(tri)[1], fg_length_side(tri)[2]));

        n_comp = (N_EQN == 1) ? 1 : 4;

        for(side = 0; side < 3; side++)
        {
            nb = Tri_on_side(tri,side);
            if(nb == NULL)
                continue;

            nb_cent = fg_centroid(nb);
            nb_st = (rk_iter == RK_STEP) ? nb->st : mid_soln[nb->id].st[rk_iter];

            a = vtx[side];
            b = vtx[(side+1)%3];

            for(g = 0; g < n_nodes; g++)
            {
                // Map the node from [-1,1] onto the side a -> b.
                for(d = 0; d < dim; d++)
                    pt[d] = (b[d] + a[d])/2.0 +
                           (b[d] - a[d])/2.0*node[g];

                con_u_at_pt(st, pt, cent, u_here);
                con_u_at_pt(nb_st, pt, nb_cent, u_there);

                for(c = 0; c < n_comp; c++)
                {
                    if(fabs(u_here[c]-u_there[c]) > diam)
                        return YES;
                }
            }
        }

        return NO;
}

// Troubled-cell detector.
// Stencil from limiting_1st_degree_origin_Shu_char().
// ("Use V." in the original note -- presumably Cockburn & Shu's RKDG
//  paper V; NOTE(review): confirm.)
//
// For each of the three edge midpoints of tri:
//   * du  = (polynomial of the cell evaluated at the midpoint by
//            con_u_at_pt()) - (cell values Dens/Mom/Energy of the state),
//   * slp = combination of the differences between two neighbour values
//           and the cell values, weighted by the two coefficients
//           returned by compute_tri_geom(),
//   * both are multiplied by the matrix L[i] that LR_matrix_in_dir()
//     fills for the unit direction centroid -> midpoint.
// The cell is flagged when, for any midpoint and any of the first N_EQN
// components, the projected du and slp have opposite signs or
// mu*|slp| < |du|.
//
// tri       cell to test; tri->redo_limiting is reset to NO here.
// mid_soln  per-triangle stage states, indexed by tri->id.
// rk_iter   RK_STEP selects tri->st, otherwise mid_soln[..].st[rk_iter].
//
// Returns YES if the cell should be limited, NO otherwise.  When the
// "twod_riemann" or "shock_vort" debugging switch is on, boundary
// triangles are always reported as YES.
//
// NOTE(review): all three neighbours (Tri_on_side) are dereferenced
// without a NULL check -- callers must only pass cells with a full
// neighbour set.
LOCAL int overshoot_state_Shu_V(
        TRI        *tri,
        Mid_soln   *mid_soln,
        int        rk_iter)
{
        // Characteristic matrices, one pair per edge midpoint; allocated
        // once on first call and reused afterwards.
        static float  **L[3] = {NULL, NULL, NULL}, **R[3];
        double        *cent = fg_centroid(tri), *nbcent[3];
        Locstate      st, nbst[3];
        TRI           *nbtri[3];
        float         con_u[4];
        float         dir[3][MAXD], len;
        float         mid[3][2];              // midpt on side
        float         du[3][4], ch_du[3][4];  // [midpt][comp_of_soln]
        float         slp[3][4], ch_slp[3][4];
        float         uave[4], nbuave[3][4];
        float         coef[2];
        float         mu = 1.2; // old mu = 1.5, 1.15, 1.2
        int           i, j, k;

        if(debugging("twod_riemann") || debugging("shock_vort"))
        {
            // force to do HR on boundaries
            if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                return YES;
        }

        tri->redo_limiting = NO;

        for(i = 0; i < 3; i++)
            nbtri[i] = Tri_on_side(tri,i);

        if(rk_iter == RK_STEP)
        {
            st = tri->st;
            for(i = 0; i < 3; i++)
                nbst[i] = nbtri[i]->st;
        }
        else
        {
            st = mid_soln[tri->id].st[rk_iter];
            for(i = 0; i < 3; i++)
            {
                // Same guard as limiting_1st_degree(): a neighbour with a
                // negative id has no slot in mid_soln, use its own state.
                if(nbtri[i]->id >= 0)
                    nbst[i] = mid_soln[nbtri[i]->id].st[rk_iter];
                else
                    nbst[i] = nbtri[i]->st;
            }
        }

        if(L[0] == NULL)
        {
            for(i = 0; i < 3; i++)
            {
                matrix(&L[i], 4, 4, sizeof(float));
                matrix(&R[i], 4, 4, sizeof(float));
            }
        }

        // edge midpoints
        for(i = 0; i < 3; i++)
        {
            mid[i][0] = (Coords(Point_of_tri(tri)[i])[0] + Coords(Point_of_tri(tri)[(i+1)%3])[0])*0.5;
            mid[i][1] = (Coords(Point_of_tri(tri)[i])[1] + Coords(Point_of_tri(tri)[(i+1)%3])[1])*0.5;
        }

        // jump between midpoint value and cell value, projected with L[i]
        for(i = 0; i < 3; i++)
        {
            con_u_at_pt(st, mid[i], cent, con_u);
            du[i][0] = con_u[0] - Dens(st);
            du[i][1] = con_u[1] - Mom(st)[0];
            du[i][2] = con_u[2] - Mom(st)[1];
            du[i][3] = con_u[3] - Energy(st);

            // unit vector from the centroid to the edge midpoint
            dir[i][0] = mid[i][0] - cent[0];
            dir[i][1] = mid[i][1] - cent[1];
            len = sqrt(sqr(dir[i][0]) + sqr(dir[i][1]));
            for(j = 0; j < 2; j++)
                dir[i][j] = dir[i][j]/len;

            LR_matrix_in_dir(dir[i], st, L[i], R[i]);
            d_matrix_vec_mult(L[i], du[i], 4, 4, ch_du[i]);
        }

        uave[0] = Dens(st);
        uave[1] = Mom(st)[0];
        uave[2] = Mom(st)[1];
        uave[3] = Energy(st);

        // 3 neighbor tris
        for(i = 0; i < 3; i++)
        {
            nbuave[i][0] = Dens(nbst[i]);
            nbuave[i][1] = Mom(nbst[i])[0];
            nbuave[i][2] = Mom(nbst[i])[1];
            nbuave[i][3] = Energy(nbst[i]);
        }

        for(i = 0; i < 3; i++)
            nbcent[i] = fg_centroid(nbtri[i]);

        // Slope at midpt[i] from the stencil (tri, nb_i, nb_{i+1}):
        //   midpt[0]: (tri, nb0, nb1)
        //   midpt[1]: (tri, nb1, nb2)
        //   midpt[2]: (tri, nb2, nb0)
        for(i = 0; i < 3; i++)
        {
            j = (i+1)%3;
            compute_tri_geom(mid[i], cent, nbcent[i], nbcent[j], coef);
            for(k = 0; k < N_EQN; k++)
                slp[i][k] = coef[0]*(nbuave[i][k]-uave[k]) + coef[1]*(nbuave[j][k]-uave[k]);
        }

        // TVB-type test at the 3 midpts.  Return as soon as one component
        // is flagged: the answer cannot change afterwards, so the
        // remaining projections are not needed.
        for(i = 0; i < 3; i++)
        {
            d_matrix_vec_mult(L[i], slp[i], 4, 4, ch_slp[i]);

            for(k = 0; k < N_EQN; k++)
            {
                if((ch_slp[i][k])*ch_du[i][k] < 0.0)
                    return YES;
                if(mu*fabs(ch_slp[i][k]) < fabs(ch_du[i][k]))
                    return YES;
            }
        }

        return NO;
}

// Rebuild the constant and linear coefficients (indices 0,1,2) of the
// polynomial stored in midsoln[tri->id].st[0] from the values of tri and
// its three neighbours.
//
// For every component k < N_EQN:
//   * Lave / nbLave = cell value minus the contribution returned by
//     R_degree2_above_term_averageP3() (terms with index >= 3 of st2),
//     for tri itself and for each neighbour;
//   * three candidate gradients coef[i] are obtained by solving the 2x2
//     systems built from the centroid offsets of neighbour pairs
//     (nb_i, nb_{i+1}) with comp_coef();
//   * with the "weno_w" debugging switch on, the candidates are blended
//     with the weights returned by WENO_mod_1().
// Finally the cell values of st are copied into st2 and
// tri->redo_limiting is set from unphysical_st_at_quadrature().
//
// tri         cell being limited.
// nbtri       its three neighbours (all dereferenced, must be non-NULL).
// midsoln     per-triangle stage states, indexed by tri->id.
// rk_iter     RK_STEP selects ->st, otherwise midsoln[..].st[rk_iter].
// comput_mat  YES: refill mass_1st_rows[i] for the neighbours via
//             comp_mass_matrix_1st_row(); otherwise the current contents
//             of mass_1st_rows are reused.
LOCAL void limiting_1st_degreeP3(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         int       rk_iter,
         int       comput_mat)
{
         Locstate st, nbst[3], st2;
         float    uave[4], nbuave[3][4];
         float    Rave[4], nbRave[3][4];
         float    Lave[4], nbLave[3][4];
         int      i, dim = 2, k;
         int      use_weno;
         double   *cent, *nbcent[3];
         float    rside[3], A[3][2][2];
         float    coef[3][2];
         float    u0, u1, u2, w[9], arrya[9], arryb[9];
         float    c_num[5];

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < 3; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
             {
                 // Same guard as limiting_1st_degree(): a neighbour with
                 // a negative id has no slot in midsoln.
                 if(nbtri[i]->id >= 0)
                     nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
                 else
                     nbst[i] = nbtri[i]->st;
             }
         }

         // The limited polynomial is always written to stage slot 0.
         st2 = midsoln[tri->id].st[0];

         uave[0] = Dens(st);
         uave[1] = Mom(st)[0];
         uave[2] = Mom(st)[1];
         uave[3] = Energy(st);
         R_degree2_above_term_averageP3(tri,st2,tri->Lmass_matrix,Rave);

         // 3 neighbor tris
         if(comput_mat == YES)
         {
             for(i = 0; i < 3; i++)
                 comp_mass_matrix_1st_row(MAX_N_COEF,nbtri[i],dim,fg_centroid(tri),mass_1st_rows[i]);
         }
         for(i = 0; i < 3; i++)
         {
             nbuave[i][0] = Dens(nbst[i]);
             nbuave[i][1] = Mom(nbst[i])[0];
             nbuave[i][2] = Mom(nbst[i])[1];
             nbuave[i][3] = Energy(nbst[i]);
             // tri's own coefficients (st2) with the neighbour's row
             R_degree2_above_term_averageP3(nbtri[i],st2,mass_1st_rows[i],nbRave[i]);
         }

         for(k = 0; k < N_EQN; k++)
         {
             Lave[k] = uave[k]-Rave[k];
             for(i = 0; i < 3; i++)
                 nbLave[i][k] = nbuave[i][k]-nbRave[i][k];
         }

         cent = fg_centroid(tri);
         for(i = 0; i < 3; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         // 2x2 matrices of centroid offsets for the stencils
         // (tri, nb_i, nb_{i+1}) and their condition numbers.
         for(i = 0; i < 3; i++)
         {
             A[i][0][0] = (nbcent[i][0]-cent[0]);
             A[i][0][1] = (nbcent[i][1]-cent[1]);
             A[i][1][0] = (nbcent[(i+1)%3][0]-cent[0]);
             A[i][1][1] = (nbcent[(i+1)%3][1]-cent[1]);
             c_num[i] = cond_num(A[i]); // c_num[i] =1.0;
         }
         // NOTE(review): the centre-biased reconstruction that consumed
         // these weights is disabled; the call is kept in case
         // unify_weight() also adjusts c_num -- confirm and drop if not.
         if(debugging("cent_bias"))
             unify_weight(c_num, 3, w); // For center biased limiting

         use_weno = debugging("weno_w");

         for(k = 0; k < N_EQN; k++)
         {
             // linear part of polynomial
             for(i = 0; i < 3; i++)
             {
                 rside[0] = nbLave[i][k] - Lave[k];
                 rside[1] = nbLave[(i+1)%3][k] - Lave[k];
                 comp_coef(A[i],rside,coef[i]);
             }

             // Defined fallback: with "weno_w" off no reconstruction is
             // active, so use a zero slope instead of storing
             // uninitialised values into the state.
             u1 = 0.0;
             u2 = 0.0;

             //////////////WENO weight ///////////////
             if(use_weno)
             {
                 for(i = 0; i < 3; i++)
                 {
                     arrya[i] = coef[i][0];
                     arryb[i] = coef[i][1];
                 }
                 WENO_mod_1(arrya, arryb, c_num, 3, w);
                 u1 = w[0]*coef[0][0] + w[1]*coef[1][0] + w[2]*coef[2][0];
                 u2 = w[0]*coef[0][1] + w[1]*coef[1][1] + w[2]*coef[2][1];
             }
             ///////////// END WENO weight /////////

             ////////// Zero moment //////////////
             u0 = Lave[k];

             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             }
         }

         Dens(st2) = Dens(st);
         Mom(st2)[0] = Mom(st)[0];
         Mom(st2)[1] = Mom(st)[1];
         Energy(st2) = Energy(st);

         if(N_EQN == 4 && YES == unphysical_st_at_quadrature(tri, st2))
             tri->redo_limiting = YES;
         else
             tri->redo_limiting = NO;

}

// Contribution of the polynomial terms with index 3 .. MAX_N_COEF-1:
// for each component,
//     ave = sum_{n>=3} coefficient[n] * Lmass_matrix[0][n] / Lmass_matrix[0][0].
//
// tri           not used by this routine.
// st            state holding the dg_Dens / dg_Mom / dg_Energy coefficients.
// Lmass_matrix  only row 0 is read; entry [0][0] is the divisor
//               (named "area" in the original code).
// ave           output; ave[0] always, ave[1..3] only when N_EQN != 1.
LOCAL void R_degree2_above_term_averageP3(
         TRI       *tri,
         Locstate  st,
         double **Lmass_matrix,
         float     *ave)
{
         float      cell_area = Lmass_matrix[0][0];
         int        n, comp;

         for(comp = 0; comp < N_EQN; comp++)
             ave[comp] = 0.0;

         // density component
         for(n = 3; n < MAX_N_COEF; n++)
             ave[0] += dg_Dens(st)[n]*Lmass_matrix[0][n];
         ave[0] /= cell_area;

         // momentum and energy components only exist for systems
         if(N_EQN != 1)
         {
             for(n = 3; n < MAX_N_COEF; n++)
             {
                 ave[1] += dg_Mom(st)[0][n]*Lmass_matrix[0][n];
                 ave[2] += dg_Mom(st)[1][n]*Lmass_matrix[0][n];
                 ave[3] += dg_Energy(st)[n]*Lmass_matrix[0][n];
             }
             for(comp = 1; comp < N_EQN; comp++)
                 ave[comp] /= cell_area;
         }
}

// Rebuild the constant and linear coefficients (indices 0,1,2) of the
// polynomial stored in midsoln[tri->id].st[0] from the values of tri and
// its three neighbours, blending three candidate gradients with the
// weights returned by WENO_mod_1().
//
// For every component k < N_EQN:
//   * Lave / nbLave = cell value minus the contribution returned by
//     R_degree2_term_average() (for tri) or R_degree2_term_average_Liu()
//     (for each neighbour, using mass_1st_row refilled about tri's
//     centroid by comp_mass_matrix_1st_row());
//   * candidate gradient i solves the 2x2 system built from the centroid
//     offsets of neighbours (nb_i, nb_{i+1}) via comp_coef().
// Afterwards the cell values of st are copied into st2, and
// tri->redo_limiting is raised (never cleared here) when
// unphysical_st_at_quadrature() reports a problem.
//
// tri      cell being limited.
// nbtri    its three neighbours (all dereferenced, must be non-NULL).
// midsoln  per-triangle stage states, indexed by tri->id.
// rk_iter  RK_STEP selects ->st, otherwise midsoln[..].st[rk_iter];
//          neighbours with a negative id always use their own ->st.
//
// Side effect: the file-level debug_flag is reset to NO on exit.
LOCAL void limiting_1st_degree(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         int       rk_iter)
{
         Locstate st, nbst[3], st2;
         float    uave[4], nbuave[3][4];
         float    Rave[4], nbRave[3][4];
         float    Lave[4], nbLave[3][4];
         int      i, dim = 2, k;
         double   *cent, *nbcent[3];
         float    rside[3], A[3][2][2];
         float    coef[3][2];
         float    u0, u1, u2, w[9], arrya[9], arryb[9];
         float    c_num[3];

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < 3; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
             {
                 if(nbtri[i]->id >= 0)
                     nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
                 else
                     nbst[i] = nbtri[i]->st;
             }
         }

         // The limited polynomial is always written to stage slot 0.
         st2 = midsoln[tri->id].st[0];

         uave[0] = Dens(st);
         uave[1] = Mom(st)[0];
         uave[2] = Mom(st)[1];
         uave[3] = Energy(st);

         R_degree2_term_average(tri,st2,Rave);

         // 3 neighbor tris
         for(i = 0; i < 3; i++)
         {
             nbuave[i][0] = Dens(nbst[i]);
             nbuave[i][1] = Mom(nbst[i])[0];
             nbuave[i][2] = Mom(nbst[i])[1];
             nbuave[i][3] = Energy(nbst[i]);

             comp_mass_matrix_1st_row(MAX_N_COEF,nbtri[i],dim,fg_centroid(tri),mass_1st_row);
             R_degree2_term_average_Liu(nbtri[i],st2,mass_1st_row,nbRave[i]);
         }

         for(k = 0; k < N_EQN; k++)
         {
             Lave[k] = uave[k]-Rave[k];
             for(i = 0; i < 3; i++)
                 nbLave[i][k] = nbuave[i][k]-nbRave[i][k];
         }

         cent = fg_centroid(tri);
         for(i = 0; i < 3; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         // 2x2 matrices of centroid offsets for the stencils
         // (tri, nb_i, nb_{i+1}) and their condition numbers.
         for(i = 0; i < 3; i++)
         {
             A[i][0][0] = (nbcent[i][0]-cent[0]);
             A[i][0][1] = (nbcent[i][1]-cent[1]);
             A[i][1][0] = (nbcent[(i+1)%3][0]-cent[0]);
             A[i][1][1] = (nbcent[(i+1)%3][1]-cent[1]);
             c_num[i] = cond_num(A[i]); // c_num[i] =1.0;
         }
         // NOTE(review): w is overwritten by WENO_mod_1() below; the call
         // is kept in case unify_weight() also adjusts c_num -- confirm
         // and drop if it does not.
         unify_weight(c_num, 3, w); // For center biased limiting

         for(k = 0; k < N_EQN; k++)
         {
             // linear part of polynomial
             // tri, nb0, nb1
             // tri, nb1, nb2
             // tri, nb2, nb0
             for(i = 0; i < 3; i++)
             {
                 rside[0] = nbLave[i][k] - Lave[k];
                 rside[1] = nbLave[(i+1)%3][k] - Lave[k];
                 comp_coef(A[i],rside,coef[i]);
             }

             ///////////// WENO
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coef[i][0];
                 arryb[i] = coef[i][1];
             }
             WENO_mod_1(arrya, arryb, c_num, 3, w);
             u1 = w[0]*coef[0][0] + w[1]*coef[1][0] + w[2]*coef[2][0];
             u2 = w[0]*coef[0][1] + w[1]*coef[1][1] + w[2]*coef[2][1];
             /////////// END WENO

             ////////// Zero moment //////////////
             u0 = Lave[k];

             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             }
         }

         Dens(st2) = Dens(st);
         Mom(st2)[0] = Mom(st)[0];
         Mom(st2)[1] = Mom(st)[1];
         Energy(st2) = Energy(st);

         // Only raises the flag; it is not cleared on this path.
         if(N_EQN == 4 && YES == unphysical_st_at_quadrature(tri, st2))
             tri->redo_limiting = YES;

         // TMP
         debug_flag = NO;
}

// Solve the 2 by 2 linear system A x = rside by Gaussian elimination
// with row pivoting on the first column.
//
// A      2x2 coefficient matrix (row-major), not modified.
// rside  right-hand side, 2 entries.
// x      output, 2 entries.
//
// No singularity check is made: a zero pivot or a zero reduced
// diagonal yields a division by zero.
EXPORT void comp_coef(
        float  A[ ][2],
        float  *rside,
        float  *x)
{
        // Pivot row = the row with the strictly larger |first column|
        // entry; ties go to row 1.
        int piv = (fabs(A[0][0]) > fabs(A[1][0])) ? 0 : 1;
        int oth = 1 - piv;

        // Eliminate x[0] from the non-pivot row, then back-substitute
        // into the pivot row.
        x[1] = (-A[oth][0]/A[piv][0]*rside[piv] + rside[oth])/(A[oth][1] - A[oth][0]/A[piv][0]*A[piv][1]);
        x[0] = (rside[piv] - A[piv][1]*x[1])/A[piv][0];
}

// For periodic boundary on both sides.
//
// Exchanges interior fields with the neighbouring subdomains: for every
// grid direction and both sides, synchronise, send on one side and
// receive on the opposite one.
//
// fr       front; supplies the mesh, the pp_grid and the grid dimension.
// midsoln  stage states, forwarded to the send/receive routines.
// rk_step  stage index, forwarded to the send/receive routines.
//
// The exchange width is fixed on the first call: twice the length of
// side 0 of the first triangle of the last non-empty surface, after
// pp_global_max() (presumably a max reduction over processors).
LOCAL void update_db_Mach_buffer(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step)
{
        static int    first = YES;
        static double extra_buf;
        PP_GRID       *pp_grid = fr->pp_grid;
        int           me[MAXD];
        int           myid, dim, dir, side;

        if(first == YES)
        {
            SURFACE **surf;
            TRI     *tri;
            float   diam = 0.0;

            // Only the first triangle of each surface is sampled; a later
            // surface overrides an earlier one.
            for(surf = fr->mesh->surfaces; surf && *surf; surf++)
            {
                tri = first_tri(*surf);
                if(!at_end_of_tri_list(tri,*surf))
                    diam = fg_length_side(tri)[0];
            }
            pp_global_max(&diam, 1);
            extra_buf = 2.0*diam;
            first = NO;
        }

        myid = pp_mynode();
        dim = fr->rect_grid->dim;
        find_Cartesian_coordinates(myid,pp_grid,me);

        for(dir = 0; dir < dim; dir++)
        {
            for (side = 0; side < 2; ++side)
            {
                pp_gsync();
                pp_send_interior_fields(me,dir,side,extra_buf,fr,midsoln,rk_step);
                // receive on the opposite side
                pp_receive_interior_fields(me,dir,(side+1)%2,extra_buf,fr,midsoln,rk_step);
            }
        }

}

EXPORT void update_buffer(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step)
{
        RECT_GRID *gr = fr->rect_grid;
        float     *L = gr->L, *U = gr->U;
        float     crsp_cent[3];
        TRI       *tri, *crsp_tri;
        SURFACE   **surf;
        BDRY_SIDE side;
        int       dim = 2, i, in_buf;
        double     *cent;
        size_t    sizest = fr->sizest;
 
        // First update left and right side 
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next) 
            {
                if(tri->BC_type != SUBDOMAIN)
                    continue;

                in_buf = NO;
                cent = fg_centroid(tri); 
                if((L[0] > cent[0] &&  L[1] < cent[1] && U[1] > cent[1]))
                {
                    in_buf = YES;
                    side = LEFT_BDRY;
                }
                if((U[0] < cent[0] &&  L[1] < cent[1] && U[1] > cent[1])
                  )
                {
                    in_buf = YES;
                    side = RIGHT_BDRY;
                }
                
                if(in_buf == YES)
                {
                    // TMP
                    // printf("\n\n----------entered  update_buffer\n");
                    // print_tri(tri,fr->mesh);

                    cent = fg_centroid(tri); 
                    switch(side)
                    {
                    case LEFT_BDRY:
                        crsp_cent[0] = cent[0] + U[0]-L[0];
                        crsp_cent[1] = cent[1];
                    break;
                    case RIGHT_BDRY:
                        crsp_cent[0] = cent[0] - (U[0]-L[0]);
                        crsp_cent[1] = cent[1];
                    break;  
                    }

                    crsp_tri = find_corres_tri(crsp_cent,fr->mesh);
                     
                    if(rk_step == RK_STEP)
                        assign(tri->st, crsp_tri->st,sizest);
                    else
                        assign(midsoln[tri->id].st[rk_step],
                           midsoln[crsp_tri->id].st[rk_step],sizest);
                    /**
                    printf("tri center[%g %g], corresp cent[%g %g], tri = %d\n",
                          cent[0], cent[1], crsp_cent[0], crsp_cent[1], crsp_tri);
                    print_tri(crsp_tri,fr->mesh);
                    printf("print buffered st\n");
                    g_verbose_print_state(midsoln[tri->id].st[rk_step]);
                    printf("EXIT in update_buffer()\n");
                    clean_up(0);
                    **/
                }
            }
        }

        // update upper and lower side
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type != SUBDOMAIN)
                    continue;
                in_buf = NO;
                cent = fg_centroid(tri);
                if((L[1] > cent[1])
                  )
                {
                    in_buf = YES;
                    side = LOWER_BDRY;
                }
                if((U[1] < cent[1])
                  )
                {
                    in_buf = YES;
                    side = UPPER_BDRY;
                }
                if(in_buf == YES)
                {
                    cent = fg_centroid(tri);
                    switch(side)
                    {
                    case LOWER_BDRY:
                        crsp_cent[0] = cent[0];
                        crsp_cent[1] = cent[1] + U[1]-L[1];
                    break;
                    case UPPER_BDRY:
                        crsp_cent[0] = cent[0];
                        crsp_cent[1] = cent[1] - (U[1]-L[1]);
                    break;
                    }
                    crsp_tri = find_corres_tri(crsp_cent,fr->mesh);

                    // TMP
                    /**
                    if(tri->id == 234)
                    {
                        printf("In update_buffer, tri (%d) receive state from tri(%d) at iter %d\n",
                                  tri->id, crsp_tri->id, rk_step);
                        print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[0]), dim, "\n");
                        print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[1]), dim, "\n");
                        print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[2]), dim, "\n");
                        printf("tri(%d) crds\n", crsp_tri->id);
                        print_general_vector("Tri_pt", Coords(Point_of_tri(crsp_tri)[0]), dim, "\n");
                        print_general_vector("Tri_pt", Coords(Point_of_tri(crsp_tri)[1]), dim, "\n");
                        print_general_vector("Tri_pt", Coords(Point_of_tri(crsp_tri)[2]), dim, "\n");
                    }
                    **/

                    if(rk_step == RK_STEP)
                        assign(tri->st,crsp_tri->st,sizest);
                    else
                        assign(midsoln[tri->id].st[rk_step],
                           midsoln[crsp_tri->id].st[rk_step],sizest);
                }
           } 
       }
}

/*
*                       update_buffer_x_per_y_ref():
*
*       Refreshes the states of buffer triangles, i.e. triangles of fr->mesh
*       whose centroid lies outside the box [L,U] of fr->rect_grid, for a
*       domain that is periodic in X and reflecting in Y.
*
*       fr       front supplying rect_grid, mesh, interf and sizest.
*       midsoln  per-triangle intermediate states, indexed by tri->id.
*       rk_step  when equal to RK_STEP the triangles' own states (tri->st)
*                are updated; otherwise slot st[rk_step] of midsoln is.
*
*       Pass 1 (periodic): a triangle whose centroid is left of L[0] or right
*       of U[0], with L[1] < y < U[1], copies the state of the triangle found
*       at the centroid shifted by the domain width U[0]-L[0].
*
*       Pass 2 (reflecting): a triangle whose centroid is below L[1] or above
*       U[1] copies the state of the triangle found at the centroid mirrored
*       across that boundary line, then passes the copy to reflect_state()
*       with normal (0,1) and the boundary point (x, L[1]) or (x, U[1]).
*       The mirrored centroid keeps its x coordinate, so a corner buffer
*       triangle takes its data from the strip handled in pass 1; that is
*       presumably why the periodic pass runs first.
*
*       A failed lookup (find_corres_tri() returning NULL) is fatal in both
*       passes: the error is reported and clean_up(ERROR) is called, which is
*       expected not to return.
*/
EXPORT void update_buffer_x_per_y_ref(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step)
{
        RECT_GRID *gr = fr->rect_grid;
        float     *L = gr->L, *U = gr->U;
        /* Third component is zeroed so that no indeterminate value is */
        /* handed to find_corres_tri().                                */
        float     crsp_cent[3] = {0.0, 0.0, 0.0}, dbcent[2];
        TRI       *tri, *crsp_tri;
        SURFACE   **surf;
        int       dim = 2, i;
        double    *cent;
        size_t    sizest = fr->sizest;
        float     nor[2] = {0.0, 1.0}, pt[2];

        /* Pass 1: periodic copy into the left and right buffer strips. */
        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                cent = fg_centroid(tri);

                /* Only the strips strictly between the lower and upper */
                /* boundaries are treated as periodic images.           */
                if (!(L[1] < cent[1] && U[1] > cent[1]))
                    continue;

                /* The right-side test is made first because it took    */
                /* precedence when both tests held in the original code. */
                if (U[0] < cent[0])
                    crsp_cent[0] = cent[0] - (U[0]-L[0]);
                else if (L[0] > cent[0])
                    crsp_cent[0] = cent[0] + U[0]-L[0];
                else
                    continue;
                crsp_cent[1] = cent[1];

                crsp_tri = find_corres_tri(crsp_cent,fr->mesh);
                if (crsp_tri == NULL)
                {
                    printf("ERROR: update_buffer_x_per_y_ref\n");
                    printf("Update x_per buffer failed\n");
                    clean_up(ERROR);
                }

                if (rk_step == RK_STEP)
                    assign(tri->st, crsp_tri->st,sizest);
                else
                    assign(midsoln[tri->id].st[rk_step],
                       midsoln[crsp_tri->id].st[rk_step],sizest);
            }
        }

        /* Pass 2: reflected copy into the lower and upper buffer strips. */
        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                cent = fg_centroid(tri);

                /* pt[1] is the y coordinate of the reflecting line; the */
                /* upper test is made first, matching its precedence in  */
                /* the original code.                                    */
                if (U[1] < cent[1])
                    pt[1] = U[1];
                else if (L[1] > cent[1])
                    pt[1] = L[1];
                else
                    continue;

                crsp_cent[0] = cent[0];
                crsp_cent[1] = pt[1]*2.0 - cent[1];
                pt[0] = crsp_cent[0];

                crsp_tri = find_corres_tri(crsp_cent,fr->mesh);
                if (crsp_tri == NULL)
                {
                    printf("ERROR: update_buffer_x_per_y_ref\n");
                    printf("Update y_ref buffer failed\n");
                    clean_up(ERROR);
                }

                /* reflect_state() is given the buffer triangle's own */
                /* centroid, copied into a float array.               */
                for (i = 0; i < dim; i++)
                    dbcent[i] = cent[i];

                if (rk_step == RK_STEP)
                {
                    assign(tri->st,crsp_tri->st,sizest);
                    reflect_state(tri->st,fr->interf,dbcent,pt,nor);
                }
                else
                {
                    assign(midsoln[tri->id].st[rk_step],
                       midsoln[crsp_tri->id].st[rk_step],sizest);
                    reflect_state(midsoln[tri->id].st[rk_step],fr->interf,dbcent,pt,nor);
                }
            }
        }
}

/*
*                       update_buffer_x_ref():
*
*       Refreshes the states of the left and right buffer triangles for a
*       domain that is reflecting in X.
*
*       fr       front supplying rect_grid, mesh and sizest.
*       midsoln  per-triangle intermediate states, indexed by tri->id.
*       rk_step  when equal to RK_STEP the triangles' own states (tri->st)
*                are updated; otherwise slot st[rk_step] of midsoln is.
*
*       Only triangles with BC_type == SUBDOMAIN are considered. Such a
*       triangle is processed when its centroid is left of L[0] or right of
*       U[0] and L[1] < y < U[1]. It copies the state of the triangle found
*       at the centroid mirrored across x = L[0] (resp. x = U[0]) and then
*       hands the copy to reflect_state_about_y() together with its own
*       Lmass_matrix.
*
*       A failed lookup is fatal: the triangle is reported and
*       clean_up(ERROR) is called, which is expected not to return.
*/
EXPORT void update_buffer_x_ref(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step)
{
        RECT_GRID *gr = fr->rect_grid;
        float     *L = gr->L, *U = gr->U;
        /* Third component is zeroed so that no indeterminate value is */
        /* handed to find_corres_tri().                                */
        float     crsp_cent[3] = {0.0, 0.0, 0.0};
        float     wall;          /* x coordinate of the reflecting line */
        TRI       *tri, *crsp_tri;
        SURFACE   **surf;
        double    *cent;
        size_t    sizest = fr->sizest;

        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if (tri->BC_type != SUBDOMAIN)
                    continue;

                cent = fg_centroid(tri);
                if (!(L[1] < cent[1] && U[1] > cent[1]))
                    continue;

                /* The right-side test is made first because it took    */
                /* precedence when both tests held in the original code. */
                if (U[0] < cent[0])
                    wall = U[0];
                else if (L[0] > cent[0])
                    wall = L[0];
                else
                    continue;

                crsp_cent[0] = wall*2.0 - cent[0];
                crsp_cent[1] = cent[1];

                crsp_tri = find_corres_tri(crsp_cent,fr->mesh);
                if (crsp_tri == NULL)
                {
                    printf("ERROR: update_buffer_x_ref()\n");
                    printf("tri[%d] can not find crsp\n", tri->id);
                    print_tri_crds(tri);
                    clean_up(ERROR);
                }

                if (rk_step == RK_STEP)
                {
                    assign(tri->st, crsp_tri->st,sizest);
                    reflect_state_about_y(tri->st, tri->Lmass_matrix);
                }
                else
                {
                    assign(midsoln[tri->id].st[rk_step],
                       midsoln[crsp_tri->id].st[rk_step],sizest);
                    reflect_state_about_y(midsoln[tri->id].st[rk_step], tri->Lmass_matrix);
                }
            }
        }
}

/*
*                       update_buffer_x_peri():
*
*       Refreshes the states of the left and right buffer triangles for a
*       domain that is periodic in X.
*
*       fr       front supplying rect_grid, mesh and sizest.
*       midsoln  per-triangle intermediate states, indexed by tri->id.
*       rk_step  when equal to RK_STEP the triangles' own states (tri->st)
*                are updated; otherwise slot st[rk_step] of midsoln is.
*
*       A triangle is processed when its centroid is left of L[0] or right
*       of U[0] and L[1] < y < U[1]. It copies, unchanged, the state of the
*       triangle found at the centroid shifted by the domain width
*       U[0]-L[0] towards the interior.
*
*       A failed lookup (find_corres_tri() returning NULL) is fatal, as in
*       update_buffer_x_ref(): the triangle is reported and clean_up(ERROR)
*       is called, which is expected not to return.
*/
EXPORT void update_buffer_x_peri(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step)
{
        RECT_GRID *gr = fr->rect_grid;
        float     *L = gr->L, *U = gr->U;
        /* Third component is zeroed so that no indeterminate value is */
        /* handed to find_corres_tri().                                */
        float     crsp_cent[3] = {0.0, 0.0, 0.0};
        TRI       *tri, *crsp_tri;
        SURFACE   **surf;
        double    *cent;
        size_t    sizest = fr->sizest;

        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                cent = fg_centroid(tri);
                if (!(L[1] < cent[1] && U[1] > cent[1]))
                    continue;

                /* The right-side test is made first because it took    */
                /* precedence when both tests held in the original code. */
                if (U[0] < cent[0])
                    crsp_cent[0] = cent[0] - (U[0]-L[0]);
                else if (L[0] > cent[0])
                    crsp_cent[0] = cent[0] + U[0]-L[0];
                else
                    continue;
                crsp_cent[1] = cent[1];

                crsp_tri = find_corres_tri(crsp_cent,fr->mesh);
                if (crsp_tri == NULL)
                {
                    printf("ERROR: update_buffer_x_peri()\n");
                    printf("tri[%d] can not find crsp\n", tri->id);
                    clean_up(ERROR);
                }

                if (rk_step == RK_STEP)
                    assign(tri->st, crsp_tri->st,sizest);
                else
                    assign(midsoln[tri->id].st[rk_step],
                       midsoln[crsp_tri->id].st[rk_step],sizest);
            }
        }
}


/*
*                       tri_cent_outside():
*
*       Tests whether the centroid of tri lies outside the box [L,U].
*       Directions are examined in increasing order and, within a direction,
*       the lower bound is tested before the upper bound; the first violated
*       bound decides the answer.
*
*       Returns YES and stores the violated side in *side (LEFT_BDRY or
*       RIGHT_BDRY for direction 0, LOWER_BDRY or UPPER_BDRY for any other
*       direction). Returns NO, leaving *side untouched, when the centroid
*       violates no bound in the first dim directions.
*/
LOCAL int tri_cent_outside(
        TRI  *tri,
        float  *L,
        float  *U,
        int    dim,
        BDRY_SIDE  *side)
{
        double  *centroid = fg_centroid(tri);
        int     dir = 0;

        while (dir < dim)
        {
            if (L[dir] - centroid[dir] > 0.0)
            {
                *side = (dir == 0) ? LEFT_BDRY : LOWER_BDRY;
                return YES;
            }
            if (U[dir] - centroid[dir] < 0.0)
            {
                *side = (dir == 0) ? RIGHT_BDRY : UPPER_BDRY;
                return YES;
            }
            ++dir;
        }
        return NO;
}


/*
*                       adv_fw():
*
*       Advances the DG polynomial coefficients of one triangle through one
*       Runge-Kutta stage.
*
*       tri       triangle that receives the result of the last stage
*                 (rk_iter == RK_STEP-1) in tri->st; also used for the
*                 physical-boundary test and the debug printout.
*       otri      triangle supplying neighbours, mass matrices, the stage-0
*                 state (otri->st) and the index otri->id into mid_soln.
*       dt        time step.
*       alpha     passed through to edge_integr().
*       mid_soln  intermediate states; stage rk_iter is read from
*                 mid_soln[id].st[rk_iter] (for rk_iter > 0) and every stage
*                 except the last is written to st[rk_iter+1].
*       rk_iter   index of the stage being computed (0-based).
*       fr        front, passed to bdry_tri_adv_fw() and edge_integr().
*
*       Triangles with BC_type == SUBDOMAIN are skipped. A triangle for
*       which Boundary_tri(otri) or tri_on_phy_bdry(tri) holds is advanced
*       only if bdry_tri_adv_fw() returns YES.
*
*       For every basis index the right-hand side is the interior integral
*       minus the three edge integrals; it is then multiplied by
*       otri->mass_inv. The stage update is
*
*           rk_iter == 0              :  u = u_prev + dt*R
*           rk_iter == 1, RK_STEP == 2:  u = 1/2 u0 + 1/2 (u_prev + dt*R)
*           rk_iter == 1, otherwise   :  u = 3/4 u0 + 1/4 (u_prev + dt*R)
*           rk_iter == 2, otherwise   :  u = 1/3 u0 + 2/3 (u_prev + dt*R)
*
*       with u0 = otri->st; these are the weights of the Shu-Osher form of
*       the two- and three-stage TVD Runge-Kutta schemes. Finally the cell
*       averages Dens, Mom and Energy of the new state are recomputed from
*       its coefficients and the first row of otri->Lmass_matrix.
*
*       Side effects: clears the YES entries of fg_sf_flag(otri) and resets
*       the file-level debug_flag to NO on the SUBDOMAIN exit and on normal
*       completion.
*/
LOCAL void adv_fw(
        TRI      *tri,
        TRI      *otri,
        float    dt,
        float    alpha,
        Mid_soln *mid_soln,
        int      rk_iter,
        Front    *fr)
{
        TRI      *nbtri[3];
        int      indx, side, i;
        float    i_integr[4];           /* interior integral, one per eqn */
        float    e_integr[3][4];        /* edge integrals, [side][eqn]    */
        double   rhs[4][MAX_N_COEF], mulrhs[4][MAX_N_COEF]; /* [eqn][coef] */
        Locstate soln, st0;
        Locstate prev_st, prev_nbst;
        double   **mass_inv, **Lmass_matrix;
        double   w_old = 0.0, w_prev = 0.0;  /* weights of u0 and u_prev  */
        int      combine = NO;

        for (i = 0; i < 3; i++)
            nbtri[i] = Tri_on_side(otri,i);

        if (otri->BC_type == SUBDOMAIN)
        {
            debug_flag = NO;
            return;
        }

        if (Boundary_tri(otri) || tri_on_phy_bdry(tri))
        {
            if (NO == bdry_tri_adv_fw(tri,otri,fr))
                return;
        }

        /* Destination of this stage. */
        if (rk_iter == RK_STEP-1)
            soln = tri->st;
        else
            soln = mid_soln[otri->id].st[rk_iter+1];

        /* State the stage starts from. */
        if (rk_iter == 0)
            prev_st = otri->st;
        else
            prev_st = mid_soln[otri->id].st[rk_iter];

        Set_params(soln,otri->st);
        set_type_of_state(soln,state_type(otri->st));

        Lmass_matrix = otri->Lmass_matrix;
        mass_inv = otri->mass_inv;

        for (indx = 0; indx < MAX_N_COEF; indx++)
        {
            /* Interior integral for basis function indx. */
            if (MAX_N_COEF == 6)
                inter_integr(otri,prev_st,indx,i_integr,rk_iter);
            else
                inter_integr_13_quad(otri,prev_st,indx,i_integr,rk_iter);

            for (i = 0; i < N_EQN; i++)
                rhs[i][indx] = i_integr[i];

            /* Edge integrals; a missing neighbour is signalled by NULL. */
            for (side = 0; side < 3; side++)
            {
                prev_nbst = NULL;
                if (nbtri[side] != NULL)
                {
                    if (rk_iter == 0)
                        prev_nbst = nbtri[side]->st;
                    else
                        prev_nbst = mid_soln[nbtri[side]->id].st[rk_iter];
                }

                edge_integr(otri, nbtri[side], prev_st,prev_nbst,
                    alpha, side, indx, e_integr[side],fr,dt,mid_soln,rk_iter);
            }

            for (i = 0; i < N_EQN; i++)
            {
                for (side = 0; side < 3; side++)
                    rhs[i][indx] -= e_integr[side][i];
            }
        }

        /* Reset the saved-flux flags of this triangle. */
        for (side = 0; side < 3; side++)
        {
            if (fg_sf_flag(otri)[side] == YES)
                fg_sf_flag(otri)[side] = NO;
        }

        for (i = 0; i < N_EQN; i++)
            matrix_vec_mult(mass_inv, rhs[i], MAX_N_COEF, MAX_N_COEF, mulrhs[i]);

        if (rk_iter == 0)
        {
            for (indx = 0; indx < MAX_N_COEF; indx++)
            {
                dg_Dens(soln)[indx] = dg_Dens(prev_st)[indx] + dt*mulrhs[0][indx];
                dg_Mom(soln)[0][indx] = dg_Mom(prev_st)[0][indx] + dt*mulrhs[1][indx];
                dg_Mom(soln)[1][indx] = dg_Mom(prev_st)[1][indx] + dt*mulrhs[2][indx];
                dg_Energy(soln)[indx] = dg_Energy(prev_st)[indx] + dt*mulrhs[3][indx];
            }
        }
        else
        {
            /* Pick the convex-combination weights of the later stages. */
            if (RK_STEP == 2)
            {
                if (rk_iter == 1)
                {
                    w_old = 0.5;
                    w_prev = 0.5;
                    combine = YES;
                }
            }
            else if (rk_iter == 1)
            {
                w_old = 0.75;
                w_prev = 0.25;
                combine = YES;
            }
            else if (rk_iter == 2)
            {
                w_old = 1.0/3.0;
                w_prev = 2.0/3.0;
                combine = YES;
            }

            if (combine == YES)
            {
                st0 = otri->st;
                for (indx = 0; indx < MAX_N_COEF; indx++)
                {
                    dg_Dens(soln)[indx] = w_old*dg_Dens(st0)[indx] +
                          w_prev*dg_Dens(prev_st)[indx] + w_prev*dt*mulrhs[0][indx];
                    dg_Mom(soln)[0][indx] = w_old*dg_Mom(st0)[0][indx] +
                          w_prev*dg_Mom(prev_st)[0][indx] + w_prev*dt*mulrhs[1][indx];
                    dg_Mom(soln)[1][indx] = w_old*dg_Mom(st0)[1][indx] +
                          w_prev*dg_Mom(prev_st)[1][indx] + w_prev*dt*mulrhs[2][indx];
                    dg_Energy(soln)[indx] = w_old*dg_Energy(st0)[indx] +
                          w_prev*dg_Energy(prev_st)[indx] + w_prev*dt*mulrhs[3][indx];
                }
            }
        }

        /* Cell averages: coefficients weighted by the first row of the */
        /* mass matrix, normalised by its (0,0) entry.                  */
        Dens(soln) = 0.0;
        Mom(soln)[0] = 0.0;
        Mom(soln)[1] = 0.0;
        Energy(soln) = 0.0;
        for (indx = 0; indx < MAX_N_COEF; indx++)
        {
            Dens(soln) += dg_Dens(soln)[indx]*Lmass_matrix[0][indx];
            Mom(soln)[0] += dg_Mom(soln)[0][indx]*Lmass_matrix[0][indx];
            Mom(soln)[1] += dg_Mom(soln)[1][indx]*Lmass_matrix[0][indx];
            Energy(soln) += dg_Energy(soln)[indx]*Lmass_matrix[0][indx];
        }
        Dens(soln) /= Lmass_matrix[0][0];
        Mom(soln)[0] /= Lmass_matrix[0][0];
        Mom(soln)[1] /= Lmass_matrix[0][0];
        Energy(soln) /= Lmass_matrix[0][0];

        if (debug_flag == YES)
        {
            printf("\n**********************************\n");
            printf("New soln of TRI (%d) at iteration %d:", tri->id, rk_iter);
            g_verbose_print_state(soln);
        }

        /* Debug output is one-shot: the flag is cleared after each call. */
        debug_flag = NO;
}

/*
*                       bdry_tri_adv_fw():
*
*       Decides whether a boundary triangle is to be advanced by adv_fw().
*
*       Returns YES when otri->BC_type is one of NEUMANN, IN_FLOW, OUT_FLOW
*       or CONST_P, and NO for every other boundary-condition type.
*
*       tri is used only for the debug message printed when debug_flag is
*       YES (note that the message reports tri->BC_type, whereas the
*       decision is made on otri->BC_type). fr is not used.
*/
LOCAL int bdry_tri_adv_fw(
        TRI      *tri,
        TRI      *otri,
        Front    *fr)
{
        if (debug_flag == YES)
            printf("\n\n**********Entered  bdry_tri_adv_fw() Triangle(%d), BC type %d\n",
                           tri->id,  tri->BC_type);

        if (otri->BC_type == NEUMANN ||
            otri->BC_type == IN_FLOW ||
            otri->BC_type == OUT_FLOW ||
            otri->BC_type == CONST_P)
        {
            return YES;
        }
        return NO;
}


LOCAL void edge_integr(
	TRI      *tri,
        TRI      *nbtri,
        Locstate st,
        Locstate nbst,
        float    alpha,
        int      side,
        int      indx,
        float    *eint,
        Front    *fr,
        float    dt,
	Mid_soln *mid_soln,
        int      rk_iter)
{
        // static int     Gauss_N = 4;
        int     i, j, k, dim = 2;
        POINT   *p[3];	
        float   *pcrds[4], qcrds[4];
        // double   q[8], w[8];
        float   nor[3], t[3], length;
        double   *cent = fg_centroid(tri);
        double   *nbcent;
        float   fluxx[4], fluxy[4], flux[4][4], tmpflux[4][4];
        float   nbfluxx[4], nbfluxy[4];
        float   con_u[4], nbcon_u[4];
        float   tmpf[4], vh_pt, tmpfR[4];
        static  Locstate Tst = NULL, Tnbst, bdryst;
        float   tmp_alpha;
        int     debug = NO, phy_brdy_side;

        if(Tst == NULL)
        {
            g_alloc_state(&Tst, fr->sizest);
            g_alloc_state(&Tnbst, fr->sizest);
            g_alloc_state(&bdryst, fr->sizest);
            assign(Tst, st, fr->sizest);
            assign(Tnbst, st, fr->sizest);
            assign(bdryst, st, fr->sizest);
        }
        for(i = 0; i < N_EQN; i++) eint[i] = 0.0;
        if(nbtri != NULL)
            nbcent = fg_centroid(nbtri);
        for(i = 0; i < 3; i++)
        {
            p[i] = Point_of_tri(tri)[i];
            pcrds[i] = Coords(p[i]);
        }
        length = fg_length_side(tri)[side];

        /////////////////
        // use saved flux
        /////////////////
        if(fg_sf_flag(tri)[side] == YES)
        {
            for(k = 0; k < Gauss_N; k++)
            {
                for(i = 0; i < dim; i++)
                    qcrds[i] = (pcrds[(side+1)%3][i] + pcrds[side][i])/2.0 +
                           (pcrds[(side+1)%3][i] - pcrds[side][i])/2.0*q[k];
                vh_pt = vh_val(qcrds, cent, indx);

                for(i = 0; i < N_EQN; i++)
                    flux[k][i] = fg_side_flx(tri)[side][k][i];

                for(i = 0; i < N_EQN; i++)
                {
                    // LF_flux  and HLL_flux
                    tmpf[i] = flux[k][i]*vh_pt*qw[k];
                    eint[i] += tmpf[i];
                }
            }
            if(Gauss_N == 1)
            {
                // To test mid pt
                for(i = 0; i < N_EQN; i++) eint[i] *= length;
            }
            else
            {
                for(i = 0; i < N_EQN; i++) eint[i] *= length/2.0;
            }
            return;
        }

        for(i = 0; i < dim; i++)
            t[i] = fg_side_vector(tri)[side][i]; 
        nor[0] = t[1];
        nor[1] = -t[0];
        phy_brdy_side = is_phy_bdry_side(tri,side);

        /// LF_flux
        if(nbst != NULL && phy_brdy_side == NO) 
            alpha = compute_alpha_of_LF_flux(st,nbst,nor);

        for(i = 0; i < N_EQN; i++) eint[i] = 0.0;

        for(k = 0; k < Gauss_N; k++)
        {
            for(i = 0; i < dim; i++)
                qcrds[i] = (pcrds[(side+1)%3][i] + pcrds[side][i])/2.0 +
                           (pcrds[(side+1)%3][i] - pcrds[side][i])/2.0*q[k];
            vh_pt = vh_val(qcrds, cent, indx);

            con_u_at_pt(st, qcrds, cent, con_u);
            // LF_flux
            flux_at_pt(con_u,st,qcrds,cent,fluxx,fluxy);
            // LF_flux_end

            con_u_to_state(con_u, dim, Tst);
            /**
            if(N_EQN == 4 && invalid_state("edge_integr",Tst,NO))
            {
                // g_invalid_state
                float midpt[MAXD], tmpcon[4];
                int ii;
                for(ii = 0; ii < 2; ii++)
                    midpt[ii] = (pcrds[(side+1)%3][ii] + pcrds[side][ii])/2.0;
                con_u_at_pt(st, midpt, cent, con_u);

                // TMP
                // printf("tri(%d), side %d, pt(%g %g), quadrature[%d]\n", tri->id, side,
                //           qcrds[0], qcrds[1], k);
                // printf("ERROR: edge_integr, density < 0, iteration %d\n", rk_iter);
                // printf("tri(%d), side %d, pt(%g %g), quadrature[%d]\n", tri->id, side,
                //           qcrds[0], qcrds[1], k);
                // print_tri_crds(tri);
                // verbose_print_state("st at pt",Tst);
                // verbose_print_state("st of tri",st);
                // con_u_to_state(tmpcon, dim, Tst);
                // verbose_print_state("st at edge center",Tst);
                // clean_up(ERROR);
                // END TMP

                con_u_to_state(con_u, dim, Tst);
                //  LF_flux
                flux_at_pt(con_u,st,qcrds,cent,fluxx,fluxy);
                // LF_flux_end
            }
            **/

            // TMP, debugging
            if(debug_flag == YES && k == 1 && debug == YES)
            {
                con_u_at_pt(st, qcrds, cent, con_u);
                con_u_to_state(con_u, dim, Tst);
                /** LF_flux
                con_u_at_pt(st, qcrds, cent, con_u);
                flux_at_pt(con_u,st,qcrds,cent,fluxx,fluxy);
                **/
            }

            ///////////////////////////////
            ///// Flux from neighbr
            //////////////////////////////
            if(nbtri != NULL && phy_brdy_side == NO)
            {
                if(fg_sf_flag(tri)[side] == NO)
                {
                    con_u_at_pt(nbst, qcrds, nbcent, nbcon_u);
                    // LF_flux
                    flux_at_pt(nbcon_u,nbst,qcrds,nbcent,nbfluxx,nbfluxy);
                    // LF_flux_end

                    con_u_to_state(nbcon_u, dim, Tnbst);
                    /**
                    if(N_EQN == 4 && invalid_state("edge_integr_nbst",Tnbst,NO))
                    {
                        float midpt[MAXD];
                        int ii;
                        for(ii = 0; ii < 2; ii++)
                            midpt[ii] = (pcrds[(side+1)%3][ii] + pcrds[side][ii])/2.0;
                        con_u_at_pt(nbst, midpt, nbcent, nbcon_u);
                        // TMP
                        // if(debug_flag == YES)
                        //     printf("ERROR: edge_integr, NBtri density < 0\n");
                        // printf("ERROR: edge_integr, NBtri density < 0 at pt[%g %g], side[%d], "
                        //        "qudrature %d\n", qcrds[0], qcrds[1], side, k);
                        // print_tri_crds(nbtri);
                        // print_tri_crds(tri);
                        // verbose_print_state("edge pt state", Tnbst);
                        // verbose_print_state("NBtri cent state", nbst);
                        // con_u_to_state(nbcon_u, dim, Tnbst);
                        // verbose_print_state("new edge center state", Tnbst);
                        // clean_up(ERROR);
                        // END TMP

                        con_u_to_state(nbcon_u, dim, Tnbst);
                        // LF_flux
                        flux_at_pt(nbcon_u,nbst,qcrds,nbcent,nbfluxx,nbfluxy);
                        // LF_flux_end 
                    }
                    **/
                }
            }
            else if( phy_brdy_side == YES) // Flow Boundary 
            {
                DG_boundary_state(bdryst,tri,qcrds,fr,side,nor,dt,mid_soln,
                        rk_iter);
                nbcon_u[0] = Dens(bdryst);
                nbcon_u[1] = Mom(bdryst)[0];
                nbcon_u[2] = Mom(bdryst)[1];
                nbcon_u[3] = Energy(bdryst);

                con_u_to_state(nbcon_u, dim, Tnbst);

                // LF_flux
                flux_from_st(bdryst,nbfluxx,nbfluxy);
                alpha = compute_alpha_of_LF_flux(st,bdryst,nor);
                // LF_flux End
            }
            else
            {
                printf("ERROR: edge_integr, unknow edge case\n");
                clean_up(ERROR);
            }

            //////// LF_flux
            for(i = 0; i < N_EQN; i++)
            {
                if(debugging("modifyflux"))
                {
                    if(nbtri != NULL && phy_brdy_side == NO)
                        tmp_alpha = comput_loc_alpha(i, st, con_u, nbst, nbcon_u, alpha);
                    else
                        tmp_alpha = alpha;
                }
                else
                    tmp_alpha = alpha;
                flux[k][i] = 0.5*(fluxx[i]*nor[0]+fluxy[i]*nor[1] +
                     nbfluxx[i]*nor[0]+nbfluxy[i]*nor[1] -
                     tmp_alpha*(nbcon_u[i]-con_u[i]));
            }
            // Burgers_flux(con_u, nbcon_u, nor, Tst, Tnbst, flux[k],NO); 
            /////// LF_flux End

            /**
            HLL_flux(con_u, nbcon_u, nor, Tst, Tnbst, flux[k],NO);
            if(debug_flag == YES && k == 1)
            {
                printf("tri[%d] side[%d] compute flux at Quadrature[%d]\n", tri->id, side, k);
                HLL_flux(con_u, nbcon_u, nor, Tst, Tnbst, flux[k],YES);
            }
            **/
            /////////// HLL flux END

            for(i = 0; i < N_EQN; i++)
            {
                // LF_flux  and HLL_flux
                tmpf[i] = flux[k][i]*vh_pt*qw[k]; 
                eint[i] += tmpf[i];
            }
        }

        if(Gauss_N == 1)
        {
            // To test mid pt 
            for(i = 0; i < N_EQN; i++) eint[i] *= length;
        }
        else
        {
            for(i = 0; i < N_EQN; i++) eint[i] *= length/2.0;
        }

        ////////////////// 
        // Save Computed Flux for the neighbr
        // and reset flags
        if(nbtri != NULL && indx == 0 && phy_brdy_side == NO)
        {
            int nbside;
            if(fg_sf_flag(tri)[side] == NO)
            {
                for(nbside = 0; nbside < 3; nbside++)
                {
                    if(Tri_on_side(nbtri,nbside) == tri)
                        break;
                }

                fg_sf_flag(tri)[side] = YES;
                fg_sf_flag(nbtri)[nbside] = YES;
                for(i = 0; i < N_EQN; i++)
                {
                    for(k = 0; k < Gauss_N; k++)
                    {
                        fg_side_flx(nbtri)[nbside][Gauss_N-1-k][i] = -flux[k][i];
                        fg_side_flx(tri)[side][k][i] = flux[k][i];
                    }
                }
            }
        }
        else if(indx == 0 && phy_brdy_side == YES)
        {
            // On the phyiscal boundary 
            if(fg_sf_flag(tri)[side] == NO)
            {
                fg_sf_flag(tri)[side] = YES;
                for(i = 0; i < N_EQN; i++)
                {
                    for(k = 0; k < Gauss_N; k++)
                        fg_side_flx(tri)[side][k][i] = flux[k][i];
                }
            }
        } 
}


/*
 * grad_vh: gradient of the indx-th 2D basis function at crds.
 *
 *   crds  - evaluation point (x, y)
 *   cent  - reference point the basis is expanded about
 *   indx  - basis index, 0..9
 *   val   - output, val[0] = d/dx, val[1] = d/dy
 *
 * The formulas below are the derivatives of 1, dx, dy, dx^2, dx*dy, dy^2,
 * dx^3, dx^2*dy, dx*dy^2, dy^3 with dx = crds[0]-cent[0] and
 * dy = crds[1]-cent[1].
 * NOTE(review): vh_val is not visible here; the basis may differ from these
 * monomials by additive constants, which would not affect the gradient.
 * Any other index prints an error and calls clean_up(ERROR).
 */
LOCAL void grad_vh(
        float *crds,
        double *cent,
        int   indx,
        float *val)
{
        /* Offsets from cent; each is evaluated only where it is needed. */
        double dx, dy;

        /* Start from a zero gradient, cases fill in the non-zero parts. */
        val[1] = 0.0;
        val[0] = 0.0;

        switch(indx)
        {
        case 0:                         /* constant mode */
            break;

        case 1:                         /* linear in x */
            val[0] = 1.0;
            break;

        case 2:                         /* linear in y */
            val[1] = 1.0;
            break;

        case 3:                         /* quadratic, x only */
            dx = crds[0] - cent[0];
            val[0] = 2.0*dx;
            break;

        case 4:                         /* quadratic, mixed */
            dy = crds[1] - cent[1];
            dx = crds[0] - cent[0];
            val[0] = dy;
            val[1] = dx;
            break;

        case 5:                         /* quadratic, y only */
            dy = crds[1] - cent[1];
            val[1] = 2.0*dy;
            break;

        case 6:                         /* cubic, x only */
            dx = crds[0] - cent[0];
            val[0] = 3.0*sqr(dx);
            break;

        case 7:                         /* cubic, x^2 y */
            dx = crds[0] - cent[0];
            dy = crds[1] - cent[1];
            val[0] = 2.0*dx*dy;
            val[1] = sqr(dx);
            break;

        case 8:                         /* cubic, x y^2 */
            dx = crds[0] - cent[0];
            dy = crds[1] - cent[1];
            val[0] = sqr(dy);
            val[1] = 2.0*dx*dy;
            break;

        case 9:                         /* cubic, y only */
            dy = crds[1] - cent[1];
            val[1] = 3.0*sqr(dy);
            break;

        default:
            printf("ERROR grad_vh, implement 2D degree %d\n", indx);
            clean_up(ERROR);
        }
}

/*
 * con_u_at_pt: evaluate the four conserved quantities at a point.
 *
 *   st     - state holding the coefficient arrays dg_Dens, dg_Mom, dg_Energy
 *   crds   - evaluation point
 *   cent   - reference point passed through to vh_val
 *   con_u  - output, 4 entries: [0] from dg_Dens, [1],[2] from dg_Mom[0],
 *            dg_Mom[1], [3] from dg_Energy
 *
 * Each output is the sum over all MAX_N_COEF modes of coefficient times
 * vh_val(crds, cent, mode).
 */
EXPORT void con_u_at_pt(
         Locstate st,
         float    *crds,
         double  *cent,
         float    *con_u)
{
         int      eq, mode;
         float    basis;

         /* Clear the accumulators. */
         for(eq = 0; eq < 4; eq++)
             con_u[eq] = 0.0;

         /* Add the contribution of every mode. */
         for(mode = 0; mode < MAX_N_COEF; mode++)
         {
             basis = vh_val(crds,cent,mode);

             con_u[0] += dg_Dens(st)[mode]*basis;
             con_u[1] += dg_Mom(st)[0][mode]*basis;
             con_u[2] += dg_Mom(st)[1][mode]*basis;
             con_u[3] += dg_Energy(st)[mode]*basis;
         }
}

/*
 * flux_at_pt: x- and y-direction fluxes from a vector of conserved values.
 * (Original note: for gamma law gas.)
 *
 *   con_u  - conserved values: [0] density, [1],[2] momentum, [3] energy
 *   st     - state, used only to obtain gruneisen_gamma(st)
 *   crds   - not used by this routine
 *   cent   - not used by this routine
 *   fluxx  - output flux in x
 *   fluxy  - output flux in y
 *
 * When N_EQN == 1 only entry 0 of each output is written, with the Burgers
 * flux 0.5*u^2 in both directions (a linear-advection variant would use
 * con_u[0] itself).  Otherwise all four entries are written.
 * No check is made for zero density.
 */
LOCAL void flux_at_pt(
         float    *con_u,
         Locstate st,
         float    *crds,
         double    *cent,
         float    *fluxx,
         float    *fluxy)
{
         float    rho, vx, vy, pres;
         float    gm1;   /* value of gruneisen_gamma(st); original note: = gamma-1.0 */

         /* Scalar (Burgers) case: single equation, nothing else to fill. */
         if(N_EQN == 1)
         {
             fluxx[0] = 0.5*sqr(con_u[0]);
             fluxy[0] = 0.5*sqr(con_u[0]);
             return;
         }

         /* Primitive quantities recovered from the conserved vector. */
         rho = con_u[0];
         vx = con_u[1]/rho;
         vy = con_u[2]/rho;
         gm1 = gruneisen_gamma(st);
         pres = (gm1)*(con_u[3]-0.5*rho*(vx*vx+vy*vy));

         /* Flux in the x direction. */
         fluxx[0] = con_u[1];
         fluxx[1] = con_u[1]*vx + pres;
         fluxx[2] = con_u[2]*vx;
         fluxx[3] = vx*(con_u[3]+pres);

         /* Flux in the y direction. */
         fluxy[0] = con_u[2];
         fluxy[1] = con_u[1]*vy;
         fluxy[2] = con_u[2]*vy + pres;
         fluxy[3] = vy*(con_u[3]+pres);
}

/*
 * flux_from_st: x- and y-direction fluxes taken directly from a state.
 * (Original note: for gamma law gas.)
 *
 *   st     - state supplying Dens, Mom, Energy and pressure(st)
 *   fluxx  - output flux in x
 *   fluxy  - output flux in y
 *
 * When N_EQN == 1 only entry 0 of each output is written, using the Burgers
 * flux 0.5*Dens(st)^2 (a linear-advection variant would use Dens(st)
 * itself).  Otherwise all four entries are written.  Unlike flux_at_pt, the
 * pressure comes from pressure(st) rather than from gruneisen_gamma.
 */
LOCAL void flux_from_st(
         Locstate st,
         float    *fluxx,
         float    *fluxy)
{
         float    rho, vx, vy, pres;

         /* Scalar (Burgers) case. */
         if(N_EQN == 1)
         {
             fluxx[0] = 0.5*sqr(Dens(st));
             fluxy[0] = 0.5*sqr(Dens(st));
             return;
         }

         rho = Dens(st);
         vx = Mom(st)[0]/rho;
         vy = Mom(st)[1]/rho;
         pres = pressure(st);

         /* Flux in the x direction. */
         fluxx[0] = Mom(st)[0];
         fluxx[1] = Mom(st)[0]*vx + pres;
         fluxx[2] = Mom(st)[1]*vx;
         fluxx[3] = vx*(Energy(st)+pres);

         /* Flux in the y direction. */
         fluxy[0] = Mom(st)[1];
         fluxy[1] = Mom(st)[0]*vy;
         fluxy[2] = Mom(st)[1]*vy + pres;
         fluxy[3] = vy*(Energy(st)+pres);
}

/*
 * con_u_to_state: copy a conserved-variable vector into a state.
 *
 *   conu - source vector: [0] density, [1..dim] momentum, then energy
 *   dim  - number of momentum components to copy
 *   Tst  - destination state; only Dens, Mom[0..dim-1] and Energy are written
 */
LOCAL void con_u_to_state(
        float     *conu,
        int       dim,
        Locstate  Tst)
{
        int d = 0;

        Dens(Tst) = conu[0];

        while(d < dim)
        {
            Mom(Tst)[d] = conu[1+d];
            d++;
        }

        /* d now indexes one past the last momentum slot copied, so the
         * energy is read from the entry right after the momentum block. */
        Energy(Tst) = conu[d+1];
}

/*
 * Burgers_flux: flux for the scalar Burgers equation from the Riemann
 * solution of the 1D problem with left value conul[0] and right value
 * conur[0].
 *
 *   conul, conur - left/right conserved values (entry 0 is used)
 *   nor          - normal; both components multiply the same 0.5*u^2
 *   stl, str     - not used
 *   flux         - output, flux[0] only
 *   debug        - not used
 *
 * Selection of the interface value u_star:
 *   both sides >= 0                 -> left value
 *   both sides <= 0                 -> right value
 *   left >= 0 and right <= 0        -> side picked by the sign of the mean
 *   anything else                   -> 0 (original note: transonic
 *                                      rarefaction)
 */
LOCAL void Burgers_flux(
        float     *conul,
        float     *conur,
        float     *nor,
        Locstate  stl,
        Locstate  str,
        float     *flux,
        int       debug)
{
        float     u_star, s;
        int       l_nonneg = (conul[0] >= 0.0);
        int       l_nonpos = (conul[0] <= 0.0);
        int       r_nonneg = (conur[0] >= 0.0);
        int       r_nonpos = (conur[0] <= 0.0);

        if(l_nonneg && r_nonneg)
        {
            u_star = conul[0];
        }
        else if(l_nonpos && r_nonpos)
        {
            u_star = conur[0];
        }
        else if(l_nonneg && r_nonpos)
        {
            /* mean of the two values decides which side is taken */
            s = 0.5*(conul[0] + conur[0]);
            u_star = (s > 0.0) ? conul[0] : conur[0];
        }
        else
        {
            u_star = 0.0;
        }

        flux[0] = 0.5*sqr(u_star)*nor[0] + 0.5*sqr(u_star)*nor[1];
}

/*
 * HLL_flux: approximate Riemann flux across a face with normal nor, built
 * from three wave speeds Sl <= Sm <= Sr.
 *
 *   conul, conur - left/right conserved vectors (density, x-momentum,
 *                  y-momentum, energy)
 *   nor          - face normal (2 components)
 *   stl, str     - left/right states; stl supplies gruneisen_gamma, both are
 *                  printed if the middle speed is NaN
 *   flux         - output, 4 entries
 *   debug        - when YES, wave speeds are printed
 *
 * Relations used (from the original header):
 *   H   = (E+p)/rho
 *   c^2 = (gamma-1)*(H - 1/2(u^2+v^2))
 *
 * Sl and Sr are the min/max of the one-sided and Roe-averaged
 * characteristic speeds; Sm is the middle speed.  The flux is the left
 * flux if Sl > 0, the right flux if Sr < 0, and otherwise a star-state flux
 * built from the side Sm selects.
 * NOTE(review): the Sm/star-state construction looks like the HLLC variant
 * rather than plain two-wave HLL -- confirm against the intended reference.
 *
 * Calls clean_up(ERROR) if Sm is NaN or if no branch applies.
 */
LOCAL void HLL_flux(
        float     *conul,
        float     *conur,
        float     *nor,
        Locstate  stl,
        Locstate  str,
        float     *flux,
        int       debug)
{
        int       dim = 2, i;
        float     ql, qr, qR, ul[2], ur[2], Hl, Hr, Pl, Pr;
        float     Roe[4]; // Roe average, \rho, u, v, H
        float     Gam; // = gamma-1.0
        float     Sl, Sr, Sm, cm, cl, cr;
        float     lam_l[2], lam_r[2], lam_min_R, lam_max_R;
        float     Pm, Oml, Omr, conum[4];

        Gam = gruneisen_gamma(stl);

        /* Velocities and normal velocities on each side. */
        for(i = 0; i < dim; i++)
        {
            ul[i] = conul[i+1]/conul[0];
            ur[i] = conur[i+1]/conur[0];
        }
        ql = ul[0]*nor[0] + ul[1]*nor[1]; 
        qr = ur[0]*nor[0] + ur[1]*nor[1]; 

        /* Pressures and total enthalpies. */
        Pl = (Gam)*(conul[3]-0.5*conul[0]*(ul[0]*ul[0]+ul[1]*ul[1]));
        Pr = (Gam)*(conur[3]-0.5*conur[0]*(ur[0]*ur[0]+ur[1]*ur[1]));

        Hl = (conul[3] + Pl)/conul[0];
        Hr = (conur[3] + Pr)/conur[0];

        /* Roe (sqrt-density weighted) averages. */
        Roe[0] = sqrt(conul[0]*conur[0]);
        Roe[1] = (sqrt(conul[0])*ul[0] + sqrt(conur[0])*ur[0])/(sqrt(conul[0]) + sqrt(conur[0]));
        Roe[2] = (sqrt(conul[0])*ul[1] + sqrt(conur[0])*ur[1])/(sqrt(conul[0]) + sqrt(conur[0]));
        Roe[3] = (sqrt(conul[0])*Hl + sqrt(conur[0])*Hr)/(sqrt(conul[0]) + sqrt(conur[0]));

        /* Sound speeds: left, right and Roe-averaged. */
        cl = sqrt( Gam*(Hl- 0.5*(sqr(ul[0]) + sqr(ul[1]))) );
        cr = sqrt( Gam*(Hr- 0.5*(sqr(ur[0]) + sqr(ur[1]))) );
        cm = sqrt(Gam*(Roe[3] - 0.5*(sqr(Roe[1]) + sqr(Roe[2]))));
 
        /* Smallest / largest characteristic speed on each side. */
        lam_l[0] = min( min((ql-cl), ql), (ql+cl) );
        lam_l[1] = max( max((ql-cl), ql), (ql+cl) );
        lam_r[0] = min( min((qr-cr), qr), (qr+cr) );
        lam_r[1] = max( max((qr-cr), qr), (qr+cr) );

        qR = Roe[1]*nor[0] + Roe[2]*nor[1];
        lam_min_R =  min( min((qR-cm), qR), (qR+cm) );
        lam_max_R =  max( max((qR-cm), qR), (qR+cm) );

        /* Outer wave speeds and the middle speed. */
        // Sl = min(min(lam_l[0], lam_min_R), (lam_r[0]));
        Sl = min(lam_l[0], lam_min_R);
        Sr = max(lam_r[1], lam_max_R);
        Sm = (conur[0]*qr*(Sr-qr) - conul[0]*ql*(Sl-ql) + Pl -Pr )/(conur[0]*(Sr-qr) - conul[0]*(Sl-ql) );

        if(isnan(Sm))
        {
            printf("Sm ERROR: qR %g, cm %g\n", qR, cm);
            /* left-state line reports the left-state values */
            printf("Sm ERROR: ql %g, cl %g\n", ql, cl);
            printf("Sm ERROR: qr %g, cr %g\n", qr, cr);
            verbose_print_state("Stl", stl);
            verbose_print_state("Str", str);
            clean_up(ERROR);
        }
          
        if(debug == YES)
        {
            printf("HLL_flux, Sl, Sr, Sm = %11.10g, %11.10g, %11.10g, nor_ul_ur (%11.10g, %11.10g)\n",
                   Sl, Sr, Sm, ql, qr);
            printf("UL: lambda %11.10g %11.10g, ql = %11.10g cl = %11.10g\n", 
                           lam_l[0], lam_l[1], ql, cl);
            printf("UR: lambda %11.10g %11.10g, qr = %11.10g cr = %11.10g\n",           
                        lam_r[0], lam_r[1], qr, cr); 
            printf("\n");
        }
       
        if(Sl > 0.0)
        {
            /* All waves move right: plain left flux. */
            flux[0] = conul[0]*ql;
            flux[1] = conul[1]*ql + Pl*nor[0];
            flux[2] = conul[2]*ql + Pl*nor[1];
            flux[3] = (conul[3] + Pl)*ql; 
        }
        else if (Sl <= 0.0 && Sm > 0.0) 
        {
            /* Left star state.  (A disabled check formerly compared Pm with
             * conur[0]*(qr-Sr)*(qr-Sm) + Pr.) */
            Pm = conul[0]*(ql-Sl)*(ql-Sm) + Pl;

            Oml = 1.0/(Sl-Sm);
            conum[0] = Oml*(conul[0]*(Sl-ql)); 
            conum[1] = Oml*((Sl-ql)*(conul[1]) + (Pm-Pl)*nor[0]); 
            conum[2] = Oml*((Sl-ql)*(conul[2]) + (Pm-Pl)*nor[1]); 
            conum[3] = Oml*((Sl-ql)*(conul[3]) - Pl*ql + Pm*Sm); 
            flux[0] = conum[0]*Sm;
            flux[1] = conum[1]*Sm + Pm*nor[0];
            flux[2] = conum[2]*Sm + Pm*nor[1];
            flux[3] = (conum[3] + Pm)*Sm;
        } 
        else if (Sm <= 0.0 && Sr >= 0.0)
        {
            /* Right star state; Pm is still evaluated from the left side. */
            Pm = conul[0]*(ql-Sl)*(ql-Sm) + Pl;
 
            Omr = 1.0/(Sr-Sm);
            conum[0] = Omr*(conur[0]*(Sr-qr));
            conum[1] = Omr*((Sr-qr)*(conur[1]) + (Pm-Pr)*nor[0]);
            conum[2] = Omr*((Sr-qr)*(conur[2]) + (Pm-Pr)*nor[1]);
            conum[3] = Omr*((Sr-qr)*(conur[3]) - Pr*qr + Pm*Sm);
            flux[0] = conum[0]*Sm;
            flux[1] = conum[1]*Sm + Pm*nor[0];
            flux[2] = conum[2]*Sm + Pm*nor[1];
            flux[3] = (conum[3] + Pm)*Sm;
        }
        else if(Sr < 0.0)
        {
            /* All waves move left: plain right flux. */
            flux[0] = conur[0]*qr;
            flux[1] = conur[1]*qr + Pr*nor[0];
            flux[2] = conur[2]*qr + Pr*nor[1];
            flux[3] = (conur[3] + Pr)*qr;
        } 
        else
        {
            printf("ERROR: HLL_flux, flux not computed, Sl %22.20g, Sr %22.20g, Sm %22.20g\n",
                   Sl, Sr, Sm);
            clean_up(ERROR);
        }
}

/**
7 point quadrature which has precision 4.
Johnson, pp340
**/
LOCAL float inter_integr(
        TRI      *tri,
        Locstate st, 
        int      indx,
        float    *ans, 
        int      rk_iter)
{
        int     i, j, dim = 2;
        double   *cent = fg_centroid(tri);
        float   area = fg_area(tri);
        POINT   *p[3];
        float   *pcrds[3], dbcent[2];
        float   emid0[3], emid1[3], emid2[3], *pemid[3];
        float   g_vh[3], fluxx[4], fluxy[4];
        float   con_u[4];
        static  Locstate Tst = NULL;

        if(Tst == NULL)
        {
            (*Params(st)->_alloc_state)(&Tst,Params(st)->sizest);
            assign(Tst, st, Params(st)->sizest);
        }

        for(i = 0; i < 3; i++)
        {
            p[i] = Point_of_tri(tri)[i];
            pcrds[i] = Coords(p[i]);
        }

        for(i = 0; i < 2; i++)
        {
            emid0[i] = 0.5*(pcrds[0][i] + pcrds[1][i]); 
            emid1[i] = 0.5*(pcrds[1][i] + pcrds[2][i]); 
            emid2[i] = 0.5*(pcrds[2][i] + pcrds[0][i]); 
        }
        pemid[0] = emid0;
        pemid[1] = emid1;
        pemid[2] = emid2;

        for(j = 0; j < 4; j++)
            ans[j] = 0.0;
        // vertices
        for(i = 0; i < 3; i++)
        {
            con_u_at_pt(st, pcrds[i], cent, con_u);
            // NEW
            con_u_to_state(con_u, dim, Tst);
            // assign(Tst, st, Params(st)->sizest);
            // Dens(Tst) = con_u[0];
            // Mom(Tst)[0] = con_u[1];
            // Mom(Tst)[1] = con_u[2];
            // Energy(Tst) = con_u[3];
            /**
            if(invalid_state("inter_integr",Tst,NO))
            {
                // printf("ERROR: inter_integr, density < 0 at iteration %d\n",rk_iter);
                // printf("tri(%d), pt(%d)(%g %g)\n", tri->id, 
                //              i, pcrds[i][0], pcrds[i][1]);
                // print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[0]), dim, "\n");
                // print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[1]), dim, "\n");
                // print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[2]), dim, "\n");
                // verbose_print_state("st at pt",Tst);
                // verbose_print_state("st of tri",st);
                // clean_up(ERROR);

                con_u[0] = Dens(st);
                con_u[1] = Mom(st)[0];
                con_u[2] = Mom(st)[2];
                con_u[3] = Energy(st);
            }
            **/
            // END NEW
            flux_at_pt(con_u,st,pcrds[i],cent,fluxx,fluxy);
            grad_vh(pcrds[i], cent, indx,g_vh);
            for(j = 0; j < 4; j++)
                ans[j] += 0.05*(fluxx[j]*g_vh[0] + fluxy[j]*g_vh[1]);
        }

        // TMP
        /*
        if(debug_flag == YES && indx == 1)
        {
            printf("Indx(%d) On vertice intgr {%g, %g, %g, %g},"
             " Mom0 flux (%g %g), Eng flux (%g %g), g_vh[%g %g]\n", 
                   indx, ans[0], ans[1], ans[2], ans[3], fluxx[1], fluxy[1], 
                   fluxx[3], fluxy[3], g_vh[0], g_vh[1]);
        }
        */

        // edge mid points
        for(i = 0; i < 3; i++)
        {
            con_u_at_pt(st, pemid[i], cent, con_u);
            // NEW
            con_u_to_state(con_u, dim, Tst);
            // Dens(Tst) = con_u[0];
            // Mom(Tst)[0] = con_u[1];
            // Mom(Tst)[1] = con_u[2];
            // Energy(Tst) = con_u[3];
            /**
            if(invalid_state("inter_integr",Tst,NO))
            {
                // printf("ERROR: inter_integr, density < 0\n");
                // printf("tri(%d), ptmid(%d)(%g %g)\n", tri->id,
                //              i, pemid[i][0], pemid[i][1]);
                // print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[0]), dim, "\n");
                // print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[1]), dim, "\n");
                // print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[2]), dim, "\n");
                // verbose_print_state("st at pt",Tst);
                // verbose_print_state("st of tri",st);
                // clean_up(ERROR);

                con_u[0] = Dens(st);
                con_u[1] = Mom(st)[0];
                con_u[2] = Mom(st)[2];
                con_u[3] = Energy(st);
            }
            **/
            // END NEW
            flux_at_pt(con_u,st,pemid[i],cent,fluxx,fluxy);
            grad_vh(pemid[i], cent, indx,g_vh);
            for(j = 0; j < 4; j++)
                ans[j] += 2.0/15.0*(fluxx[j]*g_vh[0] + fluxy[j]*g_vh[1]);
        }

        // TMP
        /**
        if(debug_flag == YES && indx == 1)
        {
            printf("Indx(%d) On edge intgr {%g, %g, %g, %g} Mom0 flux (%g %g)\n", 
                  indx, ans[0], ans[1], ans[2], ans[3], fluxx[1], fluxy[1]);
        }
        **/

        for(i = 0; i < 2; i++) dbcent[i] = cent[i]; 

        // tri centriod 
        con_u_at_pt(st, dbcent, cent, con_u);
        con_u_to_state(con_u, dim, Tst);
        // Dens(Tst) = con_u[0];
        // Mom(Tst)[0] = con_u[1];
        // Mom(Tst)[1] = con_u[2];
        // Energy(Tst) = con_u[3];
        /**
        if(invalid_state("inter_integr",Tst,NO))
        {
            // printf("ERROR: inter_integr, density < 0\n");
            // printf("tri(%d), dbcent(%g %g)\n", tri->id,
            //                  dbcent[0], dbcent[1]);
            // print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[0]), dim, "\n");
            // print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[1]), dim, "\n");
            // print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[2]), dim, "\n");
            // verbose_print_state("st at pt",Tst);
            // verbose_print_state("st of tri",st);
            // clean_up(ERROR);

            con_u[0] = Dens(st);
            con_u[1] = Mom(st)[0];
            con_u[2] = Mom(st)[2];
            con_u[3] = Energy(st);
        }
        **/ 
        flux_at_pt(con_u,st,dbcent,cent,fluxx,fluxy);
                
        grad_vh(dbcent, cent, indx,g_vh);
        for(j = 0; j < 4; j++)
            ans[j] += 9.0/20.0*(fluxx[j]*g_vh[0] + fluxy[j]*g_vh[1]);

        // TMP
        /**
        if(debug_flag == YES && indx == 1)
        {
            printf("Indx(%d) On cent intgr {%g, %g, %g, %g} Mom0 flux (%g %g)\n",
                  indx, ans[0], ans[1], ans[2], ans[3], fluxx[1], fluxy[1]);
        }
        **/ 

        for(j = 0; j < 4; j++)
            ans[j] *= area;
}

/**
Interior (volume) integral of flux . grad(v_indx) over a triangle, using a
13 point quadrature.  The points come from tri_quadrature_13_pts; point 0
gets weight w1, points 1-3 weight w2, points 4-6 weight w3 and points 7-12
weight w4.  The weighted sum is scaled by the triangle area.

  tri     - triangle to integrate over
  st      - state holding the solution coefficients on tri
  indx    - index of the test (basis) function
  ans     - output, N_EQN entries
  rk_iter - not used

The result is delivered through ans.  The function is declared float but
never produced a value; it now returns 0.0 so that a caller reading the
return value gets something defined.
**/
LOCAL float inter_integr_13_quad(
        TRI      *tri,
        Locstate st, 
        int      indx,
        float    *ans, 
        int      rk_iter)
{
        int     i, j;
        double   *cent = fg_centroid(tri);
        float   area = fg_area(tri);
        POINT   *p[3];
        float   *pcrds[3];
        float   g_vh[3], fluxx[4], fluxy[4];
        float   con_u[4];
        float   crds[13][2], tmpans[N_EQN][13];
        static float w1 =-0.149570044467670, w2 = 0.053347235608839,
                     w3 = 0.175615257433204, w4 = 0.077113760890257;

        for(i = 0; i < 3; i++)
        {
            p[i] = Point_of_tri(tri)[i];
            pcrds[i] = Coords(p[i]);
        }

        tri_quadrature_13_pts(pcrds[0], pcrds[1], pcrds[2], crds);

        for(j = 0; j < N_EQN; j++)
            ans[j] = 0.0;

        /* Integrand at every quadrature point, one column per point. */
        for(i = 0; i < 13; i++)
        {
            con_u_at_pt(st, crds[i], cent, con_u);
            flux_at_pt(con_u,st,crds[i],cent,fluxx,fluxy);
            grad_vh(crds[i], cent, indx,g_vh);

            for(j = 0; j < N_EQN; j++)
                tmpans[j][i] = (fluxx[j]*g_vh[0] + fluxy[j]*g_vh[1]);
        } 
      
        /* Weighted sum, grouped by the four weight classes. */
        for(j = 0; j < N_EQN; j++)
        {
            ans[j] =  w1*tmpans[j][0] +
                      w2*(tmpans[j][1] + tmpans[j][2] + tmpans[j][3]) +
                      w3*(tmpans[j][4] + tmpans[j][5] + tmpans[j][6]) +
                      w4*(tmpans[j][7] + tmpans[j][8] + tmpans[j][9] +
                          tmpans[j][10] + tmpans[j][11] + tmpans[j][12]);
        }

        for(j = 0; j < N_EQN; j++)
            ans[j] *= area;

        return 0.0;
}

/**
Interior (volume) integral of flux . grad(v_indx) over a triangle using
only the tri centroid: the integrand at the centroid times the area.

  tri     - triangle to integrate over
  st      - state holding the solution coefficients on tri
  indx    - index of the test (basis) function
  ans     - output, 4 entries (always 4, independent of N_EQN)
  rk_iter - not used

The flux arrays start at zero because flux_at_pt fills only entry 0 when
N_EQN == 1 while the accumulation below always covers four entries.

The result is delivered through ans.  The function is declared float but
never produced a value; it now returns 0.0 so that a caller reading the
return value gets something defined.
**/
LOCAL float inter_integr_center(
        TRI      *tri,
        Locstate st, 
        int      indx,
        float    *ans, 
        int      rk_iter)
{
        int     i, j;
        double   *cent = fg_centroid(tri);
        float   area = fg_area(tri);
        float   dbcent[2];
        float   g_vh[3];
        float   fluxx[4] = {0.0, 0.0, 0.0, 0.0};
        float   fluxy[4] = {0.0, 0.0, 0.0, 0.0};
        float   con_u[4];
        static  Locstate Tst = NULL;

        /* Scratch state, allocated on first use. */
        if(Tst == NULL)
            (*Params(st)->_alloc_state)(&Tst,Params(st)->sizest);

        for(j = 0; j < 4; j++)
            ans[j] = 0.0;

        /* Centroid copied into the coordinate type the helpers take. */
        for(i = 0; i < 2; i++) dbcent[i] = cent[i]; 

        // tri centroid 
        con_u_at_pt(st, dbcent, cent, con_u);
        Dens(Tst) = con_u[0];
        Mom(Tst)[0] = con_u[1];
        Mom(Tst)[1] = con_u[2];
        Energy(Tst) = con_u[3];

        flux_at_pt(con_u,st,dbcent,cent,fluxx,fluxy);
                
        grad_vh(dbcent, cent, indx,g_vh);
        for(j = 0; j < 4; j++)
            ans[j] += (fluxx[j]*g_vh[0] + fluxy[j]*g_vh[1]);

        for(j = 0; j < 4; j++)
            ans[j] *= area;

        return 0.0;
}

/*
*
* \int{f(X)\cdot n(X) v(X) ds} = |e|/|x2-x1| \int^{x2}_{x1} {f(X)\cdot n(X) v(X) dx}
* Use three pt quadrature.
* \int^{x2}_{x1} {f(X)\cdot n(X) v(X) dx} = |x2-x1|/2 \int^{1}_{-1} {f(Y)\cdot n(Y) v(Y) dt} 
* Overall:
* \int{f(X)\cdot n(X) v(X) ds} = |e|/2 \int^{1}_{-1} {f(Y)\cdot n(Y) v(Y) dt}
* where Y = (P1+P2)/2 + (P2-P1)/2 t, t = -\sqrt{3/5}, 0, \sqrt{3/5}.
*
*/
/**
LOCAL float bdry_integr(
        TRI      *tri,
        Locstate st,
        int      indx,
        float    *ans)
{
        int     i, j, k, dim = 2;
        double   *cent = fg_centroid(tri);
        float   area = fg_area(tri);
        POINT   *p[3];
        float   *pcrds[3], qcrds[3], q[3], w[3];
        float   fluxx[4], fluxy[4];
        float   nor[3], t[3], length;
        float   con_u[4];

        for(i = 0; i < 3; i++)
        {
            p[i] = Point_of_tri(tri)[i];
            pcrds[i] = Coords(p[i]);
        }
      
        q[0] = -sqrt(0.6); q[1] = 0.0; q[2] = sqrt(0.6);
        w[0] = 5.0/9.0; w[1] = 8.0/9.0; w[2] = 5.0/9.0;

        // on 3 edges
        for(j = 0; j < 3; j++)
        { 
            for(i = 0; i < dim; i++)
                t[i] = pcrds[(j+1)%3][i] - pcrds[j][i];    
            length = mag_vector(t,dim);
            for (i = 0; i < dim; ++i)
                t[i] /= length;
            nor[0] = t[1];
            nor[1] = -t[0];
            // at 3 quadrature points
            for(k = 0; k < 3; k++)
            {
                for(i = 0; i < dim; i++)
                    qcrds[i] = (pcrds[(j+1)%3][i] + pcrds[j][i])/2.0 + 
                            (pcrds[(j+1)%3][i] - pcrds[j][i])/2.0*q[k]; 
                vh_val(qcrds, cent, indx);
                // NEW
                con_u_at_pt(st, qcrds, cent, con_u);
                // END NEW
                flux_at_pt(con_u,st,qcrds,cent,fluxx,fluxy);
            }
        }
}
*/


/*
 * time_step_on_tri: time step allowed on one triangle.
 *
 *   front - supplies the CFL factor through Time_step_factor(front)
 *   tri   - triangle; its state is tri->st and its size is fg_diam(tri)
 *
 * Returns CFL*diam/max_speed, where max_speed is |Dens(st)| when
 * N_EQN == 1 and |velocity| + sound_speed(st) otherwise.  When
 * debugging("P3_acc") is on, the result is the smaller of that value and
 * CFL/max_speed*diam^(4/3).
 *
 * Alternatives noted in the original but not used by the active code:
 * 1.  By The RKDGM for CLs V, JCP, 141, 1998.
 *     PP 213
 *     CFL >= (vel+c)perimeter(K)\delta t/|K|
 * 2.  Use distance from centroid to edge
 */
EXPORT float time_step_on_tri(
        Front         *front,
        TRI           *tri)
{
        float         CFL = Time_step_factor(front);
        Locstate      st = (Locstate)tri->st;
        float         diam = fg_diam(tri);
        float         max_speed;

        if(N_EQN == 1)
        {
            /* scalar equation: wave speed is the solution value itself */
            max_speed = fabs(Dens(st));
        }
        else
        {
            float     velx = Mom(st)[0]/Dens(st);
            float     vely = Mom(st)[1]/Dens(st);
            float     vel = sqrt(sqr(velx) + sqr(vely));

            max_speed = vel+sound_speed(st);
        }

        if(debugging("P3_acc"))
        {
            return min(CFL/max_speed*pow((diam),(4.0/3.0)),
                           CFL*diam/max_speed);
        }

        return CFL*diam/max_speed;
}

/*
*       maxmod():
*
*       If a and b are both strictly positive, returns the larger one;
*       if both are strictly negative, returns the smaller (most negative)
*       one; in every other case returns 0.
*/
LOCAL float maxmod(float a, float b)
{
    if(a > 0.0)
        return (b > 0.0) ? max(a,b) : 0.0;
    if(a < 0.0)
        return (b < 0.0) ? min(a,b) : 0.0;
    return 0.0;
}

/*
*       TVB_minmod():
*
*       Returns a unchanged when |a| < Ma.  Otherwise behaves as a
*       two-argument minmod: min(a,b) when both are >= 0, max(a,b) when
*       both are <= 0, and 0 when the signs differ.
*/
LOCAL float TVB_minmod(float a, float b, float Ma)
{
   int both_nonneg, both_nonpos;

   if(fabs(a) < Ma)
       return a;

   both_nonneg = (a >= 0.0) && (b >= 0.0);
   both_nonpos = (a <= 0.0) && (b <= 0.0);

   if(both_nonneg)
       return min(a,b);
   if(both_nonpos)
       return max(a,b);
   return 0.0;
}

/*
*       minmod():
*
*       Two-argument minmod: returns min(a,b) when both arguments are
*       >= 0, max(a,b) when both are <= 0, and 0 when their signs differ.
*
*       The former run-time selector (hard-wired to 0) and its unreachable
*       "ENO" alternative (pick the argument with the smaller square) have
*       been removed, so every path through the function now returns a
*       value.  minmod2() provides that smaller-square selection.
*/
EXPORT float minmod(float a, float b)
{
   if(a>=0.0 && b>=0.0)
       return min(a,b);
   if(a<=0.0 && b<=0.0)
       return max(a,b);
   return 0.0;
}

/*
*       minmod2():
*
*       Returns whichever argument has the smaller square; a wins ties.
*/
LOCAL float minmod2(float a, float b)
{
   return (a*a<=b*b) ? a : b;
}


/*
*       find_tri():
*
*       Debugging helper.  Returns YES when cent[0..1] lies within 1e-5
*       (per coordinate) of a hard-coded target point, NO otherwise.
*       Edit the target below to track a different triangle.
*/
LOCAL int find_tri(
        double *cent)
{
        float        target[2] = {0.509992, 0.235087};
        const double tol = 0.00001;
        int          k;

        for(k = 0; k < 2; k++)
        {
            if(!(fabs(cent[k]-target[k]) < tol))
                return NO;
        }
        return YES;
}

/*
*       find_tri_from_ver():
*
*       Debugging helper.  Returns YES when each of the three given
*       vertices lies within 1e-5 (per coordinate) of one of three
*       hard-coded target points, NO otherwise.  The test is made per
*       vertex; it does not require the three vertices to hit three
*       different targets.
*/
LOCAL int find_tri_from_ver(
        float *pt1,
        float *pt2,
        float *pt3)
{
        float        target[3][2] = {{0.25, 0}, {0.25, -0.25}, {0.5, 0}};
        float        *vert[3];
        const double tol = 0.00001;
        int          hit[3];
        int          v, t;

        vert[0] = pt1;
        vert[1] = pt2;
        vert[2] = pt3;

        for(v = 0; v < 3; v++)
        {
            hit[v] = NO;
            for(t = 0; t < 3; t++)
            {
                if(fabs(vert[v][0]-target[t][0]) < tol &&
                   fabs(vert[v][1]-target[t][1]) < tol)
                {
                    hit[v] = YES;
                    break;
                }
            }
        }

        if(hit[0] == YES && hit[1] == YES && hit[2] == YES)
            return YES;
        return NO;
}

/**
LOCAL void print_matrix(
            float **mat,
            int   row,
            int   col)
{
            int i, indx;
            printf("Print matrix entry\n");
            for(i = 0; i < row; i++)
            {
                printf("ROW[%d]: ",i);
                for(indx = 0; indx < MAX_N_COEF; indx++)
                {
                    printf("%g, ", mat[i][indx]);
                }
                printf("\n");
            }
}
**/

/*
*       print_state_coef():
*
*       Prints the MAX_N_COEF coefficients stored in the state for
*       density, each of the two momentum components, and energy, one
*       labelled line per quantity.
*/
LOCAL void print_state_coef(Locstate state)
{
         const int ndim = 2;
         int       k, d;

         printf("Density coeff: ");
         for(k = 0; k < MAX_N_COEF; ++k)
         {
             printf("%g ", dg_Dens(state)[k]);
         }
         printf("\n");

         for(d = 0; d < ndim; ++d)
         {
             printf("Mom%d coeff: ", d);
             for(k = 0; k < MAX_N_COEF; ++k)
             {
                 printf("%g ", dg_Mom(state)[d][k]);
             }
             printf("\n");
         }

         printf("Energy coeff: ");
         for(k = 0; k < MAX_N_COEF; ++k)
         {
             printf("%g ", dg_Energy(state)[k]);
         }
         printf("\n");
}

/*
*       print_mass_matrix_1st_row():
*
*       Prints the first MAX_N_COEF entries of row 0 of Lmass_matrix on
*       one line, preceded by a heading line.
*/
LOCAL void print_mass_matrix_1st_row(
        double **Lmass_matrix)
{
         double *row0 = Lmass_matrix[0];
         int    col = 0;

         printf("THe 1st row of mass matrix\n");
         while(col < MAX_N_COEF)
         {
             printf("%g, ", row0[col]);
             ++col;
         }
         printf("\n");
}

/*
*       compute_alpha_of_LF_flux():
*
*       Returns the larger of the two one-sided wave-speed bounds for the
*       states st and nbst across an edge with normal nor.
*
*       N_EQN == 1 :  max(|Dens(st)|, |Dens(nbst)|)
*       otherwise  :  max(|u.n| + c, |u_nb.n| + c_nb), with u = Mom/Dens
*                     and c = sound_speed() of the respective state.
*/
LOCAL float compute_alpha_of_LF_flux(
        Locstate  st,
        Locstate  nbst,
        float     *nor)
{
        float     u[2], nbu[2];     /* velocities of st and nbst */
        float     un, nbun;         /* their components along nor */
        float     a, nba;           /* their sound speeds */
        float     alpha;

        if(N_EQN == 1)
        {
            alpha = max(fabs(Dens(st)), fabs(Dens(nbst)));
            return alpha;
        }

        /* this side */
        u[0] = Mom(st)[0]/Dens(st);
        u[1] = Mom(st)[1]/Dens(st);
        un = u[0]*nor[0] + u[1]*nor[1];
        a = sound_speed(st);

        /* neighbouring side */
        nbu[0] = Mom(nbst)[0]/Dens(nbst);
        nbu[1] = Mom(nbst)[1]/Dens(nbst);
        nbun = nbu[0]*nor[0] + nbu[1]*nor[1];
        nba = sound_speed(nbst);

        alpha = max((fabs(un)+a), (fabs(nbun)+nba));
        return alpha;
}


/*
*       g_exact_R_flux():
*
*       Computes the flux through an edge with normal nor from the two
*       states sti and sto, using the state returned by onedrsoln().
*
*       nor   - edge normal (two components are read)
*       sti   - first state, passed to onedrsoln() as its left argument
*       sto   - second state, passed to onedrsoln() as its right argument
*       flux  - output, 4 entries: mass, x-momentum, y-momentum, energy
*
*       Five scratch states are allocated on the first call and reused on
*       every later call (function-local statics), so the function is not
*       reentrant.  Aborts through clean_up(ERROR) if find_mid_state()
*       fails.
*/
LOCAL void g_exact_R_flux(
        float   *nor,
        Locstate   sti,	
        Locstate   sto,
	float   *flux)
{
        static Locstate   Tist = NULL, Tost, ansi, Tsl, Tsr;
        int          i, dim = 2;
        float        vtani[MAXD], vtano[MAXD];     /* tangential velocity of sti / sto */
        float        ui = 0.0, uo = 0.0;           /* normal velocity of sti / sto */
        float        pjump = 0.0;                  /* unused */
        float        pl, pr, pM, E;    /* pl, pr unused; pM = pressure(ansi); E see below */
        RIEMANN_SOLVER_WAVE_TYPE  l_wave,r_wave;
        float        ri,ro;          /* unused */
        float        ux, uy;         /* unused */
        float        en_den;         /* unused */
        float        spdans, vans;
        int          l_or_r;
        float        pml, pmr, uml, umr, mr, ml; /* midstate quantities, only printed when debugging */

        /* One-time allocation of the scratch states, sized from sti's params. */
        if(Tist == NULL)
        {
            (*Params(sti)->_alloc_state)(&Tist,Params(sti)->sizest);
            (*Params(sti)->_alloc_state)(&Tost,Params(sti)->sizest);
            (*Params(sti)->_alloc_state)(&ansi,Params(sti)->sizest);

            (*Params(sti)->_alloc_state)(&Tsl,Params(sti)->sizest);
            (*Params(sti)->_alloc_state)(&Tsr,Params(sti)->sizest);
            // g_alloc_state(&Tost,sizest);
            // g_alloc_state(&Tist,sizest);
            // g_alloc_state(&ansi,sizest);
        }

        /* Work on EGAS_STATE copies so the inputs are left untouched. */
        // set_state_for_find_mid_state(Tist, sti);
        set_state(Tist, EGAS_STATE, sti);
        // set_state_for_find_mid_state(Tost, sto);
        set_state(Tost, EGAS_STATE, sto);

        /* Split each velocity into its component along nor ... */
        for ( i = 0; i < dim; ++i)
        {
            ui += nor[i] * Vel(Tist)[i];
            uo += nor[i] * Vel(Tost)[i];
        }
        /* ... and the remaining tangential vector. */
        for ( i = 0; i < dim; ++i)
        {
            vtani[i] = Vel(Tist)[i] - nor[i] * ui;
            vtano[i] = Vel(Tost)[i]  - nor[i] * uo;
        }

        /* Reduce to a 1D problem: normal velocity in slot 0, zero elsewhere. */
        Vel(Tist)[0] = ui;     Vel(Tost)[0] = uo;
        for ( i = 1; i < dim; ++i)
            Vel(Tist)[i] = Vel(Tost)[i] = 0.0;

        if(debug_flag == YES)
        {
            // add_to_debug("riem_sol");
        }

        /* presumably samples the 1D Riemann solution at speed 0 into ansi
         * and reports which side it came from -- TODO confirm against
         * onedrsoln() */
        l_or_r = onedrsoln(0.0,Tist,Tost,ansi,&spdans,EGAS_STATE);

        /* Rebuild the 2D velocity: solved normal speed plus the tangential
         * part of whichever side the answer belongs to.
         * NOTE(review): there is no default case; for any other return
         * value Vel(ansi) is left exactly as onedrsoln() wrote it. */
        vans = Vel(ansi)[0];
        switch(l_or_r)
        {
        case LEFT_FAMILY:
            for (i = 0; i < dim; ++i)
                Vel(ansi)[i] = vans*nor[i] + vtani[i];
            break;

        case RIGHT_FAMILY:
            for (i = 0; i < dim; ++i)
                Vel(ansi)[i] = vans*nor[i] + vtano[i];
            break;
        }

        // riemann_solution(0.0,nor,Tist,Tost,ansi,EGAS_STATE); 
 
        /* TMP: the mid-state solve below feeds only the debug print further
         * down, but a failure still aborts the run. */
        set_state_for_find_mid_state(Tsl,Tist);
        set_state_for_find_mid_state(Tsr,Tost);

        if (find_mid_state(Tsl,Tsr,0.0,&pml,&pmr,&uml,&umr,
                               &ml,&mr,&l_wave,&r_wave) != FUNCTION_SUCCEEDED)
        {
            screen("ERROR in g_exact_R_flux(), find_mid_state() failed\n");
            clean_up(ERROR);
        }

        if(debugging("g_exact_R_flux"))
        {
            // remove_from_debug("riem_sol");
            printf("In g_exact_R_flux,input state:");
            g_verbose_print_state(Tist);
            g_verbose_print_state(Tost);
            printf("In g_exact_R_flux, spdand %g ansi, family = %d, vans = %g:", spdans, l_or_r, vans);
            g_verbose_print_state(ansi);
            printf("P l-r(%g, %g), U l-r(%g, %g), l_wave %d, r_wave %d\n", 
                   pml, pmr, uml, umr, l_wave, r_wave);
        }

        /* E = rho*(|v|^2/2 + Energy(ansi)); assumes Energy() of an
         * EGAS_STATE is specific internal energy -- TODO confirm */
        pM = pressure(ansi);
        E = Dens(ansi)*(0.5*(sqr(Vel(ansi)[0])+sqr(Vel(ansi)[1]))+Energy(ansi));

        /* Flux of (mass, x-momentum, y-momentum, energy) projected on nor. */
        flux[0] = Dens(ansi)*Vel(ansi)[0]*nor[0] + Dens(ansi)*Vel(ansi)[1]*nor[1];
        flux[1] = (Dens(ansi)*sqr(Vel(ansi)[0]) + pM)*nor[0] + Dens(ansi)*Vel(ansi)[0]*Vel(ansi)[1]*nor[1];
        flux[2] = Dens(ansi)*Vel(ansi)[0]*Vel(ansi)[1]*nor[0] + (Dens(ansi)*sqr(Vel(ansi)[1]) + pM)*nor[1];
        flux[3] = (pM+E)*Vel(ansi)[0]*nor[0] + (pM+E)*Vel(ansi)[1]*nor[1];

}

// L[0] : (u-c)(dp-\rho *c* du)
// L[1] : u family, tangential vel
// L[2] : (u)(dp-c*c*d \rho)
// L[3] : (u+c)(dp + \rho *c *du)
// u is the normal vel. dp etc is \frac{\partial p}{\partial t} actually.
/*
*       DG_boundary_state():
*
*       Fills bdryst with the exterior state used at quadrature point qcrds
*       on boundary side `side` of tri.
*
*       bdryst    - output state
*       tri       - boundary triangle
*       qcrds     - coordinates of the quadrature point on the edge
*       fr        - front; supplies sizest and the current time
*       side      - index of the boundary side of tri
*       nor       - normal of that side
*       dt        - time step (halved internally for RK_STEP == 3,
*                   rk_iter == 2)
*       mid_soln  - Runge-Kutta stage storage, forwarded to the helpers
*       rk_iter   - Runge-Kutta stage index
*
*       Dispatch:
*         - debugging("g_sine"): impose_gas_sine_states() at the stage time.
*         - NEUMANN side:  DG_NEU_boundary_state().
*         - CONST_P side:  DG_CONST_P_boundary_state().
*         - IN_FLOW / OUT_FLOW: the polynomial solution of tri evaluated at
*           qcrds is advanced by dt using the characteristic amplitudes
*           L[0..3] (see the comment above this function).  They are built
*           from one-sided differences between the boundary point and a
*           point one fg_diam(tri) away along the axis-aligned normal.
*           On stage 0 the evaluated solution is returned unmodified.
*
*       Aborts through clean_up(ERROR) on interpolation failure, unknown
*       boundary type, or an invalid resulting state.
*
*       NOTE(review): unlike DG_NEU_boundary_state() and
*       DG_CONST_P_boundary_state(), this function always reads tri->st and
*       never switches to mid_soln[tri->id].st[rk_iter] -- confirm that is
*       intended.
*/
LOCAL void DG_boundary_state(
        Locstate    bdryst,
        TRI         *tri,
        float       *qcrds,
        Front       *fr,
        int         side,
        float       *nor,
        float       dt,
        Mid_soln    *mid_soln,
        int         rk_iter)
{
        double      *cent = fg_centroid(tri);
        float       con_u[4], c, vn = 0.0, P, rho, tempnor[MAXD], temptan[MAXD], vt = 0.0;
        float       inc, invn = 0.0, inP, inrho, invt = 0.0;
        Locstate    trist = tri->st;
        static Locstate    st = NULL, inst;
        int         dim = 2, i;
        float       L[4] = {0, 0, 0, 0}, inqcrds[MAXD], inrad;
        float       dp, dvn, drho, dvt;
        float       dirx[2] = {1.0, 0.0}, ans;
        TRI         *loc_tris[50];
        int         loc_N;

        if(debugging("g_sine"))
        {
            /* stage times: t, t+dt, t+dt/2 for rk_iter = 0, 1, other */
            if(rk_iter == 0)
                impose_gas_sine_states(bdryst,tri,qcrds,
                   fr,side,nor,dt,mid_soln,rk_iter,(fr->time));
            else if(rk_iter == 1)
                impose_gas_sine_states(bdryst,tri,qcrds,
                   fr,side,nor,dt,mid_soln,rk_iter,(fr->time+dt));
            else
                impose_gas_sine_states(bdryst,tri,qcrds,
                   fr,side,nor,dt,mid_soln,rk_iter,(fr->time+0.5*dt));
            return;
        }

        if(fg_e_type(tri)[side] == NEUMANN)
        {
            DG_NEU_boundary_state(bdryst,tri,qcrds,
                   fr,side,nor,dt,mid_soln,rk_iter);
            return;
        }

        // This is for db_Mach reflection problem
        // The function does not mean exactly as it is named.
        if(fg_e_type(tri)[side] == CONST_P)
        {
            DG_CONST_P_boundary_state(bdryst,tri,qcrds,
                   fr,side,nor,dt,mid_soln,rk_iter);
            return;
        }

        if(st == NULL)
        {
            /* scratch states, allocated once and reused on later calls */
            g_alloc_state(&st, fr->sizest);
            g_alloc_state(&inst, fr->sizest);
        }

        /* Named inrad for historical reasons: it once held
         * 2*area/perimeter, it is now fg_diam(tri). */
        inrad = fg_diam(tri);

        /* State of the cell polynomial evaluated at the quadrature point. */
        con_u_at_pt(trist, qcrds, cent, con_u);

        assign(st, trist, fr->sizest);
        Dens(st) = con_u[0];
        Mom(st)[0] = con_u[1];
        Mom(st)[1] = con_u[2];
        Energy(st) = con_u[3];
        set_type_of_state(st,GAS_STATE);
        rho = Dens(st);
        P = pressure(st);
        c = sound_speed(st);

        assign(bdryst, st, fr->sizest);

        /* First stage: use the evaluated state as is.  The third stage of
         * a three-stage scheme advances over half a step. */
        if(RK_STEP == 3)
        {
            if(rk_iter == 0)
                return;
            else if(rk_iter == 2)
                dt = 0.5*dt;
        }
        else if(RK_STEP == 2)
        {
            if(rk_iter == 0)
                return;
        }

        /*
         * Convert to x-y coordinates: pick a normal pointing in the
         * positive axis direction, the matching tangent, and an interior
         * sample point one inrad away from the boundary.
         */
        ans = fabs(nor[0]*dirx[0] + nor[1]*dirx[1]);
        if(ans > 0.5 && nor[0] > 0.5)
        {
            // right side
            for(i = 0; i < dim; i++) tempnor[i] = nor[i];
            for(i = 0; i < dim; i++)
                inqcrds[i] = qcrds[i] - inrad*tempnor[i];
            temptan[0] = 0.0; temptan[1] = 1.0;
        }
        else if(ans > 0.5 && nor[0] < -0.5)
        {
            // left side
            tempnor[0] = -nor[0];
            tempnor[1] = -nor[1];
            for(i = 0; i < dim; i++)
                inqcrds[i] = qcrds[i] + inrad*tempnor[i];
            temptan[0] = 0.0; temptan[1] = 1.0;
        }
        else if(ans < 0.5 && nor[1] > 0.5)
        {
            // top
            for(i = 0; i < dim; i++) tempnor[i] = nor[i];
            for(i = 0; i < dim; i++)
                inqcrds[i] = qcrds[i] - inrad*tempnor[i];
            temptan[0] = 1.0; temptan[1] = 0.0;
        }
        else
        {
            // bottom
            tempnor[0] = -nor[0];
            tempnor[1] = -nor[1];
            for(i = 0; i < dim; i++)
                inqcrds[i] = qcrds[i] + inrad*tempnor[i];
            temptan[0] = 1.0; temptan[1] = 0.0;
        }

        /* normal and tangential velocity at the boundary point */
        for(i = 0; i < dim; i++)
        {
            vn += tempnor[i]*vel(i,st);
            vt += temptan[i]*vel(i,st);
        }

        /* Interpolate the interior state from the triangles around tri. */
        loc_N = install_local_tris(tri, loc_tris);

        if(NO == solution_from_loc_tris(inst,loc_tris, loc_N, fr->sizest, inqcrds))
        {
            printf("ERROR: in DG_boundary_state()\n");
            printf("interpolation failed for inflow boundary on pt(%g %g), inpt(%g %g)\n",
              qcrds[0], qcrds[1], inqcrds[0], inqcrds[1]);
            printf("radius = %g, cent[%g %g]\n", inrad, cent[0], cent[1]);
            printf("print tri[%d] side[%d] boundary:\n", tri->id, side);
            print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[0]), dim, "\n");
            print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[1]), dim, "\n");
            print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[2]), dim, "\n\n");
            /* neighbour pointers are printed with %p; %d on a pointer is
             * undefined behaviour */
            printf("fg_e_type(tri) %d %d %d, nbs %p %p %p; id[%d %d %d], type[%d %d %d]\n",
               fg_e_type(tri)[0], fg_e_type(tri)[1], fg_e_type(tri)[2],
                 (void*)Tri_on_side(tri,0), (void*)Tri_on_side(tri,1),
                 (void*)Tri_on_side(tri,2),
                 Tri_on_side(tri,0)->id, Tri_on_side(tri,1)->id, Tri_on_side(tri,2)->id,
                Tri_on_side(tri,0)->BC_type, Tri_on_side(tri,1)->BC_type, Tri_on_side(tri,2)->BC_type);
            clean_up(ERROR);
        }
        inrho = Dens(inst);
        inP = pressure(inst);
        inc = sound_speed(inst);    /* NOTE(review): inc is never read */
        for(i = 0; i < dim; i++)
        {
            invn += tempnor[i]*vel(i,inst);
            invt += temptan[i]*vel(i,inst);
        }

        set_state(st,TGAS_STATE,st);
        set_state(bdryst,TGAS_STATE,bdryst);

        /*
         * Characteristic amplitudes.  Entries left at zero are the ones
         * the original author labels "incoming" for that flow regime.
         */
        switch(fg_e_type(tri)[side])
        {
        case IN_FLOW:  // left & bottom side
            if(fabs(vn) <= c) // subsonic flow
            {
                if(vn >= 0.0)
                {
                    // v+c, v incoming
                    // v-c, outgoing
                    L[0] = (vn-c)*1.0/inrad*(inP-P-rho*c*(invn-vn));
                }
                else
                {
                    // v-c, v, v outgoing
                    // v+c incoming
                    L[0] = (vn-c)*1.0/inrad*(inP-P-rho*c*(invn-vn));
                    L[1] = vn*1.0/inrad*(invt-vt);
                    L[2] = -vn*1.0/inrad*(inP-P - sqr(c)*(inrho-rho));
                }
            }
            else // supersonic flow
            {
                if(vn >= 0.0)
                {
                    // supersonic inflow, takes boundary value: L stays 0
                }
                else
                {
                    // supersonic outflow
                    // v-c, v, v+c outgoing
                    L[0] = (vn-c)*1.0/inrad*(inP-P-rho*c*(invn-vn));
                    L[1] = vn*1.0/inrad*(invt-vt);
                    L[2] = -vn*1.0/inrad*(inP-P - sqr(c)*(inrho-rho));
                    L[3] = (vn+c)*1.0/inrad*(inP-P+rho*c*(invn-vn));
                }
            }
        break;
        case OUT_FLOW:
            if(fabs(vn) <= c)  // subsonic flow
            {
                if(vn >= 0.0)
                {
                    // v-c incoming, v, v+c outgoing
                    L[1] = vn*1.0/inrad*(vt-invt);
                    L[2] = -vn*1.0/inrad*(P-inP -sqr(c)*(rho-inrho));
                }
                L[3] = (vn+c)*1.0/inrad*(P-inP + rho*c*(vn-invn));
            }
            else // supersonic flow
            {
                if(vn > 0.0)
                {
                    // v-c, v, v+c outgoing
                    L[0] = (vn-c)*1.0/inrad*(P-inP - rho*c*(vn-invn));
                    L[1] = vn*1.0/inrad*(vt-invt);
                    L[2] = -vn*1.0/inrad*(P-inP -sqr(c)*(rho-inrho));
                    L[3] = (vn+c)*1.0/inrad*(P-inP + rho*c*(vn-invn));
                }
                else
                {
                    // v-c, v, v+c incoming: L stays 0
                }
            }
        break;
        case NEUMANN:   /* handled before the switch; reaching it here is an error */
        default:
            printf("ERROR: in DG_boundary_state(), unknown BC, side[%d] of tri(%d)\n",
                side, tri->id);
            printf("tri neigh[%p %p %p]\n", (void*)Tri_on_side(tri,0),
                   (void*)Tri_on_side(tri,1), (void*)Tri_on_side(tri,2));
            print_tri(tri, tri->surf->interface);
            clean_up(ERROR);
        }

        /* Time derivatives of the primitive variables from the amplitudes,
         * followed by a first-order update over dt. */
        dp = -0.5*(L[3]+L[0]);
        drho = 1.0/(sqr(c))*(dp - L[2]);
        dvn = -0.5/(rho*c)*(L[3]-L[0]);
        dvt = -L[1];
        Dens(bdryst) = Dens(st) + drho*dt;
        Press(bdryst) = Press(st) + dp*dt;
        vn = vn + dvn*dt;
        vt = vt + dvt*dt;
        for (i = 0; i < dim; ++i)
            Vel(bdryst)[i] = tempnor[i]*vn + temptan[i]*vt;

        set_state(bdryst,GAS_STATE,bdryst);

        if(invalid_state("edge_integr",bdryst,NO))
        {
            printf("ERROR: DG_boundary_state()\n");
            printf("Invalid boundary state\n");
            clean_up(ERROR);
        }
}

/*
*       DG_CONST_P_boundary_state():
*
*       Imposes a prescribed pre- or post-shock state on a boundary edge
*       (used for the double Mach reflection set-up; despite its name it
*       does not impose a constant pressure).
*
*       bdryst    - output state; starts as a copy of the cell state and
*                   then has density, velocity and pressure overwritten
*       tri       - boundary triangle
*       qcrds     - quadrature point; qcrds[1] is compared with sk_y_pos
*       fr        - front, supplies sizest
*       nor       - edge normal, used to tell the left side from the rest
*       mid_soln  - Runge-Kutta stage storage; for rk_iter != 0 the cell
*                   state is taken from mid_soln[tri->id].st[rk_iter]
*       rk_iter   - Runge-Kutta stage index
*       side, dt  - unused
*
*       The pre-shock values pre_sk_st[] are used only on the left side
*       (nor[0] < -0.5) at points with qcrds[1] not below sk_y_pos.
*       Everywhere else the post-shock values are used, with velocity
*       post_sk_st[1] directed at 60 degrees from the x axis.
*/
LOCAL void DG_CONST_P_boundary_state(
        Locstate    bdryst,
        TRI         *tri,
        float       *qcrds,
        Front       *fr,
        int         side,
        float       *nor,
        float       dt,
        Mid_soln    *mid_soln,
        int         rk_iter)
{
        Locstate    trist = tri->st;
        float       dirx[2] = {1.0, 0.0}, ans;
        int         on_left_side;

        if(rk_iter != 0)
            trist = mid_soln[tri->id].st[rk_iter];

        /* |nor . x-axis|; same test as in DG_boundary_state().  The second
         * term used to repeat nor[0]*dirx[0]. */
        ans = fabs(nor[0]*dirx[0] + nor[1]*dirx[1]);
        on_left_side = (ans > 0.5 && nor[0] < -0.5);

        assign(bdryst, trist, fr->sizest);
        set_type_of_state(bdryst,TGAS_STATE);

        if(on_left_side && !(qcrds[1] < sk_y_pos))
        {
            // left side, Mach 10 shock: pre shock condition
            Dens(bdryst) = pre_sk_st[0];
            Vel(bdryst)[0] = pre_sk_st[1];
            Vel(bdryst)[1] = pre_sk_st[2];
            Press(bdryst) = pre_sk_st[3];
        }
        else
        {
            // right side, bottom side, and left side below sk_y_pos:
            // post shock condition
            Dens(bdryst) = post_sk_st[0];
            Vel(bdryst)[0] = post_sk_st[1]*cos(radians(60.0));
            Vel(bdryst)[1] = post_sk_st[1]*sin(radians(60.0));
            Press(bdryst) = post_sk_st[3];
        }

        set_state(bdryst,GAS_STATE,bdryst);
}

/*
*       DG_NEU_boundary_state():
*
*       Builds the mirror state for a reflecting (NEUMANN) edge: the
*       velocity component along nor is reversed, the tangential part and
*       all other fields are kept.
*
*       bdryst    - output state
*       tri       - boundary triangle
*       qcrds     - quadrature point on the edge
*       fr        - front, supplies sizest
*       nor       - edge normal
*       mid_soln  - Runge-Kutta stage storage; for rk_iter != 0 the cell
*                   state is taken from mid_soln[tri->id].st[rk_iter]
*       rk_iter   - Runge-Kutta stage index
*       side, dt  - unused
*
*       The interior state is first taken from the cell polynomial
*       evaluated at qcrds.  If the reflected state is reported invalid,
*       the unmodified cell state is reflected instead.  If that is invalid
*       as well the run is aborted through clean_up(ERROR); the previous
*       goto-based retry repeated the same fallback without end.
*/
LOCAL void DG_NEU_boundary_state(
        Locstate    bdryst,
        TRI         *tri,
        float       *qcrds,
        Front       *fr,
        int         side,
        float       *nor,
        float       dt,
        Mid_soln    *mid_soln,
        int         rk_iter)
{
        double      *cent = fg_centroid(tri);
        float       con_u[4], vn, vtan[MAXD];
        Locstate    trist = tri->st;
        static Locstate    st = NULL;
        int         dim = 2, i, attempt;

        if(st == NULL)
        {
            /* scratch state, allocated once and reused on later calls */
            g_alloc_state(&st, fr->sizest);
        }

        if(rk_iter != 0)
            trist = mid_soln[tri->id].st[rk_iter];

        /* First attempt: cell polynomial evaluated at the quadrature point. */
        con_u_at_pt(trist, qcrds, cent, con_u);

        assign(st, trist, fr->sizest);
        Dens(st) = con_u[0];
        Mom(st)[0] = con_u[1];
        Mom(st)[1] = con_u[2];
        Energy(st) = con_u[3];
        set_type_of_state(st,GAS_STATE);

        for(attempt = 0; attempt < 2; attempt++)
        {
            assign(bdryst, st, fr->sizest);
            set_state(bdryst,TGAS_STATE,bdryst);

            /* split the velocity into normal and tangential parts */
            vn = 0.0;
            for(i = 0; i < dim; i++)
                vn += nor[i]*Vel(bdryst)[i];
            for(i = 0; i < dim; i++)
                vtan[i] = Vel(bdryst)[i] - nor[i]*vn;

            /* mirror the normal part */
            vn *= -1.0;
            for(i = 0; i < dim; i++)
                Vel(bdryst)[i] = vtan[i] + nor[i]*vn;
            set_state(bdryst,GAS_STATE,bdryst);

            if(!invalid_state("edge_integr",bdryst,NO))
                return;

            printf("ERROR: DG_NEU_boundary_state()\n");
            printf("Invalid boundary state\n");

            /* Second attempt: fall back to the unmodified cell state. */
            assign(st, trist, fr->sizest);
        }

        /* Both the point value and the cell state gave an invalid mirror
         * state; retrying again cannot change the outcome. */
        printf("ERROR: DG_NEU_boundary_state(), "
               "cell state fallback is invalid too\n");
        clean_up(ERROR);
}

/*
*       solution_from_tri():
*
*       Searches every triangle of every surface in fr->mesh for one that
*       contains crds.  On the first hit, st becomes a copy of that
*       triangle's state with density, momentum and energy replaced by the
*       values con_u_at_pt() returns at crds, and YES is returned.
*       Returns NO (st untouched) when no triangle contains the point.
*/
LOCAL int solution_from_tri(
        Locstate st,
        Front    *fr,
        float    *crds)
{
        SURFACE  **s;
        TRI      *t;
        float    u[4];

        for(s = fr->mesh->surfaces; s && *s; s++)
        {
            t = first_tri(*s);
            while(!at_end_of_tri_list(t,*s))
            {
                if(pt_in_tri(crds, t) != YES)
                {
                    t = t->next;
                    continue;
                }

                con_u_at_pt(t->st, crds, fg_centroid(t), u);
                assign(st, t->st, fr->sizest);
                Dens(st) = u[0];
                Mom(st)[0] = u[1];
                Mom(st)[1] = u[2];
                Energy(st) = u[3];
                return YES;
            }
        }
        return NO;
}

/*
*       solution_from_loc_tris():
*
*       Same as solution_from_tri(), but searches only the n triangles
*       listed in tri[].  On the first triangle containing crds, st becomes
*       a copy of its state (sizest bytes) with density, momentum and
*       energy replaced by the values con_u_at_pt() returns at crds, and
*       YES is returned.  Returns NO when none of the triangles contains
*       the point.
*/
LOCAL int solution_from_loc_tris(
        Locstate st,
        TRI      **tri,
        int      n,
        size_t   sizest,
        float    *crds)
{
        float    u[4];
        TRI      *t;
        int      k;

        for(k = 0; k < n; ++k)
        {
            t = tri[k];
            if(pt_in_tri(crds, t) != YES)
                continue;

            con_u_at_pt(t->st, crds, fg_centroid(t), u);
            assign(st, t->st, sizest);
            Dens(st) = u[0];
            Mom(st)[0] = u[1];
            Mom(st)[1] = u[2];
            Energy(st) = u[3];
            return YES;
        }
        return NO;
}

/*
*       zero_moments():
*
*       Resets the coefficient arrays of a state: coefficient 0 of density,
*       energy and both momentum components is set from Dens(), Energy()
*       and Mom() of the state, and coefficients 1 .. MAX_N_COEF-1 are
*       set to zero.
*/
EXPORT void zero_moments(
        Locstate state)
{
        int k;

        /* leading coefficient of each conserved quantity */
        dg_Dens(state)[0] = Dens(state);
        dg_Energy(state)[0] = Energy(state);
        dg_Mom(state)[0][0] = Mom(state)[0];
        dg_Mom(state)[1][0] = Mom(state)[1];

        /* clear all higher coefficients */
        k = 1;
        while(k < MAX_N_COEF)
        {
            dg_Dens(state)[k] = 0.0;
            dg_Energy(state)[k] = 0.0;
            dg_Mom(state)[0][k] = 0.0;
            dg_Mom(state)[1][k] = 0.0;
            ++k;
        }
}

/*
*       sample_soln_along_line():
*
*       Samples the density along a straight line through the rectangular
*       grid of fr and writes one "coordinate density" pair per line to a
*       text file.
*
*       dname   output directory; a "gnuplot_data" subdirectory is created
*               beneath it.
*       step    time step number, embedded in the file name through
*               right_flush(step,7).
*       fr      front providing the grid, the mesh and the state size.
*
*       The line runs along x when debugging("v_evo") is on and along y
*       otherwise; the other coordinate is held at the middle of the domain.
*       101 points are used unless debugging("Shu_Osher") is on, in which
*       case gmax[1]+1 points are used.  The first and last sample are moved
*       a tiny fraction of dx into the domain so that they do not sit exactly
*       on the boundary; the coordinate written to the file is still the
*       nominal one, L + i*dx.
*
*       A point for which solution_from_tri() finds no triangle is treated as
*       fatal (clean_up(ERROR)).  Failures to create the directories or open
*       the file only produce a warning and abandon the output.
*
*       Changes relative to the previous version:
*         - the sample buffer is released on every exit path (it used to
*           leak whenever a directory or the file could not be created);
*         - the fixed coordinate is (L+U)/2 instead of (U-L)/2, which was
*           the domain midpoint only when L == 0;
*         - path names are built with bounded snprintf() and truncation is
*           reported instead of overflowing the 256 byte buffers;
*         - fewer than two sample points (division by zero in dx) is
*           rejected.
*/
LOCAL void sample_soln_along_line(
        const char *dname,
        int        step,
        Front      *fr)
{
        int        numpts = 101, i;
        int        dir, fix;       /* swept and fixed coordinate index */
        double     nudge;          /* inward shift of end points, in dx */
        float      dx, crds[MAXD];
        float      *fsts = NULL;
        RECT_GRID  *gr = fr->rect_grid;
        static Locstate st = NULL;
        char       outname[256], outdir[256];
        static char   *fname = NULL, *ppfname = NULL;
        const char    *nstep;
        FILE          *fp = NULL;
        static size_t fname_len = 0, ppfname_len = 0;
        int        len;

        if(st == NULL)
            g_alloc_state(&st, fr->sizest);

        if(debugging("Shu_Osher"))
            numpts = gr->gmax[1]+1;

        if(numpts < 2)
        {
            (void) printf("WARNING in sample_soln_along_line(), "
                          "need at least 2 sample points, got %d\n",numpts);
            return;
        }

        vector(&fsts, numpts, sizeof(float));

        if(debugging("v_evo"))
        {
            dir = 0;
            nudge = 0.00000001;
        }
        else
        {
            dir = 1;
            nudge = 0.000001;
        }
        fix = 1 - dir;

        dx = (gr->U[dir]-gr->L[dir])/(numpts-1);
        crds[fix] = 0.5*(gr->L[fix] + gr->U[fix]);

        // End points are nudged inward to stay off the domain boundary.
        for(i = 0; i < numpts; i++)
        {
            if(i == 0)
                crds[dir] = gr->L[dir] + dx*nudge;
            else if(i == numpts-1)
                crds[dir] = gr->U[dir] - dx*nudge;
            else
                crds[dir] = gr->L[dir] + dx*i;

            if(NO  == solution_from_tri(st, fr, crds))
            {
                printf("ERROR: solution from tri, failed on %g %g\n",
                      crds[0], crds[1]);
                clean_up(ERROR);
            }
            fsts[i] = Dens(st);
        }

        len = snprintf(outdir,sizeof(outdir),"%s/%s",dname,"gnuplot_data");
        if (len < 0 || (size_t)len >= sizeof(outdir))
        {
            (void) printf("WARNING in sample_soln_along_line(), "
                          "directory name too long under %s\n",dname);
            goto done;
        }
        ppfname = set_ppfname(ppfname,"dens",&ppfname_len);
        nstep = right_flush(step,7);
        len = snprintf(outname,sizeof(outname),"%s.ts%s",ppfname,nstep);
        if (len < 0 || (size_t)len >= sizeof(outname))
        {
            (void) printf("WARNING in sample_soln_along_line(), "
                          "file name too long for step %d\n",step);
            goto done;
        }

        if (create_directory(dname,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in sample_soln_along_line(), directory "
                          "%s doesn't exist and can't be created\n",dname);
            goto done;
        }
        if (create_directory(outdir,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in sample_soln_along_line(), directory "
                         "%s doesn't exist and can't be created\n",outdir);
            goto done;
        }

        fname = get_list_file_name(fname,outdir,outname,&fname_len);
        if ((fp = fopen(fname,"w")) == NULL)
        {
            (void) printf("WARNING in sample_soln_along_line(), "
                           "can't open %s\n",fname);
            goto done;
        }

        for(i = 0; i < numpts; i++)
            (void) fprintf(fp,"%g %g\n", gr->L[dir] + dx*i, fsts[i]);

done:
        /* Single exit: release the sample buffer and the file, if any. */
        if (fp != NULL)
            (void) fclose(fp);
        free(fsts);
}


/*
*       compute_glb_alpha_of_LF():
*
*       Returns the largest of |q + c|, |q - c| and |q| for the state st,
*       where q = |Mom|/Dens is the flow speed and c = sound_speed(st).
*       The result is never below 0.  Judging by the name this is the
*       dissipation coefficient of a Lax-Friedrichs flux; confirm against the
*       caller.
*/
LOCAL float compute_glb_alpha_of_LF(
        Locstate  st)
{
        float     ux, uy, speed, snd;
        float     bound;

        ux = Mom(st)[0]/Dens(st);
        uy = Mom(st)[1]/Dens(st);
        speed = sqrt(sqr(ux) + sqr(uy));
        snd = sound_speed(st);

        /* Running maximum over the three candidate speeds. */
        bound = 0.0;
        bound = max(bound, fabs(speed + snd));
        bound = max(bound, fabs(speed - snd));
        bound = max(bound, fabs(speed));

        return bound;
}


LOCAL void compute_L1(
	Front   *fr)
{
        SURFACE      **surf;
        TRI          *tri, *on_tri, *in_tri;
        RECT_GRID    *gr = fr->rect_grid;
        float        *L = gr->L, *U = gr->U;
        float        crds[MAXD], coords[MAXD];
        double  *cent;
        float        total_mass = 0.0, mass_peak, dens, mass_low;
        Locstate     st;
        float        ans = 0.0, exact = 0.0, exact_total = 0.0;
        int          i, k;
        POINT   *p[3];
        float   *pcrds[3], dbcent[MAXD], nor[MAXD];
        float   emid0[3], emid1[3], emid2[3], *pemid[3], con_u[4];
        float   V = 1.0;

        mass_peak = -HUGE_VAL;
        mass_low = HUGE_VAL;
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                cent = fg_centroid(tri);
                if((L[0] < cent[0] && L[1] < cent[1] &&
                    U[1] > cent[1] && U[0] > cent[0])
                  )
                {
                    // TMP
                    /**
                    if(tri->id == 54)
                    {
                        debug_flag = YES; 
                        printf("\n---------compute_L1, see tri\n");
                    }
                    **/ 

                    ans = exact = 0.0;
                    for(i = 0; i < 3; i++)
                    {
                        p[i] = Point_of_tri(tri)[i];
                        pcrds[i] = Coords(p[i]);
                    }

                    for(i = 0; i < 2; i++)
                    {
                        emid0[i] = 0.5*(pcrds[0][i] + pcrds[1][i]);
                        emid1[i] = 0.5*(pcrds[1][i] + pcrds[2][i]);
                        emid2[i] = 0.5*(pcrds[2][i] + pcrds[0][i]);
                    }
                    pemid[0] = emid0;
                    pemid[1] = emid1;
                    pemid[2] = emid2;

                    st = tri->st;
                    crds[0] = cent[0];
                    crds[1] = cent[1];

                    // vertices
                    for(i = 0; i < 3; i++)
                    {
                        con_u_at_pt(tri->st, pcrds[i], cent, con_u);
                        // dens = vortex_evo_den_soln(pcrds[i],fr->time);
                        coords[0] = pcrds[i][0] - V*fr->time;
                        coords[1] = pcrds[i][1] - V*fr->time;
                        if(coords[0] < L[0])
                            coords[0] += (U[0]-L[0]);
                        if(coords[1] < L[1])
                            coords[1] += (U[1]-L[1]);
                        if(coords[0] > U[0])
                            coords[0] -= (U[0]-L[0]);
                        if(coords[1] > U[1])
                            coords[1] -= (U[1]-L[1]);
                        
                        dens = vortex_evo_den_soln(coords,0.0);
                        // dens = FM_vort_soln(coords,fr);

                        if(debug_flag == YES)
                        {
                            printf("WARNING: tri(%d) vert error tri-init (%12.11g %12.11g)\n",
                              tri->id, con_u[0], dens);
                        }

                        ans += 0.05*fabs(dens-con_u[0]);
                        // ans += 0.05*fabs(con_u[0]);
                        // exact += 0.05*fabs(dens);
                    }
                    // edge mid points
                    for(i = 0; i < 3; i++)
                    {
                        con_u_at_pt(tri->st, pemid[i], cent, con_u);
                        // dens = vortex_evo_den_soln(pemid[i],fr->time);
                        coords[0] = pemid[i][0] - V*fr->time;
                        coords[1] = pemid[i][1] - V*fr->time;
                        if(coords[0] < L[0])
                            coords[0] += (U[0]-L[0]);
                        if(coords[1] < L[1])
                            coords[1] += (U[1]-L[1]);
                        if(coords[0] > U[0])
                            coords[0] -= (U[0]-L[0]);
                        if(coords[1] > U[1])
                            coords[1] -= (U[1]-L[1]);
 
                        dens = vortex_evo_den_soln(coords,0.0);
                        // dens = FM_vort_soln(coords,fr);

                        ans += 2.0/15.0*fabs(dens-con_u[0]);
                        // ans += 2.0/15.0*fabs(con_u[0]);
                        // exact += 2.0/15.0*fabs(dens);
                        
                    
                    }
                    // tri centroid
                    // if(fabs(crds[0] - 5.0) < 0.5 &&
                    //    fabs(crds[1] - 5.0) < 0.5)
                    //     debug_flag = YES;

                    con_u_at_pt(tri->st, crds, cent, con_u);
                    // dens = vortex_evo_den_soln(crds,fr->time);
                    coords[0] = crds[0] - V*fr->time;
                    coords[1] = crds[1] - V*fr->time;
                    if(coords[0] < L[0])
                        coords[0] += (U[0]-L[0]);
                    if(coords[1] < L[1])
                        coords[1] += (U[1]-L[1]);
                    if(coords[0] > U[0])
                        coords[0] -= (U[0]-L[0]);
                    if(coords[1] > U[1])
                        coords[1] -= (U[1]-L[1]);
                    dens = vortex_evo_den_soln(coords,0.0);
                    // dens = FM_vort_soln(coords,fr);

                    if(debug_flag == YES)
                    {
                        // printf("crds[%g %g] tri-init cent (%12.11g %12.11g) error\n",
                        //     crds[0], crds[1],  con_u[0], dens);
                        // clean_up(0);
                        // debug_flag = NO;
                    }

                    ans += 9.0/20.0*fabs(dens-con_u[0]);
                    // ans += 9.0/20.0*fabs(con_u[0]);
                    // exact += 9.0/20.0*fabs(dens);

                    total_mass += fabs(ans)*tri->Lmass_matrix[0][0];
                    // total_mass += fabs(ans-exact)*tri->Lmass_matrix[0][0];
                    // exact_total += fabs(ans-exact)*tri->Lmass_matrix[0][0];  

                    // L_inf use centroid state
                    if(fabs(con_u[0] - dens)  > mass_peak)
                    {
                        mass_peak = fabs(con_u[0] - dens);
                        on_tri = tri;
                    }
                    if(Dens(st) < mass_low)
                        mass_low = Dens(st);
                }
            }
        }
  
        printf("compute_L1, total_mass = %24.20g, mass_peak = %24.20g\n",
                   total_mass, mass_peak);
        printf("mass_low = %24.20g time = %g, new_total = %15.12g\n", mass_low, fr->time, exact_total);
        // print_general_vector("Tri_pt", Coords(Point_of_tri(on_tri)[0]), 2, "\n");
        // print_general_vector("Tri_pt", Coords(Point_of_tri(on_tri)[1]), 2, "\n");
        // print_general_vector("Tri_pt", Coords(Point_of_tri(on_tri)[2]), 2, "\n");
        printf("Triangle(%d) ceontriod (%18.17Lg, %18.17Lg)\n\n", on_tri->id, 
                fg_centroid(on_tri)[0], fg_centroid(on_tri)[1]);
        // printf("EXIT 0, compute_L1\n");
        // clean_up(0);

}

LOCAL float vortex_evo_den_soln(
	float           *coords,
        float           time)
{
        float           r2, r, ep = 5.0;
        float           du[2], dT, T, S, nT, nS, nP;
        float           gam = 1.4, P, dens, V[MAXD];
        float           bar[3], newdens;
        int             i;

        dens = 1.0;
        P = 1.0;
        V[0] = V[1] = 0.0;
        T = 1.0;

        S = (P/(pow(dens, gam)));
 
        bar[0] = coords[0] - (5.0+time*sqrt( sqr(V[0]) + sqr(V[1]) ));
        bar[1] = coords[1] - (5.0+time*sqrt( sqr(V[0]) + sqr(V[1]) ));

        r2 = sqr(bar[0]) + sqr(bar[1]);
        // du[0] = ep/(2.0*PI)*exp(0.5*(1.0-r2))*(-bar[1]);
        // du[1] = ep/(2.0*PI)*exp(0.5*(1.0-r2))*(bar[0]);
        dT = -1.0*(gam - 1.0)*sqr(ep)/(8.0*gam*sqr(PI))*exp(1.0-r2);

        nT = T + dT;

        return (pow(nT/S, 1.0/(gam-1.0)));
}
        
/*
*       FM_vort_soln():
*
*       Returns the density obtained by re-initialising a scratch state with
*       vortex_evo_init() on the triangle of fr->mesh that contains coords
*       and evaluating it at coords with con_u_at_pt().
*
*       The scratch state is allocated once (static) and seeded from the
*       state of the first triangle of the first surface.  If no triangle
*       contains coords the function reports an error and calls
*       clean_up(ERROR).
*
*       Changes relative to the previous version:
*         - the surface list is checked before its first entry is
*           dereferenced for the one-time seeding;
*         - the function no longer falls off its end without a value: a
*           return follows clean_up() in case that call ever comes back.
*/
LOCAL float FM_vort_soln(
	float        *coords,
        Front        *fr)
{
        SURFACE  **surf = fr->mesh->surfaces;
        TRI      *tri;
        static Locstate st = NULL;
        float    conu[4];

        if(st == NULL && surf != NULL && *surf != NULL)
        {
            g_alloc_state(&st, fr->sizest);
            assign(st,first_tri(*surf)->st,fr->sizest);
        }

        for(; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                if(pt_in_tri(coords, tri) != YES)
                    continue;

                if(debug_flag == YES)
                {
                    printf("FM_vort_soln, find soln on (%d)\n", tri->id);
                }
                vortex_evo_init(st,tri);
                con_u_at_pt(st, coords, fg_centroid(tri), conu);
                return conu[0];
            }
        }

        printf("ERROR: FM_vort_soln()\n");
        clean_up(ERROR);
        return 0.0;     /* only reached if clean_up() returns */
}

LOCAL void vortex_evo_init(
	Locstate        state,
        TRI             *tri)
{
        float           r2, r, ep = 5.0;
        float           du[2], dT, T, S, nT, nS, nP;
        float           gam = 1.4, P;
        float           bar[3];
        int             i, j, k, indx;
        POINT   *p[3];
        float   *pcrds[3], dbcent[MAXD];
        float   emid0[3], emid1[3], emid2[3], *pemid[3], conu[4];
        double   *cent = fg_centroid(tri);
        double   rhs[4][MAX_N_COEF], mulrhs[4][MAX_N_COEF], dens[7];
        float   area = fg_area(tri);
        double      **mass_inv, **Lmass_matrix;
        int     debug_flag = NO;

        // gam = adiabatic_gamma(state);

        Lmass_matrix = tri->Lmass_matrix;
        mass_inv = tri->mass_inv;

        for(i = 0; i < 4; i++)
        {
            for(j = 0; j < MAX_N_COEF; j++)
                rhs[i][j] = 0.0;
        }

        for(i = 0; i < 3; i++)
        {
            p[i] = Point_of_tri(tri)[i];
            pcrds[i] = Coords(p[i]);
        }

        for(i = 0; i < 2; i++)
        {
            emid0[i] = 0.5*(pcrds[0][i] + pcrds[1][i]);
            emid1[i] = 0.5*(pcrds[1][i] + pcrds[2][i]);
            emid2[i] = 0.5*(pcrds[2][i] + pcrds[0][i]);
        }
        pemid[0] = emid0;
        pemid[1] = emid1;
        pemid[2] = emid2;

        // vertices
        for(i = 0; i < 3; i++)
        {
            set_state(state,TGAS_STATE,state);
            Dens(state) = 1.0;
            Press(state) = 1.0;
            Vel(state)[0] = Vel(state)[1] = 1.0;
            T = 1.0;
            // S = log(Press(state)/(pow(Dens(state), gam)));
            S = (Press(state)/(pow(Dens(state), gam)));

            bar[0] = pcrds[i][0] - 5.0;
            bar[1] = pcrds[i][1] - 5.0;

            r2 = sqr(bar[0]) + sqr(bar[1]);
            du[0] = ep/(2.0*PI)*exp(0.5*(1.0-r2))*(-bar[1]);
            du[1] = ep/(2.0*PI)*exp(0.5*(1.0-r2))*(bar[0]);
            dT = -1.0*(gam - 1.0)*sqr(ep)/(8.0*gam*sqr(PI))*exp(1.0-r2);
            nT = T + dT;

            // Dens(state) = pow(exp(S)/nT, 1.0/(1.0-gam));
            dens[i] = Dens(state) = pow(nT/S, 1.0/(gam-1.0));
            Press(state) = nT*Dens(state);

            Vel(state)[0] += du[0];
            Vel(state)[1] += du[1];

            nS = Press(state)/pow(Dens(state), gam);
            set_state(state,GAS_STATE,state);
            conu[0] = Dens(state);
            conu[1] = Mom(state)[0];
            conu[2] = Mom(state)[1];
            conu[3] = Energy(state);
            for(j = 0; j < 4; j++)
            {
                for(k = 0; k < MAX_N_COEF; k++)
                    rhs[j][k] += 0.05*(conu[j]*vh_val(pcrds[i],cent,k));
            }
        }

        // edge mid points
        for(i = 0; i < 3; i++)
        {
            set_state(state,TGAS_STATE,state);
            Dens(state) = 1.0;
            Press(state) = 1.0;
            Vel(state)[0] = Vel(state)[1] = 1.0;
            T = 1.0;
            // S = log(Press(state)/(pow(Dens(state), gam)));
            S = (Press(state)/(pow(Dens(state), gam)));

            bar[0] = pemid[i][0] - 5.0;
            bar[1] = pemid[i][1] - 5.0;

            r2 = sqr(bar[0]) + sqr(bar[1]);
            du[0] = ep/(2.0*PI)*exp(0.5*(1.0-r2))*(-bar[1]);
            du[1] = ep/(2.0*PI)*exp(0.5*(1.0-r2))*(bar[0]);
            dT = -1.0*(gam - 1.0)*sqr(ep)/(8.0*gam*sqr(PI))*exp(1.0-r2);
            nT = T + dT;

            // Dens(state) = pow(exp(S)/nT, 1.0/(1.0-gam));
            dens[i+3] = Dens(state) = pow(nT/S, 1.0/(gam-1.0));
            Press(state) = nT*Dens(state);

            Vel(state)[0] += du[0];
            Vel(state)[1] += du[1];

            nS = Press(state)/pow(Dens(state), gam);
            set_state(state,GAS_STATE,state);
            conu[0] = Dens(state);
            conu[1] = Mom(state)[0];
            conu[2] = Mom(state)[1];
            conu[3] = Energy(state);
            for(j = 0; j < 4; j++)
            {
                for(k = 0; k < MAX_N_COEF; k++)
                    rhs[j][k] += 2.0/15.0*(conu[j]*vh_val(pemid[i],cent,k));
            }
        }


        // tri centriod
        for(i = 0; i < 2; i++)
            dbcent[i] = cent[i];
        set_state(state,TGAS_STATE,state);
        Dens(state) = 1.0;
        Press(state) = 1.0;
        Vel(state)[0] = Vel(state)[1] = 1.0;
        T = 1.0;
        // S = log(Press(state)/(pow(Dens(state), gam)));
        S = (Press(state)/(pow(Dens(state), gam)));

        bar[0] = dbcent[0] - 5.0;
        bar[1] = dbcent[1] - 5.0;

        r2 = sqr(bar[0]) + sqr(bar[1]);
        du[0] = ep/(2.0*PI)*exp(0.5*(1.0-r2))*(-bar[1]);
        du[1] = ep/(2.0*PI)*exp(0.5*(1.0-r2))*(bar[0]);
        dT = -1.0*(gam - 1.0)*sqr(ep)/(8.0*gam*sqr(PI))*exp(1.0-r2);
        nT = T + dT;

        // Dens(state) = pow(exp(S)/nT, 1.0/(1.0-gam));
        dens[6] = Dens(state) = pow(nT/S, 1.0/(gam-1.0));
        Press(state) = nT*Dens(state);

        Vel(state)[0] += du[0];
        Vel(state)[1] += du[1];

        nS = Press(state)/pow(Dens(state), gam);
        set_state(state,GAS_STATE,state);
        nP = pressure(state);
        conu[0] = Dens(state);
        conu[1] = Mom(state)[0];
        conu[2] = Mom(state)[1];
        conu[3] = Energy(state);

        for(j = 0; j < 4; j++)
        {
            for(k = 0; k < MAX_N_COEF; k++)
                rhs[j][k] += 9.0/20.0*(conu[j]*vh_val(dbcent,cent,k));
        }

        for(j = 0; j < 4; j++)
        {
            for(k = 0; k < MAX_N_COEF; k++)
            {
                rhs[j][k] *= area;
            }
        }

        for(i = 0; i < 4; i++)
            matrix_vec_mult(mass_inv, rhs[i], MAX_N_COEF, MAX_N_COEF, mulrhs[i]);

        for(indx = 0; indx < MAX_N_COEF; indx++)
        {
            dg_Dens(state)[indx] = mulrhs[0][indx];
            dg_Mom(state)[0][indx] = mulrhs[1][indx];
            dg_Mom(state)[1][indx] = mulrhs[2][indx];
            dg_Energy(state)[indx] = mulrhs[3][indx];
        }

        // Compute average soln
        Dens(state) = 0.0;
        Mom(state)[0] = 0.0;
        Mom(state)[1] = 0.0;
        Energy(state) = 0.0;
        for(indx = 0; indx < MAX_N_COEF; indx++)
        {
            Dens(state) += dg_Dens(state)[indx]*Lmass_matrix[0][indx];
            Mom(state)[0] += dg_Mom(state)[0][indx]*Lmass_matrix[0][indx];
            Mom(state)[1] += dg_Mom(state)[1][indx]*Lmass_matrix[0][indx];
            Energy(state) += dg_Energy(state)[indx]*Lmass_matrix[0][indx];
        }
        Dens(state) /= Lmass_matrix[0][0];
        Mom(state)[0] /= Lmass_matrix[0][0];
        Mom(state)[1] /= Lmass_matrix[0][0];
        Energy(state) /= Lmass_matrix[0][0];

}

/*
*       inverse_2_2_matrix():
*
*       Stores the inverse of the 2x2 matrix mat in inv, using the adjugate
*       divided by the determinant.
*
*       mat     input matrix, entries mat[0..1][0..1].
*       size    not used; kept so existing callers remain valid.
*       inv     output matrix.  All entries of mat are read before inv is
*               written, so inv may be the same storage as mat (the previous
*               version overwrote mat[0][0] before reading it in that case).
*
*       A determinant of magnitude below 10*MACH_EPS is reported as singular
*       and passed to clean_up(ERROR).  fabs() is used for the test because
*       det is a double; fabsl() is the long double variant.
*/
LOCAL void  inverse_2_2_matrix(
        double      **mat,
        int        size,
        double      **inv)
{
        double      a = mat[0][0], b = mat[0][1];
        double      c = mat[1][0], d = mat[1][1];
        double      det = a*d - b*c;
        double      rdet;

        (void) size;

        if(fabs(det) < 10.0*MACH_EPS)
        {
            printf("ERROR: inverse_2_2_matrix, singular matrix\n");
            clean_up(ERROR);
        }

        rdet = 1.0/det;
        inv[0][0] = rdet*d;
        inv[0][1] = -rdet*b;
        inv[1][0] = -rdet*c;
        inv[1][1] = rdet*a;
}

/*
*       d_inverse_2_2_matrix():
*
*       Stores the inverse of the 2x2 matrix mat in inv, using the adjugate
*       divided by the determinant.
*
*       mat     input matrix, entries mat[0..1][0..1].
*       size    not used; kept so existing callers remain valid.
*       inv     output matrix.  All entries of mat are read before inv is
*               written, so inv may be the same storage as mat (the previous
*               version overwrote mat[0][0] before reading it in that case).
*
*       A determinant of magnitude below 10*MACH_EPS is reported as singular
*       and passed to clean_up(ERROR); the message now names this function
*       rather than inverse_2_2_matrix.  fabs() is used for the test because
*       det is a double; fabsl() is the long double variant.
*/
LOCAL void  d_inverse_2_2_matrix(
        double      **mat,
        int        size,
        double      **inv)
{
        double      a = mat[0][0], b = mat[0][1];
        double      c = mat[1][0], d = mat[1][1];
        double      det = a*d - b*c;
        double      rdet;

        (void) size;

        if(fabs(det) < 10.0*MACH_EPS)
        {
            printf("ERROR: d_inverse_2_2_matrix, singular matrix\n");
            clean_up(ERROR);
        }

        rdet = 1.0/det;
        inv[0][0] = rdet*d;
        inv[0][1] = -rdet*b;
        inv[1][0] = -rdet*c;
        inv[1][1] = rdet*a;
}

/*
*       LR_matrix_in_dir():
*
*       Fills the 4x4 matrices L and R for the state st and the direction
*       nor (two components are read).  With v = Mom/Dens, a = sound_speed,
*       Gam = gruneisen_gamma(st), gam = Gam+1, ek = |v|^2/2,
*       h0 = a^2/Gam + ek and vn = v.nor, the columns of R are
*
*           (1, v - a*nor, h0 - a*vn),  (1, v, ek),  (1, v + a*nor, h0 + a*vn),
*           (0, nor[1], -nor[0], v[0]*nor[1] - v[1]*nor[0])
*
*       and L holds the matching rows.  Presumably these are the right and
*       left eigenvector matrices of the flux Jacobian in direction nor;
*       confirm against the caller.  Both L and R must provide 4 rows of 4
*       entries.
*/
LOCAL void      LR_matrix_in_dir(
	float     *nor,
        Locstate  st,
        float     **L,
        float     **R)
{
        float    Gam, gam;
        float    u, v, a, a2, h, h0, ek, vn;
        float    nx = nor[0], ny = nor[1];

        Gam = gruneisen_gamma(st);
        gam = Gam + 1.0;
        u = Mom(st)[0]/Dens(st);
        v = Mom(st)[1]/Dens(st);

        a = sound_speed(st);
        a2 = sqr(a);
        h = a2/Gam;
        ek = 0.5*(sqr(u) + sqr(v));
        h0 = h + ek;
        vn = u*nx + v*ny;

        /* R, row by row. */
        R[0][0] = 1.0;
        R[0][1] = 1.0;
        R[0][2] = 1.0;
        R[0][3] = 0.0;

        R[1][0] = u - a*nx;
        R[1][1] = u;
        R[1][2] = u + a*nx;
        R[1][3] = ny;

        R[2][0] = v - a*ny;
        R[2][1] = v;
        R[2][2] = v + a*ny;
        R[2][3] = -nx;

        R[3][0] = h0 - a*vn;
        R[3][1] = ek;
        R[3][2] = h0 + a*vn;
        R[3][3] = u*ny-v*nx;

        /* L, row by row. */
        L[0][0] = (Gam*ek + a*vn)/(2.0*a2);
        L[0][1] = ((1-gam)*u - a*nx)/(2.0*a2);
        L[0][2] = ((1-gam)*v - a*ny)/(2.0*a2);
        L[0][3] = Gam/(2.0*a2);

        L[1][0] = (a2 - Gam*ek)/(a2);
        L[1][1] = (Gam*u)/(a2);
        L[1][2] = (Gam*v)/(a2);
        L[1][3] = (1-gam)/(a2);

        L[2][0] = (Gam*ek - a*vn)/(2.0*a2);
        L[2][1] = ((1-gam)*u + a*nx)/(2.0*a2);
        L[2][2] = ((1-gam)*v + a*ny)/(2.0*a2);
        L[2][3] = Gam/(2.0*a2);

        L[3][0] = v*nx - u*ny;
        L[3][1] = ny;
        L[3][2] = -nx;
        L[3][3] = 0.0;
}


/*
*       d_matrix_vec_mult():
*
*       Computes ans = mat * vec, where mat has row rows and col columns,
*       vec has col entries and ans receives row entries.  Each ans[r] is
*       cleared and then accumulated in place, column by column.
*/
LOCAL void d_matrix_vec_mult(
        float    **mat,
        float    *vec,
        int      row,
        int      col,
        float    *ans)
{
        int      r, c;
        float    *slot;

        for(r = 0; r < row; r++)
        {
            slot = &ans[r];
            *slot = 0.0;
            for(c = 0; c < col; c++)
                *slot += mat[r][c]*vec[c];
        }
}

/*
*       ld_matrix_vec_mult():
*
*       Computes ans = mat * vec for a matrix stored as doubles; mat has row
*       rows and col columns, vec has col entries and ans receives row
*       entries.  Each ans[r] is cleared and then accumulated in place,
*       column by column.
*/
LOCAL void ld_matrix_vec_mult(
        double    **mat,
        float    *vec,
        int      row,
        int      col,
        float    *ans)
{
        int      r, c;
        float    *slot;

        for(r = 0; r < row; r++)
        {
            slot = &ans[r];
            *slot = 0.0;
            for(c = 0; c < col; c++)
                *slot += mat[r][c]*vec[c];
        }
}

/*
*       d_matrix_matrix_mult():
*
*       Computes ans = mat * matr.  mat is read as row x col, matr as
*       col x col (the inner sum runs over col terms), and ans receives
*       row x col entries.  Each entry of ans is cleared and then
*       accumulated in place.
*/
LOCAL void d_matrix_matrix_mult(
        float    **mat,
        float    **matr,
        int      row,
        int      col,
        float    **ans)
{
        int      r, c, m;
        float    *slot;

        for(r = 0; r < row; r++)
        {
            for(c = 0; c < col; c++)
            {
                slot = &ans[r][c];
                *slot = 0.0;
                for(m = 0; m < col; m++)
                    *slot += mat[r][m]*matr[m][c];
            }
        }
}


/*
*       impose_gas_sine_states():
*
*       Builds the boundary state bdryst at the point qcrds and time t for
*       the sine-wave density problem.
*
*       bdryst is first filled by assign() from the state of tri (tri->st
*       when rk_iter is 0, otherwise the stage state
*       mid_soln[tri->id].st[rk_iter]) and then overwritten in TGAS_STATE
*       form with
*
*           velocity = (1, -0.7),  pressure = 1,
*           density  = 1 + 0.5*sin(x + y - (u+v)*t)
*
*       before being converted back to GAS_STATE.  The parameters side, nor
*       and dt are not used.
*/
LOCAL void impose_gas_sine_states(
        Locstate    bdryst,
        TRI         *tri,
        float       *qcrds,
        Front       *fr,
        int         side,
        float       *nor,
        float       dt,
        Mid_soln    *mid_soln,
        int         rk_iter,
        float       t)
{
        Locstate    src;

        /* Template state: current solution, or the stage solution. */
        if(rk_iter == 0)
            src = tri->st;
        else
            src = mid_soln[tri->id].st[rk_iter];

        assign(bdryst, src, fr->sizest);

        set_state(bdryst,TGAS_STATE,bdryst);
        Vel(bdryst)[0] = 1.0;
        Vel(bdryst)[1] = -0.7;
        Dens(bdryst) = 1.0 + 0.5*sin(qcrds[0] + qcrds[1] - (Vel(bdryst)[0] + Vel(bdryst)[1])*t);
        Press(bdryst) = 1.0;

        set_state(bdryst,GAS_STATE,bdryst);
}

/*
*       gas_sine_L1():
*
*       Prints density error diagnostics for the sine-wave density problem.
*       Triangles whose BC_type is SUBDOMAIN are skipped.  On every other
*       triangle the numerical density (con_u_at_pt()) and the reference
*       density (gas_sine_den_soln() at fr->time) are compared at seven
*       points: the three vertices (weight 0.05), the three edge midpoints
*       (weight 2/15) and the centroid (weight 9/20).  Three sums, each
*       multiplied by tri->Lmass_matrix[0][0], are accumulated:
*
*           total_mass   weighted sum of |exact - numerical|
*           avg subtract |weighted numerical - weighted exact|
*           cent total   |exact - numerical| at the centroid
*
*       together with the largest centroid error (mass_peak).  The first
*       output line is labelled "compute_L1".
*
*       When the file-level debug_flag is YES, the vertex values and the
*       centroid values of the current triangle are printed and the flag is
*       then reset to NO.
*/
LOCAL void gas_sine_L1(
	Front   *fr)
{
        /* Weights in visiting order: vertices, edge midpoints, centroid. */
        static const double wt[7] = {0.05, 0.05, 0.05,
                                     2.0/15.0, 2.0/15.0, 2.0/15.0,
                                     9.0/20.0};
        SURFACE      **s;
        TRI          *t;
        double       *cent;
        float        *qpt[7];
        float        emid[3][3], ctr[MAXD], coords[MAXD], con_u[4];
        float        l1_sum = 0.0, cent_sum = 0.0, avg_sum = 0.0;
        float        linf, err, num_avg, exact_avg, dens;
        int          d, q;

        linf = -HUGE_VAL;
        for(s = fr->mesh->surfaces; s && *s; s++)
        {
            for (t = first_tri(*s); !at_end_of_tri_list(t,*s);
                t = t->next)
            {
                if(t->BC_type == SUBDOMAIN)
                    continue;

                cent = fg_centroid(t);

                /* Collect the seven sample points. */
                for(q = 0; q < 3; q++)
                    qpt[q] = Coords(Point_of_tri(t)[q]);
                for(d = 0; d < 2; d++)
                {
                    emid[0][d] = 0.5*(qpt[0][d] + qpt[1][d]);
                    emid[1][d] = 0.5*(qpt[1][d] + qpt[2][d]);
                    emid[2][d] = 0.5*(qpt[2][d] + qpt[0][d]);
                }
                qpt[3] = emid[0];
                qpt[4] = emid[1];
                qpt[5] = emid[2];
                ctr[0] = cent[0];
                ctr[1] = cent[1];
                qpt[6] = ctr;

                err = num_avg = exact_avg = 0.0;
                for(q = 0; q < 7; q++)
                {
                    con_u_at_pt(t->st, qpt[q], cent, con_u);
                    coords[0] = qpt[q][0];
                    coords[1] = qpt[q][1];
                    dens = gas_sine_den_soln(coords,fr->time);

                    if(debug_flag == YES)
                    {
                        if(q < 3)
                        {
                            printf("WARNING: tri(%d) vert error tri-init (%12.11g %12.11g)\n",
                              t->id, con_u[0], dens);
                        }
                        else if(q == 6)
                        {
                            printf("crds[%g %g] tri-init cent (%12.11g %12.11g) error\n",
                                ctr[0], ctr[1],  con_u[0], dens);
                            debug_flag = NO;
                        }
                    }

                    err += wt[q]*fabs(dens-con_u[0]);
                    num_avg += wt[q]*(con_u[0]);
                    exact_avg += wt[q]*(dens);
                }

                /* con_u and dens now hold the centroid values. */
                l1_sum += fabs(err)*t->Lmass_matrix[0][0];
                avg_sum += fabs(num_avg-exact_avg)*t->Lmass_matrix[0][0];
                cent_sum += fabs(con_u[0]-dens)*t->Lmass_matrix[0][0];

                // L_inf use centroid state
                if(fabs(con_u[0] - dens)  > linf)
                    linf = fabs(con_u[0] - dens);
            }
        }

        printf("compute_L1, total_mass = %24.20g, mass_peak = %24.20g\n",
                   l1_sum, linf);
        printf("time = %g, cent averaged L1 total = %24.20g, avg and subtract = %24.20g\n",
                 fr->time, cent_sum, avg_sum);
}

LOCAL float gas_sine_den_soln(
        float           *coords,
        float           time)
{
        float           r2, r, ep = 5.0;
        float           du[2], dT, T, S, nT, nS, nP;
        float           gam = 1.4, P, dens, V[MAXD];
        float           bar[3], newdens, u = 1.0, v = -0.7;
        int             i;

        dens = 1.0 + 0.5*sin(coords[0] + coords[1] - (u+v)*time);
        return dens;
}

/*
*       compute_tri_geom():
*
*       Sets up the 2x2 system of the triangle geometry relation from
*       Cockburn-Shu V,
*
*           mid - cent = alpha_1*(nbcent0 - cent) + alpha_2*(nbcent1 - cent),
*
*       and passes it to comp_coef(), which is expected to return
*       alpha_1, alpha_2 in coef.  Column 0 of the matrix is nbcent0 - cent,
*       column 1 is nbcent1 - cent, and the right-hand side is mid - cent.
*/
LOCAL void compute_tri_geom(
	float        *mid,
        double  *cent,
        double  *nbcent0,
        double  *nbcent1,
        float        *coef)
{
        float   A[2][2], rside[2];
        int     d;

        for(d = 0; d < 2; d++)
        {
            rside[d] = mid[d] - cent[d];
            A[d][0] = (nbcent0[d]-cent[d]);
            A[d][1] = (nbcent1[d]-cent[d]);
        }

        comp_coef(A,rside,coef);
}

/*
*       burg_sol():
*
*       Solves u_t + f(u)_x = 0 along characteristics and returns the
*       solution at (x,t), where x = (crds[0]+crds[1])/sqrt(2).
*
*       The foot y of the characteristic through (x,t) satisfies
*
*           y + f'(u0(y))*t - x = 0,
*
*       which is solved by Newton iteration starting from
*       y = x - f'(intial_guess)*t.  The routine relies on burg_init()
*       (initial data u0), deriv_burg_init() (its derivative), fprime() (f')
*       and fprimeprime() (f'').  The value returned is burg_init(y).
*
*       The iteration stops when two successive iterates differ by less than
*       1e-15 or after 300 steps.  If it has not converged by then (this
*       includes a NaN iterate), an error is printed and clean_up(ERROR) is
*       called.
*
*       Changes relative to the previous version:
*         - the loop condition uses logical && instead of bitwise &;
*         - failure is decided by the convergence test itself.  Previously
*           the loop allowed 300 steps but any run that needed 200 or more
*           was reported as an error even when it had converged, and a NaN
*           iterate was returned silently.
*
*       NOTE(review): burg_init() takes a float argument, so the root is
*       narrowed to float on each call - confirm that this is intended.
*/
LOCAL float burg_sol(
	float *crds,
        float t,
        float intial_guess)
{
        const int    max_iter = 300;
        const double er_tol = 1.0e-15;
        float        x;
        double       y_prev, y_cur;
        int          iter = 0;

        x = (crds[0]+crds[1])/sqrt(2);

        y_cur = x-fprime(intial_guess)*t;   //root of the characteristics
        y_prev = y_cur+1.0;
        /* !(a < b) rather than (a >= b) so that NaN counts as unconverged */
        while(!(fabs(y_cur-y_prev) < er_tol) && iter < max_iter)
        {
            y_prev = y_cur;
            y_cur = y_prev-(y_prev+fprime(burg_init(y_prev))*t-x)/(1.0+fprimeprime(burg_init(y_prev))*deriv_burg_init(y_prev)*t);
            iter++;
        }
        if(!(fabs(y_cur-y_prev) < er_tol))
        {
            printf("ERROR: exact solver doesn't converge at (%g %g %g)\n", crds[0], crds[1], t);
            clean_up(ERROR);
        }
        return burg_init(y_cur);
}

/*
 * Initial profile of the Burgers test problem:
 *     u0(s) = 0.25 + 0.5*sin(PI*sqrt(2)*s).
 * (A variant without the sqrt(2) factor, 0.25 + 0.5*sin(PI*s), was
 * used previously.)
 */
LOCAL float burg_init(
	float s)
{
  const double mean = 0.25, amplitude = 0.5;
  const double wave_number = PI*sqrt(2.0);

  return mean+amplitude*sin(wave_number*s);
}

/*
 * Derivative of the initial profile of the Burgers' equation,
 *     u0'(s) = PI*0.5*sqrt(2)*cos(PI*sqrt(2)*s).
 * (The variant without the sqrt(2) factor is PI*0.5*cos(PI*s).)
 */
LOCAL double deriv_burg_init(
        double s)
{
  const double amplitude = 0.5;
  const double slope_scale = PI*amplitude*sqrt(2.0);
  const double wave_number = PI*sqrt(2.0);

  return slope_scale*cos(wave_number*s);
}

/*
 * f'(s) for the flux used by burg_sol(): sqrt(2)*s.
 * Earlier variants: s (unit scale) and the constant sqrt(2) for
 * linear advection.
 */
LOCAL double fprime(
        double s)
{
  const double scale = sqrt(2.0);

  return scale*s;
}

/*
 * f''(s) for the flux used by burg_sol(): the constant sqrt(2); the
 * argument s is not used.
 * Earlier variants: 1 (unit scale) and 0 for linear advection.
 */
LOCAL double fprimeprime(
        double s)
{
  const double curvature = sqrt(2.0);

  return curvature;
}

/*
 * Burgers_L1 - print error measures of the DG solution against the
 * solution returned by burg_sol() at time fr->time.
 *
 * For every triangle whose BC_type is not SUBDOMAIN:
 *   - |burg_sol - con_u[0]| is evaluated at the 13 points returned by
 *     tri_quadrature_13_pts() and combined with the weights w1..w4; the
 *     absolute value of that sum times tri->Lmass_matrix[0][0] is added
 *     to total_mass;
 *   - the same error at the centroid times tri->Lmass_matrix[0][0] is
 *     added to cent_total;
 *   - the largest centroid error is tracked in mass_peak, together with
 *     the triangle on which it occurs.
 *
 * The totals are printed; the triangle with the peak centroid error is
 * printed only when one was actually found (none is found for an empty
 * mesh, or when every centroid error compares false, e.g. NaN).
 */
LOCAL void Burgers_L1(
	Front   *fr)
{
        SURFACE      **surf;
        TRI          *tri;
        TRI          *on_tri = NULL;  /* triangle holding the peak error */
        double  *cent;
        float        crds[MAXD];
        float        total_mass = 0.0, cent_total = 0.0, mass_peak;
        float        ans, dens;
        float        *pcrds[3], con_u[4];
        float        qcrds[13][2], ansarray[13];
        int          i;
        /* weights applied to point 0, points 1-3, 4-6 and 7-12 */
        float w1 =-0.149570044467670, w2 = 0.053347235608839,
              w3 = 0.175615257433204,  w4 = 0.077113760890257;

        mass_peak = -HUGE_VAL;
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;
                cent = fg_centroid(tri);
                for(i = 0; i < 3; i++)
                    pcrds[i] = Coords(Point_of_tri(tri)[i]);

                /* pointwise error at the 13 quadrature points */
                tri_quadrature_13_pts(pcrds[0], pcrds[1], pcrds[2], qcrds);
                for(i = 0; i < 13; i++)
                {
                    con_u_at_pt(tri->st, qcrds[i], cent, con_u);
                    dens = burg_sol(qcrds[i],fr->time,con_u[0]);
                    ansarray[i] = fabs(dens-con_u[0]);
                }

                ans = (w1*ansarray[0] +
                      w2*(ansarray[1] + ansarray[2] + ansarray[3]) +
                      w3*(ansarray[4] + ansarray[5] + ansarray[6]) +
                      w4*(ansarray[7] + ansarray[8] + ansarray[9] +
                          ansarray[10] + ansarray[11] + ansarray[12]));

                /* tri center */
                crds[0] = cent[0];
                crds[1] = cent[1];
                con_u_at_pt(tri->st, crds, cent, con_u);
                dens = burg_sol(crds,fr->time,con_u[0]);

                /* w1 is negative, so the weighted sum can be negative */
                total_mass += fabs(ans)*tri->Lmass_matrix[0][0];
                cent_total += fabs(con_u[0]-dens)*tri->Lmass_matrix[0][0];

                // L_inf use centroid state
                if(fabs(con_u[0] - dens)  > mass_peak)
                {
                    mass_peak = fabs(con_u[0] - dens);
                    on_tri = tri;
                }
            }
        }

        printf("compute_L1, total_mass = %24.20g, mass_peak = %24.20g\n",
                   total_mass, mass_peak);
        printf("time = %g, cent averaged L1 total = %24.20g\n",
                 fr->time, cent_total);

        /* Nothing to report when no triangle was selected. */
        if(on_tri == NULL)
            return;

        print_general_vector("Tri_pt", Coords(Point_of_tri(on_tri)[0]), 2, "\n");
        print_general_vector("Tri_pt", Coords(Point_of_tri(on_tri)[1]), 2, "\n");
        print_general_vector("Tri_pt", Coords(Point_of_tri(on_tri)[2]), 2, "\n");
        /* The centroid is handled as double in this file; cast so that the
         * argument always matches the %g conversion. */
        printf("Triangle(%d) ceontriod (%18.17g, %18.17g)\n\n", on_tri->id,
                (double)fg_centroid(on_tri)[0], (double)fg_centroid(on_tri)[1]);
}

/*
 * Return the index of the smallest entry of cos_th[0..n-1]; the first
 * such index wins on ties.  The smallest cosine is the one the name
 * refers to as the maximum angle.  Returns 0 when n <= 1.
 */
LOCAL int max_angle(float *cos_th, int n)
{
        int  k;
        int  smallest = 0;

        for(k = 1; k < n; k++)
        {
            if(cos_th[k] < cos_th[smallest])
                smallest = k;
        }
        return smallest;
}

/*
 * weight_mod - compute n weights w[] from the n values a[].
 *
 *   n == 1               : w[0] = 1.
 *   sum |a_i| ~ 0        : all weights are set to 0.
 *   otherwise            : w = 2/3 * v1 + 1/3 * v2, where
 *       v1_i = (sum_{j != i} |a_j|) / ((n-1) * sum_j |a_j|)
 *       v2_i = (1/|a_i|) / sum_j (1/|a_j|),
 *              with 1/|a_j| replaced by 100000 when |a_j| ~ 0.
 *
 * NOTE(review): the blend at the end of this routine used to read a
 * scratch array that was never written, because both candidate
 * "version 2" (1/|a_i|) and "version 3" (1/a_i^2, noted as "too smooth")
 * computations were commented out.  Version 2 is restored here as the
 * candidate that was not rejected in the comments; confirm this is the
 * intended combination.
 */
LOCAL void     weight_mod(
	float   *a,
	int     n,
        float   *w)
{
        float    sum = 0.0, inv_sum = 0.0, inv;
        int      i, j;

        if(n == 1)
        {
            w[0] = 1.0;
            return;
        }

        for(i = 0; i < n; i++)
            sum += fabs(a[i]);
        if(fabs(sum) < MACH_EPS*100.0)
        {
            for(i = 0; i < n; i++)
                w[i] = 0.0;
            return;
        }

        // version 1 (|a_2| + |a_3|)/(|a_1| + |a_2| + |a_3|)
        // square version does not work well for shock
        for(i = 0; i < n; i++)
        {
            w[i] = 0.0;
            for(j = 0; j < n; j++)
            {
                if(j == i) continue;
                w[i] += fabs(a[j]);
            }
            w[i] /= (sum*(n-1));
        }

        // version 2, 1/|a_i|: first pass accumulates the normaliser
        for(i = 0; i < n; i++)
        {
            inv = (fabs(a[i]) < MACH_EPS*100.0) ? 100000.0 : 1.0/fabs(a[i]);
            inv_sum += inv;
        }

        // combination of version 1 and version 2
        for(i = 0; i < n; i++)
        {
            inv = (fabs(a[i]) < MACH_EPS*100.0) ? 100000.0 : 1.0/fabs(a[i]);
            w[i] = 2.0/3.0*w[i] + 1.0/3.0*(inv/inv_sum);
        }
}

LOCAL int WENO_mod(
	float  *a,
        float  *b,
        float  *angle,
        int    n,   
        float  h,
        float  *w)
{
        float  sum = 0.0, wtmp[10], d;
        int    i, j;
        float  eps = 0.000001;
        float  da[10]; 

        sum = 0.0;
        for(i = 0; i < n; i++)
        {
            da[i] = 1.0/(angle[i]);
            sum += da[i];
        }
        for(i = 0; i < n; i++)
            da[i] = da[i]/sum; 

        sum = 0.0;
        for(i = 0; i < n; i++)
        {
            wtmp[i] = da[i]/(1.0 + h*(sqr(a[i]) + sqr(b[i])));
            // wtmp[i] = da[i]/(1.0 + (fabs(a[i]) + fabs(b[i]))*(sqr(a[i]) + sqr(b[i])));
            // wtmp[i] = da[i]/(1.0 + (sqr(a[i]) + sqr(b[i])));
            sum += wtmp[i];
        }
        for(i = 0; i < n; i++)
            w[i] = wtmp[i]/sum;
}

/// WENO_mod_on_3rd &&
/// WENO_mod_on_2nd are just for
/// the convenience of programming.
LOCAL int WENO_mod_on_3rd(
        float  *a,
        float  *b,
        float  *angle,
        int    n,
        float  h,
        float  *w)
{
        float  sum = 0.0, wtmp[10], d;
        int    i, j;
        float  eps = 0.000001;
        float  da[10];

        sum = 0.0;
        for(i = 0; i < n; i++)
        {
            da[i] = 1.0/(angle[i]);
            wtmp[i] = da[i]/(1.0 + h*sqr(sqr(a[i]) + sqr(b[i])));
            // wtmp[i] = da[i]/(1.0 + h*sqr(sqr(a[i]) + sqr(b[i])));
            // wtmp[i] = da[i]/(1.0 + h*(fabs(a[i])*sqr(a[i]) + fabs(b[i])*sqr(b[i])));
            sum += wtmp[i];
        }
        for(i = 0; i < n; i++)
            w[i] = wtmp[i]/sum;
}

LOCAL int WENO_mod_on_2nd(
        float  *a,
        float  *b,
        float  *angle,
        int    n,
        float  h,
        float  *w)
{
        float  sum = 0.0, wtmp[10], d;
        int    i, j;
        float  eps = 0.000001;
        float  da[10];

        sum = 0.0;
        for(i = 0; i < n; i++)
        {
            da[i] = 1.0/(angle[i]);
            wtmp[i] = da[i]/(1.0 + h*(sqr(a[i]) + sqr(b[i])));
            sum += wtmp[i];
        }
        for(i = 0; i < n; i++)
            w[i] = wtmp[i]/sum;
}


/*
 * WENO_mod_cand1_P3 - compute n normalised weights
 *     w_i ~ (1/angle_i) / (L + h*a_i^2),
 * where L = max(|mean of a|, 1e-6).
 *
 * The scratch array holds 10 entries, so n must not exceed 10.
 * Always returns NO.
 */
LOCAL int WENO_mod_cand1_P3(
        float  *a,
        float  *angle,
        int    n,
        float  h,
        float  *w)
{
        float  floor_eps = 0.000001;
        float  raw[10];
        float  acc, inv_angle, L_const;
        int    k;

        /* L_const: magnitude of the mean of a[], bounded below by floor_eps */
        acc = 0.0;
        for(k = 0; k < n; k++)
            acc += a[k];
        L_const = max(fabs(acc/n),floor_eps);

        /* unnormalised weights and their total */
        acc = 0.0;
        for(k = 0; k < n; k++)
        {
            inv_angle = 1.0/(angle[k]);
            raw[k] = inv_angle/(L_const + h*(sqr(a[k])));
            acc += raw[k];
        }

        for(k = 0; k < n; k++)
            w[k] = raw[k]/acc;

        return NO;
}

/*
 * harmonic_mod - compute n normalised weights
 *     w_i ~ (1/angle_i) / (1e-5 + |a_i|).
 *
 * The scratch array holds 10 entries, so n must not exceed 10.
 */
LOCAL void harmonic_mod(
        float  *a,
        float  *angle,
        int    n,
        float  *w)
{
        float  small = 0.00001;
        float  raw[10];
        float  total = 0.0, inv_angle;
        int    k;

        /* unnormalised weights and their total */
        for(k = 0; k < n; k++)
        {
            inv_angle = 1.0/(angle[k]);
            raw[k] = inv_angle/(small + fabs(a[k]));
            total += raw[k];
        }

        for(k = 0; k < n; k++)
            w[k] = raw[k]/total;
}

/*
 * WENO_mod_cand1_P2 - compute n normalised weights
 *     w_i ~ (1/angle_i) / (L + a_i^2),
 * where L = max(|mean of a|, 1e-6).  The parameter h is accepted but
 * not used by the active formula.
 *
 * The scratch array holds 10 entries, so n must not exceed 10.
 * Always returns NO.
 */
LOCAL int WENO_mod_cand1_P2(
        float  *a,
        float  *angle,
        int    n,
        float  h,
        float  *w)
{
        float  floor_eps = 0.000001;
        float  raw[10];
        float  acc, inv_angle, L_const;
        int    k;

        /* L_const: magnitude of the mean of a[], bounded below by floor_eps */
        acc = 0.0;
        for(k = 0; k < n; k++)
            acc += a[k];
        L_const = max(fabs(acc/n),floor_eps);

        /* unnormalised weights and their total */
        acc = 0.0;
        for(k = 0; k < n; k++)
        {
            inv_angle = 1.0/(angle[k]);
            raw[k] = inv_angle/(L_const + (sqr(a[k])));
            acc += raw[k];
        }

        for(k = 0; k < n; k++)
            w[k] = raw[k]/acc;

        return NO;
}

LOCAL int WENO_mod_1(
        float  *a,
        float  *b,
        float  *angle,
        int     n,
        float  *w)
{
        float  sum = 0.0, wtmp[10], d;
        int    i, j;
        float  eps = 0.000001;
        float  da[10];
        float  ww[10];

        sum = 0.0;  
        for(i = 0; i < n; i++)
        {
            da[i] = 1.0/(angle[i]);
            wtmp[i] = (da[i])/sqr(eps + (sqr(a[i]) + sqr(b[i])));
            // wtmp[i] = (da[i])/(eps + (sqr(a[i]) + sqr(b[i])));
            // wtmp[i] = (da[i])/(1.0+sqr((sqr(a[i]) + sqr(b[i]))));
            sum += wtmp[i];
        }
        for(i = 0; i < n; i++)
            w[i] = wtmp[i]/sum;
}

LOCAL int unify_weight(
        float  *angle,
        int     n,
        float  *w)
{
        float  sum = 0.0;
        int    i;

        for(i = 0; i < n; i++)
        {
            w[i] = 1.0/(angle[i]);
            sum += w[i];
        }
        for(i = 0; i < n; i++)
            w[i] = w[i]/sum;
}

/*
 * g_sine_evolution_initializer - fill the DG coefficients and the
 * average values of `state` on triangle `tri` from ff_g_sin() at the
 * given time.
 *
 * For each equation j < N_EQN and each coefficient k < MAX_N_COEF the
 * product ff_g_sin(.,j,time)*vh_val(.,cent,k) is combined over the 13
 * points returned by tri_quadrature_13_pts() with weights w1..w4 and
 * scaled by the triangle area; the result is multiplied by
 * tri->mass_inv to obtain the coefficients.  The averages Dens, Mom and
 * Energy are then rebuilt from row 0 of tri->Lmass_matrix.
 *
 * Rows of the work arrays at index >= N_EQN are zeroed, so the
 * coefficient copy below never reads uninitialised storage when fewer
 * than four equations are active.
 */
LOCAL void g_sine_evolution_initializer(
        TRI             *tri,
        Locstate        state,
        float           time)
{
        int             i, j, k, indx;
        float   *pcrds[3];
        double   *cent = fg_centroid(tri);
        double   rhs[4][MAX_N_COEF], mulrhs[4][MAX_N_COEF];
        float   area = fg_area(tri);
        double      **mass_inv = tri->mass_inv;
        double      **Lmass_matrix = tri->Lmass_matrix;
        /* weights applied to point 0, points 1-3, 4-6 and 7-12 */
        float w1 =-0.149570044467670, w2 = 0.053347235608839,
          w3 = 0.175615257433204,  w4 = 0.077113760890257;
        float crds[13][2];

        for(i = 0; i < 4; i++)
        {
            for(j = 0; j < MAX_N_COEF; j++)
            {
                rhs[i][j] = 0.0;
                mulrhs[i][j] = 0.0;
            }
        }

        for(i = 0; i < 3; i++)
            pcrds[i] = Coords(Point_of_tri(tri)[i]);

        tri_quadrature_13_pts(pcrds[0], pcrds[1], pcrds[2], crds);

/* integrand of equation j against coefficient k at quadrature point q */
#define G_SINE_QTERM(q) (ff_g_sin(crds[(q)],j, time)*vh_val(crds[(q)],cent,k))
        for(j = 0; j < N_EQN; j++)
        {
            for(k = 0; k < MAX_N_COEF; k++)
            {
                rhs[j][k] = w1*G_SINE_QTERM(0) +
                      w2*(G_SINE_QTERM(1) +
                          G_SINE_QTERM(2) +
                          G_SINE_QTERM(3)) +
                      w3*(G_SINE_QTERM(4) +
                          G_SINE_QTERM(5) +
                          G_SINE_QTERM(6)) +
                      w4*(G_SINE_QTERM(7) +
                          G_SINE_QTERM(8) +
                          G_SINE_QTERM(9) +
                          G_SINE_QTERM(10) +
                          G_SINE_QTERM(11) +
                          G_SINE_QTERM(12));
                rhs[j][k] *= area;
            }
        }
#undef G_SINE_QTERM

        for(i = 0; i < N_EQN; i++)
            matrix_vec_mult(mass_inv, rhs[i], MAX_N_COEF, MAX_N_COEF, mulrhs[i]);

        for(indx = 0; indx < MAX_N_COEF; indx++)
        {
            dg_Dens(state)[indx] = mulrhs[0][indx];
            dg_Mom(state)[0][indx] = mulrhs[1][indx];
            dg_Mom(state)[1][indx] = mulrhs[2][indx];
            dg_Energy(state)[indx] = mulrhs[3][indx];
        }

        // Compute average soln
        Dens(state) = 0.0;
        Mom(state)[0] = 0.0;
        Mom(state)[1] = 0.0;
        Energy(state) = 0.0;
        for(indx = 0; indx < MAX_N_COEF; indx++)
        {
            Dens(state) += dg_Dens(state)[indx]*Lmass_matrix[0][indx];
            Mom(state)[0] += dg_Mom(state)[0][indx]*Lmass_matrix[0][indx];
            Mom(state)[1] += dg_Mom(state)[1][indx]*Lmass_matrix[0][indx];
            Energy(state) += dg_Energy(state)[indx]*Lmass_matrix[0][indx];
        }
        Dens(state) /= Lmass_matrix[0][0];
        Mom(state)[0] /= Lmass_matrix[0][0];
        Mom(state)[1] /= Lmass_matrix[0][0];
        Energy(state) /= Lmass_matrix[0][0];

        return;
}


/*
 * ff_g_sin - component n of the sine test state at point crds and time t.
 *
 * With den = 1 + 0.5*sin(x + y - (u0 + u1)*t), (u0, u1) = (1, -0.7) and
 * p = 1:
 *   n = 0 : den
 *   n = 1 : den*u0
 *   n = 2 : den*u1
 *   n = 3 : p/0.4 + 0.5*den*(u0^2 + u1^2)
 * Any other n yields 0.0 (it used to return an indeterminate value).
 */
EXPORT float ff_g_sin(
        float *crds,
        int   n,
        float t)
{
        float den, p = 1.0, u[2] = {1.0, -0.7};
        float tmp = 0.0;

        den = 1.0 + 0.5*sin(crds[0] + crds[1] - (u[0] + u[1])*t);
        switch(n)
        {
        case 0:
            tmp = den;
        break;
        case 1:
            tmp = den*u[0];
        break;
        case 2:
            tmp = den*u[1];
        break;
        case 3:
            tmp = p/0.4 + 0.5*den*(sqr(u[0]) + sqr(u[1]));
        break;
        default:
            /* unknown component index: return a defined value */
            tmp = 0.0;
        break;
        }
        return tmp;
}

// Condition number of the 2 by 2 system Ax = b in the 1-norm
// (maximum absolute column sum): ||A||_1 * ||A^{-1}||_1.
// Returns HUGE_VAL when |det A| < MACH_EPS*1000.
LOCAL float cond_num(
        float  A[ ][2])
{
        float  det, inv_det;
        float  inv00, inv01, inv10, inv11;
        float  norm_A, norm_inv;

        det = A[0][0]*A[1][1] - A[0][1]*A[1][0];
        if(fabs(det) < MACH_EPS*1000.0)
            return HUGE_VAL;

        /* entries of the inverse, from the adjugate scaled by 1/det */
        inv_det = 1.0/det;
        inv00 = inv_det*A[1][1];
        inv01 = -inv_det*A[0][1];
        inv10 = -inv_det*A[1][0];
        inv11 = inv_det*A[0][0];

        norm_A = max(( fabs(A[0][0]) + fabs(A[1][0]) ), ( fabs(A[0][1]) + fabs(A[1][1]) ) );
        norm_inv = max(( fabs(inv00) + fabs(inv10) ), ( fabs(inv01) + fabs(inv11) ) );

        return norm_A*norm_inv;
}

// To remove unnecessary anti-diffusion.
//
// For component k_comp (0: Dens, 1: Mom[0], 2: Mom[1], 3: Energy) compare
// the state values of st and nbst with the corresponding entries of
// con_u and nbcon_u.  Returns 0.0 when
//   - the difference of the two state values and the difference of the
//     two con_u entries have opposite signs, and
//   - at least one of the two con_u entries lies outside the interval
//     spanned by the two state values;
// otherwise returns alpha unchanged.  When N_EQN == 1 the Dens test is
// applied regardless of k_comp.  A k_comp outside 0..3 returns alpha.

/* the combined test above for one pair of state values and one pair of
 * con_u entries; arguments are evaluated more than once */
#define LOC_ALPHA_REJECT(s, nbs, u, nbu)                        \
        (((s)-(nbs))*((u) - (nbu)) < 0.0 &&                     \
         ((u) > max((s), (nbs)) ||                              \
          (u) < min((s), (nbs)) ||                              \
          (nbu) > max((s), (nbs)) ||                            \
          (nbu) < min((s), (nbs))))

LOCAL float comput_loc_alpha(
	int       k_comp, 
        Locstate  st, 
        float     *con_u, 
        Locstate  nbst, 
        float     *nbcon_u, 
        float     alpha)
{
        /* a single equation is always tested on the density slot */
        if(N_EQN == 1)
            k_comp = 0;

        switch(k_comp)
        {
        case 0:
            if(LOC_ALPHA_REJECT(Dens(st), Dens(nbst), con_u[0], nbcon_u[0]))
                return 0.0;
        break;
        case 1:
            if(LOC_ALPHA_REJECT(Mom(st)[0], Mom(nbst)[0], con_u[1], nbcon_u[1]))
                return 0.0;
        break;
        case 2:
            if(LOC_ALPHA_REJECT(Mom(st)[1], Mom(nbst)[1], con_u[2], nbcon_u[2]))
                return 0.0;
        break;
        case 3:
            if(LOC_ALPHA_REJECT(Energy(st), Energy(nbst), con_u[3], nbcon_u[3]))
                return 0.0;
        break;
        }
        return alpha;
}
#undef LOC_ALPHA_REJECT


/*
 * Return YES when the edge type of side `side` of tri is one of
 * NEUMANN, IN_FLOW, OUT_FLOW or CONST_P, and NO otherwise.
 */
LOCAL int is_phy_bdry_side(
        TRI      *tri,
        int      side)
{
        if(fg_e_type(tri)[side] != NEUMANN &&
           fg_e_type(tri)[side] != IN_FLOW &&
           fg_e_type(tri)[side] != OUT_FLOW &&
           fg_e_type(tri)[side] != CONST_P)
        {
            return NO;
        }

        return YES;
}


/*
 * install_local_tris - collect tri and the triangles reachable from it
 * through up to three successive Tri_on_side() steps into loc_tris[].
 *
 * loc_tris[0] is tri itself.  Each of three passes appends the side
 * neighbours of the triangles added by the previous pass.  A neighbour
 * is skipped when it is NULL, or when tri_on_phy_bdry() reports YES for
 * it and its BC_type is SUBDOMAIN.  Duplicates are not removed (a
 * triangle can be appended more than once), and the capacity of
 * loc_tris is not checked here.
 *
 * Returns the number of entries written to loc_tris.
 */
LOCAL int install_local_tris(
        TRI   *tri,
        TRI   **loc_tris)
{
        int  loc_N = 1;
        int  first = 0, last = 1;    /* entries added by the previous pass */
        int  pass, j, side;
        TRI  *nb;

        loc_tris[0] = tri;
        for(pass = 0; pass < 3; pass++)
        {
            for(j = first; j < last; j++)
            {
                for(side = 0; side < 3; side++)
                {
                    nb = Tri_on_side(loc_tris[j],side);
                    if(nb != NULL &&
                       !(tri_on_phy_bdry(nb) == YES && nb->BC_type == SUBDOMAIN))
                    {
                        loc_tris[loc_N] = nb;
                        loc_N++;
                    }
                }
            }
            /* the next pass walks exactly what this pass appended */
            first = last;
            last = loc_N;
        }

        return loc_N;
}

/*
 * Return the current value of the file-scope variable newdt.
 * The argument fr is not used.
 */
EXPORT float obtain_time_step_on_tri(Front *fr)
{
        return newdt;
}

/*
 * Return NO if some a[i], 1 <= i < n, has a strictly negative product
 * with a[0]; otherwise return YES.  Zero entries never cause NO.
 */
LOCAL int fsame_sign(
	float *a,
        int   n)
{
        int   k = 1;

        while(k < n)
        {
            if(a[0]*a[k] < 0.0)
                return NO;
            k++;
        }
        return YES;
}


/*
 * unphysical_state - evaluate the solution of tri at its three vertices
 * and return YES as soon as invalid_state() flags one of them; return
 * NO if none is flagged.
 *
 * The state examined is tri->st when rk_iter == RK_STEP, otherwise
 * midsoln[tri->id].st[rk_iter].  A static scratch state is allocated
 * and copied from that state on the first call only; later calls reuse
 * it and overwrite just Dens, Mom[0], Mom[1] and Energy with the values
 * from con_u_at_pt().
 */
LOCAL int unphysical_state(
        TRI       *tri,
	Mid_soln  *midsoln,
        int       rk_iter)
{
        static  Locstate scratch = NULL;
        double   *cent = fg_centroid(tri);
        Locstate  st = (rk_iter == RK_STEP) ? tri->st
                                            : midsoln[tri->id].st[rk_iter];
        float    *vtx;
        float    con_u[4];
        int      v;

        if(scratch == NULL)
        {
            (*Params(st)->_alloc_state)(&scratch,Params(st)->sizest);
            assign(scratch, st, Params(st)->sizest);
        }

        for(v = 0; v < 3; v++)
        {
            vtx = Coords(Point_of_tri(tri)[v]);
            con_u_at_pt(st, vtx, cent, con_u);
            Dens(scratch) = con_u[0];
            Mom(scratch)[0] = con_u[1];
            Mom(scratch)[1] = con_u[2];
            Energy(scratch) = con_u[3];
            if(invalid_state("unphysical_state",scratch,NO))
                return YES;
        }
        return NO;
}


/*
 * reflect_state_about_y - flip the sign of selected DG coefficients of
 * gst and rebuild its average values.
 *
 * Negated coefficients:
 *   - indices 1 and 4 of dg_Dens, dg_Energy and dg_Mom[1];
 *   - indices 0, 2, 3 and 5 of dg_Mom[0].
 * (Presumably these are the coefficients that change sign under the
 * reflection the name refers to - confirm against the basis ordering.)
 *
 * Dens, Mom and Energy are then recomputed from the coefficients using
 * row 0 of Lmass_matrix, divided by Lmass_matrix[0][0].
 */
LOCAL void reflect_state_about_y(
	Locstate gst,
	double   **Lmass_matrix)
{
        static const int flip_scalar[2] = {1, 4};
        static const int flip_mom0[4] = {0, 2, 3, 5};
        int indx, m;

        for(m = 0; m < 2; m++)
        {
            indx = flip_scalar[m];
            dg_Dens(gst)[indx] *= -1.0;
            dg_Energy(gst)[indx] *= -1.0;
            dg_Mom(gst)[1][indx] *= -1.0;
        }
        for(m = 0; m < 4; m++)
        {
            indx = flip_mom0[m];
            dg_Mom(gst)[0][indx] *= -1.0;
        }

        // Compute average soln
        Dens(gst) = 0.0;
        Mom(gst)[0] = 0.0;
        Mom(gst)[1] = 0.0;
        Energy(gst) = 0.0;
        for(indx = 0; indx < MAX_N_COEF; indx++)
        {
            Dens(gst) += dg_Dens(gst)[indx]*Lmass_matrix[0][indx];
            Mom(gst)[0] += dg_Mom(gst)[0][indx]*Lmass_matrix[0][indx];
            Mom(gst)[1] += dg_Mom(gst)[1][indx]*Lmass_matrix[0][indx];
            Energy(gst) += dg_Energy(gst)[indx]*Lmass_matrix[0][indx];
        }
        Dens(gst) /= Lmass_matrix[0][0];
        Mom(gst)[0] /= Lmass_matrix[0][0];
        Mom(gst)[1] /= Lmass_matrix[0][0];
        Energy(gst) /= Lmass_matrix[0][0];
}

/*
 * local_find_time_step_on_tris - call nfr->_time_step_on_tri on every
 * triangle of every surface of nfr->mesh and fold the results into the
 * file-scope variable newdt (newdt = min(newdt, value)).  Whenever a
 * triangle yields a value strictly below the current newdt, it is
 * remembered in the file-scope variable time_on_tri.  newdt is not
 * reset here; it keeps whatever value it had on entry as the starting
 * bound.
 */
LOCAL void local_find_time_step_on_tris(
        Front *nfr)
{
        SURFACE      **s;
        TRI          *t;
        float        tri_dt;

        for(s = nfr->mesh->surfaces; s && *s; s++)
        {
            t = first_tri(*s);
            while(!at_end_of_tri_list(t,*s))
            {
                // time_step_on_tri
                tri_dt = (*nfr->_time_step_on_tri)(nfr, t);
                if(tri_dt < newdt)
                    time_on_tri = t;
                newdt = min(newdt, tri_dt);
                t = t->next;
            }
        }
}

/*
 * p1_L2projection_ver2 - project the solution `st` on `tri` onto the
 * first three DG coefficients and store the result in `stout`.
 *
 * The integrals are evaluated with the 7 points returned by
 * tri_quadrature_7_pts() (weights 0.05 for points 0-2, 2/15 for points
 * 3-5, 9/20 for point 6), scaled by the triangle area.  Vertex-only and
 * edge-centre rules were tried earlier; a more accurate quadrature may
 * still be substituted.
 *
 * The leading 3x3 block of tri->Lmass_matrix is copied and inverted on
 * every call; the two work matrices are allocated once and kept in
 * static storage.  Coefficients 3..MAX_N_COEF-1 of stout are set to
 * zero, and the averages Dens, Mom and Energy of stout are copied from
 * st.
 *
 * Rows of the work arrays at index >= N_EQN stay zero, so the
 * coefficient copy never reads uninitialised storage when fewer than
 * four equations are active.
 */
LOCAL void p1_L2projection_ver2(
         TRI *tri,
         Locstate st,
         Locstate stout)
{
        int          i, j, k, max_n_coef = 3, indx;
        double  *cent = fg_centroid(tri);
        double  rhs[4][3] = {{0.0}}, mulrhs[4][3] = {{0.0}};
        POINT   *p[3];
        float   area = fg_area(tri);
        static  double  **mass_inv = NULL, **Lmass_matrix;
        float   crds[7][2], conu[7][4];

        if(mass_inv == NULL)
        {
            matrix(&(Lmass_matrix), max_n_coef, max_n_coef, sizeof(double));
            matrix(&(mass_inv), max_n_coef, max_n_coef, sizeof(double));
        }

        /* invert the leading max_n_coef x max_n_coef block of the mass matrix */
        for(i = 0; i < max_n_coef; i++)
        {
            for(j = 0; j < max_n_coef; j++)
            {
                Lmass_matrix[i][j] = tri->Lmass_matrix[i][j];
            }
        }

        inverse_matrix(Lmass_matrix, max_n_coef, mass_inv);

        for(i = 0; i < 3; i++)
            p[i] = Point_of_tri(tri)[i];

        Set_params(stout,st);
        set_type_of_state(stout,GAS_STATE);

        /* sample st at the quadrature points */
        tri_quadrature_7_pts(Coords(p[0]), Coords(p[1]), Coords(p[2]), cent, crds);
        for(i = 0; i < 7; i++)
            con_u_at_pt(st, crds[i], cent, conu[i]);

        for(j = 0; j < N_EQN; j++)
        {
            for(k = 0; k < max_n_coef; k++)
            {
                rhs[j][k] = 0.05*conu[0][j]*vh_val(crds[0],cent,k) +
                            0.05*conu[1][j]*vh_val(crds[1],cent,k) +
                            0.05*conu[2][j]*vh_val(crds[2],cent,k) +
                            2.0/15.0*conu[3][j]*vh_val(crds[3],cent,k) +
                            2.0/15.0*conu[4][j]*vh_val(crds[4],cent,k) +
                            2.0/15.0*conu[5][j]*vh_val(crds[5],cent,k) +
                            9.0/20.0*conu[6][j]*vh_val(crds[6],cent,k);
                rhs[j][k] *= area;
            }
        }

        for(i = 0; i < N_EQN; i++)
            matrix_vec_mult(mass_inv, rhs[i], max_n_coef, max_n_coef, mulrhs[i]);

        for(indx = 0; indx < max_n_coef; indx++)
        {
            dg_Dens(stout)[indx] = mulrhs[0][indx];
            dg_Mom(stout)[0][indx] = mulrhs[1][indx];
            dg_Mom(stout)[1][indx] = mulrhs[2][indx];
            dg_Energy(stout)[indx] = mulrhs[3][indx];
        }
        /* higher coefficients are dropped by the projection */
        for(indx = max_n_coef; indx < MAX_N_COEF; indx++)
        {
            dg_Dens(stout)[indx] = 0.0;
            dg_Mom(stout)[0][indx] = 0.0;
            dg_Mom(stout)[1][indx] = 0.0;
            dg_Energy(stout)[indx] = 0.0;
        }

        /* averages are taken from st, not from coefficient 0 of stout */
        Dens(stout) = Dens(st);
        Mom(stout)[0] = Mom(st)[0];
        Mom(stout)[1] = Mom(st)[1];
        Energy(stout) = Energy(st);
}

// Look for over/undershoots of the DG solution on tri: sample the edge
// Gauss points and the three vertices and compare with the cell averages.
/*
*                       jump_at_quadrature():
*
*       Evaluates the conserved variables of tri (via con_u_at_pt) at the
*       Gauss_N quadrature points of each edge and then at the three
*       vertices, and compares every component with [st_min, st_max].
*
*       rk_iter         RK_STEP selects tri->st and nbtri[]->st; any other
*                       value selects midsoln[id].st[rk_iter].
*       imax_side,imax  per component: where the largest overshoot was seen.
*                       imax_side is -1 when none, 0-2 for an edge (imax is
*                       then the Gauss point index), 4 for a vertex (imax is
*                       then 3 + vertex index).
*       imin_side,imin  same for the smallest undershoot.
*       umax,umin       the over/undershooting values themselves; written
*                       only for components whose *_side entry is not -1.
*       st_max,st_min   bounds. Computed here when skip_find_umax == NO,
*                       otherwise used as supplied by the caller.
*
*       Returns YES if any sampled component is outside its bounds, else NO.
*
*       NOTE(review): the bounds are seeded with fabs() of the momentum of
*       tri while the neighbours and the sampled values are compared signed.
*       This looks like a leftover of an earlier all-fabs version.
*       fix_overshoot() relies on the same fabs() convention, so the two
*       should only be changed together - TODO confirm intent.
*/
LOCAL int jump_at_quadrature(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         int       rk_iter,    // normally we use zero
         int       *imax_side, // overshoot max for[N_EQN] at side,
         int       *imax,      // overshoot max for[N_EQN] on side at quadrature pt
         int       *imin_side, // undershoot min for[N_EQN] at side,
         int       *imin,      // undershoot min for[N_EQN] on side at quadrature pt
         float     *umax,
         float     *umin,
         float     *st_max,
         float     *st_min,
         int       skip_find_umax)
{
         Locstate own_st, nb_st[3];
         float    *vert[3], *tail, *head;
         float    qpt[2], u_pt[4];
         double   *tri_cent = fg_centroid(tri);
         int      dim = 2;
         int      nb, eq, d, side, k;
         int      out_of_range = NO;
         int      debug = NO;

         // pick the state level the limiter is working on
         if(rk_iter == RK_STEP)
         {
             own_st = tri->st;
             for(nb = 0; nb < 3; nb++)
                 nb_st[nb] = nbtri[nb]->st;
         }
         else
         {
             own_st = midsoln[tri->id].st[rk_iter];
             // the zero level state in the buffer zone are set before reconstruction
             for(nb = 0; nb < 3; nb++)
                 nb_st[nb] = midsoln[nbtri[nb]->id].st[rk_iter];
         }

         if(debug == YES)
         {
             printf("Dens avg of nghbrs[%g %g %g]\n",
                   Dens(nb_st[0]), Dens(nb_st[1]), Dens(nb_st[2]));
         }

         if(skip_find_umax == NO)
         {
             // seed the bounds with the values of tri itself ...
             st_max[0] = Dens(own_st);
             st_min[0] = st_max[0];
             st_max[1] = fabs(Mom(own_st)[0]);
             st_min[1] = st_max[1];
             st_max[2] = fabs(Mom(own_st)[1]);
             st_min[2] = st_max[2];
             st_max[3] = Energy(own_st);
             st_min[3] = st_max[3];

             // ... then widen them with the three neighbours
             for(nb = 0; nb < 3; nb++)
             {
                 // density
                 if(Dens(nb_st[nb]) > st_max[0])
                     st_max[0] = Dens(nb_st[nb]);
                 if(Dens(nb_st[nb]) < st_min[0])
                     st_min[0] = Dens(nb_st[nb]);

                 // x momentum (signed)
                 if((Mom(nb_st[nb])[0]) > st_max[1])
                     st_max[1] = (Mom(nb_st[nb])[0]);
                 if((Mom(nb_st[nb])[0]) < st_min[1])
                     st_min[1] = (Mom(nb_st[nb])[0]);

                 // y momentum (signed)
                 if((Mom(nb_st[nb])[1]) > st_max[2])
                     st_max[2] = (Mom(nb_st[nb])[1]);
                 if((Mom(nb_st[nb])[1]) < st_min[2])
                     st_min[2] = (Mom(nb_st[nb])[1]);

                 // energy
                 if(Energy(nb_st[nb]) > st_max[3])
                     st_max[3] = Energy(nb_st[nb]);
                 if(Energy(nb_st[nb]) < st_min[3])
                     st_min[3] = Energy(nb_st[nb]);
             }
         }

         if(debug == YES)
         {
             printf("local max and min state[%g %g]\n", st_max[0], st_min[0]);
         }

         // -1 means "nothing recorded yet" for that component
         for(eq = 0; eq < N_EQN; eq++)
         {
             imax_side[eq] = -1;
             imin_side[eq] = -1;
         }

         for(nb = 0; nb < 3; nb++)
             vert[nb] = Coords(Point_of_tri(tri)[nb]);

         // Gauss points on the three edges
         for(side  = 0; side < 3; side++)
         {
             tail = vert[side];
             head = vert[(side+1)%3];
             for(k = 0; k < Gauss_N; k++)
             {
                 // map the reference abscissa q[k] onto the edge
                 for(d = 0; d < dim; d++)
                     qpt[d] = (head[d] + tail[d])/2.0 +
                        (head[d] - tail[d])/2.0*q[k];
                 con_u_at_pt(own_st, qpt, tri_cent, u_pt);

                 for(eq = 0; eq < N_EQN; eq++)
                 {
                     if((u_pt[eq]) > st_max[eq])
                     {
                         if(debug == YES)
                         {
                             printf("tri state overshoot = %g at side[%d], pt[%d]\n",
                                      u_pt[eq], side, k);
                         }
                         // keep only the largest overshoot; the first test
                         // shields umax[] from being read before it is set
                         if(imax_side[eq] == -1 || (u_pt[eq]) > umax[eq])
                         {
                             imax[eq] = k;
                             imax_side[eq] = side;
                             umax[eq] = (u_pt[eq]);
                         }
                         out_of_range = YES;
                     }

                     if((u_pt[eq]) < st_min[eq])
                     {
                         if(debug == YES)
                         {
                             printf("tri state undershoot = %g at side[%d], pt[%d]\n",
                                      u_pt[eq], side, k);
                         }
                         // keep only the smallest undershoot
                         if(imin_side[eq] == -1 || (u_pt[eq]) < umin[eq])
                         {
                             imin[eq] = k;
                             imin_side[eq] = side;
                             umin[eq] = (u_pt[eq]);
                         }
                         out_of_range = YES;
                     }
                 }
             }
         }

         // tri vertices, recorded as side 4, point 3 + vertex index
         for(k = 0; k < 3; k++)
         {
             con_u_at_pt(own_st, vert[k], tri_cent, u_pt);
             for(eq = 0; eq < N_EQN; eq++)
             {
                 if((u_pt[eq]) > st_max[eq])
                 {
                     if(debug == YES)
                     {
                         printf("tri state overshoot = %g at side[%d], pt[%d]\n",
                                      u_pt[eq], side, k);
                     }
                     if(imax_side[eq] == -1 || (u_pt[eq]) > umax[eq])
                     {
                         imax[eq] = 3 + k;
                         imax_side[eq] = 4;
                         umax[eq] = (u_pt[eq]);
                     }
                     out_of_range = YES;
                 }
                 if((u_pt[eq]) < st_min[eq])
                 {
                     if(debug == YES)
                     {
                         printf("tri state undershoot = %g at side[%d], pt[%d]\n",
                                      u_pt[eq], side, k);
                     }
                     if(imin_side[eq] == -1 || (u_pt[eq]) < umin[eq])
                     {
                         imin[eq] = 3 + k;
                         imin_side[eq] = 4;
                         umin[eq] = (u_pt[eq]);
                     }
                     out_of_range = YES;
                 }
             }
         }

         return out_of_range;
}

LOCAL void fix_overshoot(
         TRI       *tri,
         Mid_soln  *midsoln,
         int       rk_iter, // iteration always use 0
         Front     *fr)
{
         int    imax[4], imin[4], imax_side[4], imin_side[4];
         float  umin[4], umax[4];
         int    side, i, k;
         TRI    *nbtri[3];
         float  a_max[4], a_min[4], u_avg[4];
         float  st_min[4], st_max[4];
         Locstate st;
         int    debug_flag = NO, loop;

         for(side = 0; side < 3; side++)
         {
             nbtri[side] = Tri_on_side(tri,side);
             // This should only occur on the boundary
             if(nbtri[side] == NULL)
                 return;
         }

         // TMP
         /**
         if(tri->id == 187)
         {
             printf("\n\nTRI[%d] enter p1_fix_overshoot\n", tri->id);
             debug_flag = YES;
         }
         **/

         if(rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];

         if(NO == jump_at_quadrature(tri, nbtri, midsoln, rk_iter, imax_side,
                imax, imin_side, imin, umax, umin, st_max, st_min,NO))
         {
             loop = 0;
             while(unphysical_st_at_quadrature(tri, st) == YES)
             {
                 for(i = 0; i < N_EQN; i++)
                 {
                     switch(i)
                     {
                     case 0:
                         dg_Dens(st)[0] = Dens(st)*(1.0-0.5) + dg_Dens(st)[0]*0.5;
                         for(k = 1; k < MAX_N_COEF; k++)
                             dg_Dens(st)[k] *= 0.5;
                     break;
                     case 1:
                         dg_Mom(st)[0][0] = Mom(st)[0]*(1.0-0.5) + dg_Mom(st)[0][0]*0.5;
                         for(k = 1; k < MAX_N_COEF; k++)
                             dg_Mom(st)[0][k] *= 0.5;
                     break;
                     case 2:
                         dg_Mom(st)[1][0] = Mom(st)[1]*(1.0-0.5) + dg_Mom(st)[1][0]*0.5;
                         for(k = 1; k < MAX_N_COEF; k++)
                             dg_Mom(st)[1][k] *= 0.5;
                     break;
                     case 3:
                         dg_Energy(st)[0] = Energy(st)*(1.0-0.5) + dg_Energy(st)[0]*0.5;
                         for(k = 1; k < MAX_N_COEF; k++)
                             dg_Energy(st)[k] *= 0.5;
                     break;
                     }
                 }
                 loop++;
                 if(loop == 30)
                 {
                     printf("ERROR: UNphysical_st_at_quadrature in p1_fix_overshoot\n");
                     clean_up(ERROR);
                 }
             }

             if(debug_flag == YES)
             {
                 printf("TRI[%d] is ****not**** overshoot, loop to reduce unphysical = %d\n", tri->id, loop);
                 printf("avg_min[%g %g %g %g], avg_max[%g %g %g %g]\n",
                      st_min[0], st_min[1], st_min[2], st_min[3],
                      st_max[0], st_max[1], st_max[2], st_max[3]);
                 verbose_print_state("HR state",st);
                 printf("display states at edge after projection\n");
                 show_states_at_edge_quadrature(tri,st);
             }
             return;
         }
         // TMP
         if(debug_flag == YES)
         {
             printf("TRI[%d] is overshoot\n", tri->id);
             printf("avg_min[%g %g %g %g], avg_max[%g %g %g %g]\n",
                      st_min[0], st_min[1], st_min[2], st_min[3],
                      st_max[0], st_max[1], st_max[2], st_max[3]);
             verbose_print_state("HR state",st);

             // verbose_print_state("scaled state",st);
             printf("display states at edge after projection\n");
             show_states_at_edge_quadrature(tri,st);
             printf("\n");
         }

         u_avg[0] = Dens(st);
         u_avg[1] = fabs(Mom(st)[0]);
         u_avg[2] = fabs(Mom(st)[1]);
         u_avg[3] = Energy(st);

         for(i = 0; i < N_EQN; i++)
         {
             a_max[i] = a_min[i] = HUGE_VAL;
             if(imax_side[i] != -1)
             {
                 a_max[i] = (st_max[i] - u_avg[i])/(umax[i]-u_avg[i]);
             }
             if(imin_side[i] != -1)
             {
                 a_min[i] = (u_avg[i] - st_min[i])/(u_avg[i] - umin[i]);
             }
             a_max[i] = min(a_max[i], a_min[i]);
         }
         // TMP
         if(debug_flag == YES)
         {
             verbose_print_state("projection state",st);
             for(i = 0; i < N_EQN; i++)
                 printf("scale of eqn[%d] = %g, max_from_side[%d]pt[%d],"
                   " min_from_side[%d]pt[%d]\n",
                      i, a_max[i], imax_side[i], imax[i], imin_side[i], imin[i]);
             printf("state at edge quadratures after projection\n");
             show_states_at_edge_quadrature(tri,st);
             printf("\n");
         }

         for(i = 0; i < N_EQN; i++)
         {
             if(imax_side[i] != -1 || imin_side[i] != -1)
             {
                 switch(i)
                 {
                 case 0:
                     dg_Dens(st)[0] = Dens(st)*(1.0-a_max[i]) + dg_Dens(st)[0]*a_max[i];
                     for(k = 1; k < MAX_N_COEF; k++)
                         dg_Dens(st)[k] *= a_max[i];
                 break;
                 case 1:
                     dg_Mom(st)[0][0] = Mom(st)[0]*(1.0-a_max[i]) + dg_Mom(st)[0][0]*a_max[i];
                     for(k = 1; k < MAX_N_COEF; k++)
                         dg_Mom(st)[0][k] *= a_max[i];
                 break;
                 case 2:
                     dg_Mom(st)[1][0] = Mom(st)[1]*(1.0-a_max[i]) + dg_Mom(st)[1][0]*a_max[i];
                     for(k = 1; k < MAX_N_COEF; k++)
                         dg_Mom(st)[1][k] *= a_max[i];
                 break;
                 case 3:
                     dg_Energy(st)[0] = Energy(st)*(1.0-a_max[i]) + dg_Energy(st)[0]*a_max[i];
                     for(k = 1; k < MAX_N_COEF; k++)
                         dg_Energy(st)[k] *= a_max[i];
                 break;
                 }
             }
         }

         loop = 0;
         while(unphysical_st_at_quadrature(tri, st) == YES)
         {
             for(i = 0; i < N_EQN; i++)
             {
                 switch(i)
                 {
                 case 0:
                     dg_Dens(st)[0] = Dens(st)*(1.0-0.5) + dg_Dens(st)[0]*0.5;
                     for(k = 1; k < MAX_N_COEF; k++)
                         dg_Dens(st)[k] *= 0.5;
                 break;
                 case 1:
                     dg_Mom(st)[0][0] = Mom(st)[0]*(1.0-0.5) + dg_Mom(st)[0][0]*0.5;
                     for(k = 1; k < MAX_N_COEF; k++)
                         dg_Mom(st)[0][k] *= 0.5;
                 break;
                 case 2:
                     dg_Mom(st)[1][0] = Mom(st)[1]*(1.0-0.5) + dg_Mom(st)[1][0]*0.5;
                     for(k = 1; k < MAX_N_COEF; k++)
                         dg_Mom(st)[1][k] *= 0.5;
                 break;
                 case 3:
                     dg_Energy(st)[0] = Energy(st)*(1.0-0.5) + dg_Energy(st)[0]*0.5;
                     for(k = 1; k < MAX_N_COEF; k++)
                         dg_Energy(st)[k] *= 0.5;
                 break;
                 }
             }
             loop++;
             if(loop == 30)
             {
                 printf("ERROR: UNphysical_st_at_quadrature in fix_overshoot 2, loop = %d\n", loop);
                 printf("TRI[%d] enter fix_overshoot\n", tri->id);
                 verbose_print_state("scaled state",st);
                 show_states_at_edge_quadrature(tri,st);
                 clean_up(ERROR);
             }
         }
         // TMP
         if(debug_flag == YES)
         {
             printf("\n Looped %d times to fix unphysical states\n", loop);
             verbose_print_state("scaled state",st);
             printf("state at edge quadratures after scaling\n");
             show_states_at_edge_quadrature(tri,st);
         }
}

/*
*                       fix_unphysical_st():
*
*       Repeatedly halves the deviation of the DG coefficients of tri from
*       the values stored in Dens/Mom/Energy until
*       unphysical_st_at_quadrature() no longer reports an unphysical
*       state. In each pass coefficient 0 of every component is replaced
*       by the mean of itself and the stored value, and coefficients
*       1 .. MAX_N_COEF-1 are halved.
*
*       Calls clean_up(ERROR) after 30 unsuccessful passes.
*
*       rk_iter         RK_STEP selects tri->st, otherwise
*                       midsoln[tri->id].st[rk_iter] is modified.
*       fr              not used in this function.
*/
LOCAL void fix_unphysical_st(
         TRI       *tri,
         Mid_soln  *midsoln,
         int       rk_iter, // iteration always use 0
         Front     *fr)
{
         Locstate st;
         int      i, k, loop;
         int      debug_flag = NO;      // set to YES by hand to trace one tri

         if(rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];

         // No neighbour bounds are computed in this function, so the trace
         // shows only the state itself (the old trace printed st_min/st_max
         // arrays that were never assigned here).
         if(debug_flag == YES)
         {
             printf("TRI[%d] enter fix_unphysical_st\n", tri->id);
             verbose_print_state("HR state",st);
             printf("display states at edge after projection\n");
             show_states_at_edge_quadrature(tri,st);
             printf("\n");
         }

         loop = 0;
         while(unphysical_st_at_quadrature(tri, st) == YES)
         {
             for(i = 0; i < N_EQN; i++)
             {
                 switch(i)
                 {
                 case 0:
                     dg_Dens(st)[0] = Dens(st)*(1.0-0.5) + dg_Dens(st)[0]*0.5;
                     for(k = 1; k < MAX_N_COEF; k++)
                         dg_Dens(st)[k] *= 0.5;
                     break;
                 case 1:
                     dg_Mom(st)[0][0] = Mom(st)[0]*(1.0-0.5) + dg_Mom(st)[0][0]*0.5;
                     for(k = 1; k < MAX_N_COEF; k++)
                         dg_Mom(st)[0][k] *= 0.5;
                     break;
                 case 2:
                     dg_Mom(st)[1][0] = Mom(st)[1]*(1.0-0.5) + dg_Mom(st)[1][0]*0.5;
                     for(k = 1; k < MAX_N_COEF; k++)
                         dg_Mom(st)[1][k] *= 0.5;
                     break;
                 case 3:
                     dg_Energy(st)[0] = Energy(st)*(1.0-0.5) + dg_Energy(st)[0]*0.5;
                     for(k = 1; k < MAX_N_COEF; k++)
                         dg_Energy(st)[k] *= 0.5;
                     break;
                 }
             }
             loop++;
             if(loop == 30)
             {
                 printf("ERROR: UNphysical_st_at_quadrature, loop = %d\n", loop);
                 printf("TRI[%d] enter fix_unphysical_st\n", tri->id);
                 verbose_print_state("scaled state",st);
                 show_states_at_edge_quadrature(tri,st);
                 clean_up(ERROR);
             }
         }

         if(debug_flag == YES)
         {
             printf("\n Looped %d times to fix unphysical states\n", loop);
             verbose_print_state("scaled state",st);
             printf("state at edge quadratures after scaling\n");
             show_states_at_edge_quadrature(tri,st);
         }
}

/*
*                       show_states_at_edge_quadrature():
*
*       Debugging aid. Prints the conserved variables returned by
*       con_u_at_pt() for state st at each of the Gauss_N quadrature
*       points on the three edges of tri, one line per point.
*
*       The static scratch state that used to be allocated here was never
*       read (its only consumer is commented out), so it is no longer
*       allocated.
*/
LOCAL void show_states_at_edge_quadrature(
        TRI       *tri,
        Locstate  st)
{
        int       k, i, side, dim = 2;
        float     *pcrds[3], qcrds[4];
        float     con_u[4];
        double    *cent = fg_centroid(tri);

        for(i = 0; i < 3; i++)
            pcrds[i] = Coords(Point_of_tri(tri)[i]);

        for(side = 0; side < 3; side++)
        {
            for(k = 0; k < Gauss_N; k++)
            {
                // map the reference abscissa q[k] onto edge side -> side+1
                for(i = 0; i < dim; i++)
                    qcrds[i] = (pcrds[(side+1)%3][i] + pcrds[side][i])/2.0 +
                           (pcrds[(side+1)%3][i] - pcrds[side][i])/2.0*q[k];

                con_u_at_pt(st, qcrds, cent, con_u);
                printf("side[%d], pt[%g %g] state[%g %g %g %g]\n",
                      side, qcrds[0], qcrds[1], con_u[0],con_u[1], con_u[2], con_u[3]);
            }
        }
}

// Physical-state check over the three vertices and the edge Gauss points.
/*
*                       unphysical_st_at_quadrature():
*
*       Evaluates the conserved variables of st (via con_u_at_pt) at the
*       three vertices of tri and then at the Gauss_N quadrature points of
*       each edge, converts each sample with con_u_to_state() into a
*       scratch state and tests it with invalid_state().
*
*       Returns YES at the first sample invalid_state() rejects, NO if all
*       samples pass. Always returns NO when N_EQN == 1.
*
*       NOTE(review): the scratch state is allocated and copied from st
*       only on the first call; presumably every later st shares the same
*       Params - verify against callers.
*/
LOCAL int unphysical_st_at_quadrature(
        TRI       *tri,
        Locstate  st)
{
        static Locstate work_st = NULL;
        double    *tri_cent = fg_centroid(tri);
        float     *vert[3], *tail, *head;
        float     u_pt[4], qpt[4];
        int       dim = 2;
        int       v, side, g, d;

        // one-time set up of the scratch state
        if(work_st == NULL)
        {
            (*Params(st)->_alloc_state)(&work_st,Params(st)->sizest);
            assign(work_st, st, Params(st)->sizest);
        }

        if(N_EQN == 1)
            return NO;

        for(v = 0; v < 3; v++)
            vert[v] = Coords(Point_of_tri(tri)[v]);

        // vertices first
        for(v = 0; v < 3; v++)
        {
            con_u_at_pt(st, vert[v], tri_cent, u_pt);
            con_u_to_state(u_pt, dim, work_st);
            if(invalid_state("unphysical_state",work_st,NO))
                return YES;
        }

        // then the Gauss points of every edge
        for(side = 0; side < 3; side++)
        {
            tail = vert[side];
            head = vert[(side+1)%3];
            for(g = 0; g < Gauss_N; g++)
            {
                for(d = 0; d < dim; d++)
                    qpt[d] = (head[d] + tail[d])/2.0 +
                           (head[d] - tail[d])/2.0*q[g];
                con_u_at_pt(st, qpt, tri_cent, u_pt);
                con_u_to_state(u_pt, dim, work_st);
                if(invalid_state("unphysical_state",work_st,NO))
                    return YES;
            }
        }
        return NO;
}

/*
*                       Shu_V_smooth_dect():
*
*       For every conserved component, builds three slopes (one per edge
*       midpoint of tri) from the differences between the neighbour
*       averages nbuave[][] and the cell average uave[], using the weights
*       returned by compute_tri_geom() for the stencils (tri, nb0, nb1),
*       (tri, nb1, nb2) and (tri, nb2, nb0).
*
*       Slopes smaller than MACH_EPS in magnitude are set to zero. If the
*       three slopes do not sum to (nearly) zero, their positive and
*       negative parts are rescaled by theta_p / theta_n so that they
*       balance. A component is flagged in is_bad[] when all three slopes
*       end up exactly zero; this includes the case where no slope is
*       positive or no slope is negative.
*
*       midsoln, rk_iter        not used in this function.
*       is_bad                  output, N_EQN entries set to YES or NO.
*
*       Returns YES if at least one component was flagged, NO otherwise.
*       (The function previously had no return statement although it is
*       declared int, so its result was undefined.)
*
*       NOTE(review): the neighbours are passed to fg_centroid() without a
*       NULL check - presumably callers only pass interior tris; verify.
*/
LOCAL int Shu_V_smooth_dect(
         TRI       *tri,
         Mid_soln  *midsoln,
         int       rk_iter,
         float     *uave,
         float     nbuave[3][4],
         int       *is_bad)
{
         int      i, side, k, nxt;
         int      any_bad = NO;
         double   *cent = fg_centroid(tri), *nbcent[3];
         TRI      *nbtri[3];
         float    mid[3][2]; //midpt on side
         float    coef[3][2]; // only coef[0] is used, as scratch for each stencil
         float    slp[3][4]; // midpt, four comp of soln
         float    pos, neg, theta_p,  theta_n;

         for(side = 0; side < 3; side++)
             nbtri[side] = Tri_on_side(tri,side);

         for(i = 0; i < N_EQN; i++)
             is_bad[i] = NO;

         for(i = 0; i < 3; i++)
         {
             mid[i][0] = (Coords(Point_of_tri(tri)[i])[0] + Coords(Point_of_tri(tri)[(i+1)%3])[0])*0.5;
             mid[i][1] = (Coords(Point_of_tri(tri)[i])[1] + Coords(Point_of_tri(tri)[(i+1)%3])[1])*0.5;
         }
         for(i = 0; i < 3; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         // slope at midpt[i] from the stencil (tri, nb[i], nb[i+1])
         for(i = 0; i < 3; i++)
         {
             nxt = (i+1)%3;
             compute_tri_geom(mid[i], cent, nbcent[i], nbcent[nxt], coef[0]);
             for(k = 0; k < N_EQN; k++)
                 slp[i][k] = coef[0][0]*(nbuave[i][k]-uave[k]) + coef[0][1]*(nbuave[nxt][k]-uave[k]);
         }

         // Check if the slopes can be reconstructed in
         // terms of the jumps at the edges. If no, we conclude that the stencil is bad.
         // i.e. We can not construct slopes from cell averages

         for(k = 0; k < N_EQN; k++)
         {
             for(i = 0; i < 3; i++)
             {
                 if(fabs(slp[i][k]) < MACH_EPS)
                     slp[i][k] = 0.0;
             }
             if(fabs(slp[0][k] + slp[1][k] + slp[2][k]) > MACH_EPS)
             {
                 // total positive and negative parts of the three slopes
                 pos = neg = 0.0;
                 for(i = 0; i < 3; i++)
                 {
                     pos += max(0.0, slp[i][k]);
                     neg += max(0.0, -slp[i][k]);
                 }

                 if(fabs(pos) < MACH_EPS)
                     theta_p = 1.0;
                 else
                     theta_p = min(1.0, neg/pos);
                 if(fabs(neg) < MACH_EPS)
                     theta_n = 1.0;
                 else
                     theta_n = min(1.0, pos/neg);

                 // scale down whichever part is larger so the two balance
                 for(i = 0; i < 3; i++)
                 {
                     slp[i][k] = theta_p*max(0.0, slp[i][k]) - theta_n*max(0.0, -slp[i][k]);
                 }
             }
             if(slp[0][k] == 0.0 && slp[1][k] == 0.0 && slp[2][k] == 0.0)
             {
                 is_bad[k] = YES;
                 any_bad = YES;
             }
         }

         return any_bad;
}

/*
*                       extrema_detec():
*
*       Flags the components whose cell average is a (non-strict) local
*       extremum with respect to the three neighbour averages.
*
*       uave            cell averages, N_EQN entries.
*       nbuave          neighbour averages, nbuave[neighbour][component].
*       is_bad          output: YES where uave[i] is >= the largest or <=
*                       the smallest of {uave[i], nbuave[0..2][i]},
*                       NO otherwise.
*/
LOCAL void extrema_detec(
         float     *uave,
         float     nbuave[3][4],
         int       *is_bad)
{
         int      eq, nb;
         float    hi, lo;

         for(eq = 0; eq < N_EQN; eq++)
         {
             // running max/min over the cell itself and its neighbours
             hi = lo = uave[eq];
             for(nb = 0; nb < 3; nb++)
             {
                 if(nbuave[nb][eq] > hi)
                     hi = nbuave[nb][eq];
                 if(nbuave[nb][eq] < lo)
                     lo = nbuave[nb][eq];
             }

             is_bad[eq] = (uave[eq] >= hi || uave[eq] <= lo) ? YES : NO;
         }
}

// Use the Shu-V paper stencil to compute a limited gradient (ux, uy) of
// every solution component on tri from the cell values uave of tri and
// the values nbuave[0..2] of its three side neighbours.
//
//   tri, midsoln, rk_iter : the state used for the characteristic
//                           matrices is tri->st when rk_iter == RK_STEP,
//                           otherwise midsoln[tri->id].st[rk_iter].
//   uave, nbuave          : input values (N_EQN components are used).
//   ux, uy                : output, N_EQN entries each.
//
// Steps:
//   1. For the midpoint of each side e, compute_tri_geom() returns two
//      weights for the pair of neighbours (e, e+1); the candidate
//      increment slp[e] is the weighted sum of (nbuave - uave).
//   2. For systems (N_EQN != 1) slp[e] is multiplied by L[e], scaled by
//      mu and multiplied by R[e]; L/R come from LR_matrix_in_dir() for the
//      unit direction centroid -> midpoint (presumably left/right
//      eigenvector matrices -- confirm against LR_matrix_in_dir).
//   3. Per component, when the three increments do not sum to zero their
//      positive and negative parts are rescaled by
//      theta_p = min(1, neg/pos) and theta_n = min(1, pos/neg).
//   4. (ux, uy) solves the 2x2 system built from midpoints 0 and 1 only;
//      midpoint 2 is not used in the solve.
LOCAL void Shu_V_sten_coeff(
         TRI       *tri,
         Mid_soln  *midsoln,
         int       rk_iter,
         float     *uave,
         float     nbuave[3][4],
         float     *ux,
         float     *uy)
{
         static float  **L[3] = {NULL, NULL, NULL}, **R[3];
         float         mu = 1.0; // 1.2
         double        *cent = fg_centroid(tri), *nbcent[3];
         TRI           *nbtri[3];
         Locstate      st;
         float         mid[3][2];            // midpoint of each side
         float         dir[3][MAXD], len;
         float         coef[3][2];           // only coef[0] is used as scratch
         float         slp[3][4], ch_slp[3][4], limt_slp[3][4];
         float         pos, neg, theta_p, theta_n;
         float         rside[3], A[2][2];
         int           e, nxt, k, c;

         // scratch 4x4 matrices are allocated once and kept for later calls
         if(L[0] == NULL)
         {
             for(e = 0; e < 3; e++)
             {
                 matrix(&L[e], 4, 4, sizeof(float));
                 matrix(&R[e], 4, 4, sizeof(float));
             }
         }

         if(rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];

         // neighbours across each side and their centroids
         for(e = 0; e < 3; e++)
         {
             nbtri[e] = Tri_on_side(tri,e);
             nbcent[e] = fg_centroid(nbtri[e]);
         }

         // side midpoints, unit directions centroid -> midpoint, and the
         // matrices associated with each direction
         for(e = 0; e < 3; e++)
         {
             nxt = (e+1)%3;
             for(c = 0; c < 2; c++)
             {
                 mid[e][c] = (Coords(Point_of_tri(tri)[e])[c] + Coords(Point_of_tri(tri)[nxt])[c])*0.5;
                 dir[e][c] = mid[e][c] - cent[c];
             }
             len = sqrt(sqr(dir[e][0]) + sqr(dir[e][1]));
             for(c = 0; c < 2; c++)
                 dir[e][c] = dir[e][c]/len;

             if(N_EQN != 1)
                 LR_matrix_in_dir(dir[e], st, L[e], R[e]);
         }

         // candidate increment at midpoint e from the stencil (tri, nb e, nb e+1)
         for(e = 0; e < 3; e++)
         {
             nxt = (e+1)%3;
             compute_tri_geom(mid[e], cent, nbcent[e], nbcent[nxt], coef[0]);
             for(k = 0; k < N_EQN; k++)
                 slp[e][k] = coef[0][0]*(nbuave[e][k]-uave[k]) + coef[0][1]*(nbuave[nxt][k]-uave[k]);
         }

         // TVB limiter version: scale by mu, in characteristic variables
         // for systems and directly for a scalar equation
         for(e = 0; e < 3; e++)
         {
             if(N_EQN != 1)
             {
                 d_matrix_vec_mult(L[e], slp[e], 4, 4, ch_slp[e]);
                 for(k = 0; k < N_EQN; k++)
                     ch_slp[e][k] = mu*ch_slp[e][k];
                 d_matrix_vec_mult(R[e], ch_slp[e], 4, 4, limt_slp[e]);
             }
             else
             {
                 for(k = 0; k < N_EQN; k++)
                 {
                     ch_slp[e][k] = mu*slp[e][k];
                     limt_slp[e][k] = ch_slp[e][k];
                 }
             }
         }

         // if the three increments of a component do not sum to zero,
         // rescale their positive and negative parts
         for(k = 0; k < N_EQN; k++)
         {
             if(!(fabs(limt_slp[0][k] + limt_slp[1][k] + limt_slp[2][k]) > MACH_EPS))
                 continue;

             pos = neg = 0.0;
             for(e = 0; e < 3; e++)
             {
                 pos += max(0.0, limt_slp[e][k]);
                 neg += max(0.0, -limt_slp[e][k]);
             }

             theta_p = (fabs(pos) < MACH_EPS) ? 1.0 : min(1.0, neg/pos);
             theta_n = (fabs(neg) < MACH_EPS) ? 1.0 : min(1.0, pos/neg);

             for(e = 0; e < 3; e++)
                 limt_slp[e][k] = theta_p*max(0.0, limt_slp[e][k]) - theta_n*max(0.0, -limt_slp[e][k]);
         }

         // solve u_x(x_mid - x_c) + u_y(y_mid - y_c) = du using the
         // midpoints of sides 0 and 1
         for(e = 0; e < 2; e++)
         {
             for(c = 0; c < 2; c++)
                 A[e][c] = (mid[e][c]-cent[c]);
         }
         for(k = 0; k < N_EQN; k++)
         {
             rside[0] = limt_slp[0][k];
             rside[1] = limt_slp[1][k];

             comp_coef(A,rside,coef[0]);

             ux[k] = coef[0][0];
             uy[k] = coef[0][1];
         }
}

/*
*       limit_by_edge_cent_val():
*
*       coef[j] (j = 0,1,2) are three candidate coefficient pairs and A[i]
*       (i = 0,1,2) is the 2-vector associated with edge i.  For each edge
*       the three candidate increments coef[j].A[i] are combined with
*       minmod2().  If the three limited increments do not sum to zero,
*       their positive and negative parts are rescaled by
*       theta_p = min(1, neg/pos) and theta_n = min(1, pos/neg).  Finally
*       the pair ans is obtained from comp_coef() using rows 0 and 1 of A
*       and the limited increments of edges 0 and 1; edge 2 does not enter
*       the solve.
*/
LOCAL void limit_by_edge_cent_val(
        float    coef[][2],
        float    A[][2],
        float    *ans)
{
        float    cand[3][3];   /* [edge][candidate] */
        float    lmt[3];       /* limited increment per edge */
        float    pos, neg, theta_p, theta_n;
        float    rside[2];
        int      e, c;

        for(e = 0; e < 3; e++)
        {
            for(c = 0; c < 3; c++)
                cand[e][c] = coef[c][0]*A[e][0] + coef[c][1]*A[e][1];
            lmt[e] = minmod2(minmod2(cand[e][0], cand[e][1]), cand[e][2]);
        }

        if(fabs(lmt[0] + lmt[1] + lmt[2]) > MACH_EPS)
        {
            pos = neg = 0.0;
            for(e = 0; e < 3; e++)
            {
                pos += max(0.0, lmt[e]);
                neg += max(0.0, -lmt[e]);
            }

            theta_p = (fabs(pos) < MACH_EPS) ? 1.0 : min(1.0, neg/pos);
            theta_n = (fabs(neg) < MACH_EPS) ? 1.0 : min(1.0, pos/neg);

            for(e = 0; e < 3; e++)
                lmt[e] = theta_p*max(0.0, lmt[e]) - theta_n*max(0.0, -lmt[e]);
        }

        /* find soln of u_0(x_mid - x_i) + u_1(y_mid-y_i) = du */
        rside[0] = lmt[0];
        rside[1] = lmt[1];

        comp_coef(A,rside,ans);
}


/*
*       least_sqr():
*
*       Least-squares solution of the over-determined system A x = rhs with
*       three equations (rows of A) and two unknowns, via the normal
*       equations:  ans = inv(A^T A) * (A^T rhs).
*
*       rhs   three right-hand-side values
*       A     three rows of two coefficients
*       ans   output, two entries
*
*       The scratch matrices are allocated on the first call and reused.
*       They are 3x3, but only a 3x2 block of mat (and a 2x3 block of
*       mat_tran) is ever filled.  Because the product below is requested
*       with 3,3 extents, the unused entries are zeroed once at allocation
*       so that the multiply never reads indeterminate values.
*       NOTE(review): this assumes d_trans_matrix(mat,3,2,...) writes only
*       the 2x3 block of mat_tran -- confirm against its definition.
*       No check is made for a singular A^T A.
*/
LOCAL void least_sqr(
        float    *rhs,
        float    A[][2],
        float    *ans)
{
        static double **mat = NULL, **mat_tran, **AA, **inv;
        int i, j;
        double rside2[3];

        if(mat == NULL)
        {
            matrix(&(AA), 3, 3, sizeof(double));
            matrix(&(inv), 3, 3, sizeof(double));
            matrix(&(mat_tran), 3, 3, sizeof(double));
            matrix(&(mat), 3, 3, sizeof(double));

            /* give the padding entries a defined value */
            for(i = 0; i < 3; i++)
            {
                for(j = 0; j < 3; j++)
                {
                    mat[i][j] = 0.0;
                    mat_tran[i][j] = 0.0;
                }
            }
        }

        for(i = 0; i < 3; i++)
        {
            mat[i][0] = A[i][0];
            mat[i][1] = A[i][1];
        }
        d_trans_matrix(mat, 3, 2, mat_tran);            /* A^T          */
        d_matrix_matrix_mult(mat_tran, mat, 3, 3, AA);  /* A^T A        */
        d_inverse_2_2_matrix(AA,2,inv);                 /* (A^T A)^-1   */
        d_matrix_vec_mult(mat_tran, rhs, 2, 3, rside2); /* A^T rhs      */
        d_matrix_vec_mult(inv, rside2, 2, 2, ans);
}

/*
*       weno_weight_ls():
*
*       Stores in ans[0..1] the least-squares solution of A x = rhs (see
*       least_sqr()).
*
*       The routine then evaluates, for each of the three rows of A, the
*       increments produced by the three candidate pairs coef[0..2] and by
*       the least-squares pair, and solves the 3x3 system
*       jump * gamma = ls_jump for gamma.  gamma is computed but not used
*       or returned; a disabled check used to abort when all three gamma
*       were negative.  A_edge and degree are not referenced.
*/
LOCAL void weno_weight_ls(
        float    coef[][2],
        float    A[][2],
        float    *rhs,
        float    A_edge[][2],
        int      degree,
        float    *ans)
{
        float    ls_ans[2];
        float    gamma[3];      /* computed, currently discarded */
        float    jump[3][3];    /* [edge][candidate] */
        float    ls_jump[3];    /* least-squares increment per edge */
        int      e, c;

        least_sqr(rhs, A, ls_ans);
        for(c = 0; c < 2; c++)
            ans[c] = ls_ans[c];

        for(e = 0; e < 3; e++)
        {
            for(c = 0; c < 3; c++)
                jump[e][c] = coef[c][0]*A[e][0] + coef[c][1]*A[e][1];
            ls_jump[e] = ans[0]*A[e][0] + ans[1]*A[e][1];
        }

        comp_coef_3eqns(jump, ls_jump, gamma);
}

// Solve the 3 by 3 linear system A x = rside.
//
// The determinant of A is evaluated first.  When its magnitude is below
// MACH_EPS*100 the system is treated as singular and x is set to
// (1/3, 1/3, 1/3).  Otherwise A is copied into double precision scratch
// storage (allocated on the first call and reused), inverted with
// inverse_matrix() and applied to rside.
LOCAL void comp_coef_3eqns(
        float  A[][3],
        float  *rside,
        float  *x)
{
        static double **tmpa = NULL, **inv;
        double rhs[3], soln[3];
        float  det;
        int    r, c;

        if(tmpa == NULL)
        {
            matrix(&(tmpa), 3, 3, sizeof(double));
            matrix(&(inv), 3, 3, sizeof(double));
        }

        // rule of Sarrus
        det =  A[0][0]*A[1][1]*A[2][2]+A[0][1]*A[1][2]*A[2][0]+A[0][2]*A[1][0]*A[2][1] - 
              (A[2][0]*A[1][1]*A[0][2]+A[2][1]*A[1][2]*A[0][0]+A[2][2]*A[1][0]*A[0][1]);

        if(fabs(det) < MACH_EPS*100.0)
        {
            // (near) singular: fall back to equal values
            for(r = 0; r < 3; r++)
                x[r] = 1.0/3.0;
            return;
        }

        for(r = 0; r < 3; r++)
        {
            rhs[r] = rside[r];
            for(c = 0; c < 3; c++)
                tmpa[r][c] = A[r][c];
        }

        inverse_matrix(tmpa,3,inv);
        matrix_vec_mult(inv, rhs, 3, 3, soln);

        for(r = 0; r < 3; r++)
            x[r] = soln[r];
}

// Reconstruct 3rd degree coeffs. based on the
// reconstructed polynomial.
//
// Works on the state midsoln[tri->id].st[0] and on the states
// midsoln[nbtri[i]->id].st[0] of the three neighbours (rk_iter is not
// referenced).  For every conserved component k:
//   1. u_average_indx() is called with index 3, 4 and 5 on the cell and
//      on each neighbour; the index 3 and 5 results are doubled
//      (presumably these are the uxx, uxy, uyy values -- confirm against
//      u_average_indx).
//   2. For each neighbour pair (i, i+1) a 2x2 system, built from the
//      centroid offsets scaled by 6 and/or 2, is solved with comp_coef()
//      to obtain candidate coefficient pairs.
//   3. The three candidates are blended with WENO weights; a component
//      flagged by extrema_detec() gets zero coefficients instead.
//   4. The two estimates obtained for coefficients 7 and 8 are combined
//      with minmod2().
//   5. Coefficients 6..9 of the state are overwritten.
// Finally coefficient 0 of every component is shifted by
// (old - new)/Lmass_matrix[0][0], where old/new are the values returned by
// degree3_term_Int() before and after the update, so that the quantity
// measured by degree3_term_Int() plus coefficient 0 times
// Lmass_matrix[0][0] is unchanged.
//
// old_total/new_total hold four entries and are zero-initialized:
// degree3_term_Int() writes entries 0..3 whenever N_EQN != 1, and the
// debug print below always reads four entries, so arrays sized N_EQN could
// be overrun or read uninitialized.
LOCAL void redo_limiting_3rd_degreeP3(
         TRI       *tri,
         TRI       *nbtri[3],  
         Mid_soln  *midsoln,
         int       rk_iter)
{
         Locstate st, nbst[3];
         float    uxxave[4], nbuxxave[3][4];
         float    uxyave[4], nbuxyave[3][4];
         float    uyyave[4], nbuyyave[3][4];
         int      i, k;
         double   *cent, *nbcent[3];
         float    rside[3], Axx[3][2][2], Axy[3][2][2], Ayy[3][2][2];
         float    coefxx[6][2], coefxy[6][2], coefyy[6][2];
         float    u6, u7, u8, u9, u7_0, u7_1, u8_0, u8_1,
                  arrya[9], arryb[9], w[9];
         double **Lmass_matrix = tri->Lmass_matrix;
         float    c_num_xx[6], c_num_xy[6], c_num_yy[6]; // condition number of stencils
         float    diam, sqr_diam;
         int      is_bad_stenxx[4], is_bad_stenxy[4], is_bad_stenyy[4];
         float    old_total[4] = {0.0, 0.0, 0.0, 0.0};
         float    new_total[4] = {0.0, 0.0, 0.0, 0.0};
         float    area;
         int      debug = NO;

         st = midsoln[tri->id].st[0];
         for(i = 0; i < 3; i++)
             nbst[i] = midsoln[nbtri[i]->id].st[0];

         // value of the 3rd degree terms before they are replaced
         degree3_term_Int(st,Lmass_matrix, old_total);

         // NOTE(review): hard-coded triangle id left over from debugging
         if(tri->id == 1088)
         {
             debug = YES;
             printf("Entered redo_limiting_3rd_degreeP3\n");
             printf("OLd total[%g %g %g %g]\n", old_total[0], old_total[1], old_total[2], old_total[3]); 
             verbose_print_state("before redo", st);
         }

         diam = fg_diam(tri);
         sqr_diam = sqr(diam);

         cent = fg_centroid(tri);
         for(i = 0; i < 3; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         u_average_indx(tri,st,3,uxxave);
         u_average_indx(tri,st,4,uxyave);
         u_average_indx(tri,st,5,uyyave);
         for(k = 0; k < N_EQN; k++)
         {
             uxxave[k] *= 2.0;
             uyyave[k] *= 2.0;
         }

         for(i = 0; i < 3; i++)
         {
             u_average_indx(nbtri[i],nbst[i],3,nbuxxave[i]);
             u_average_indx(nbtri[i],nbst[i],4,nbuxyave[i]);
             u_average_indx(nbtri[i],nbst[i],5,nbuyyave[i]);
             for(k = 0; k < N_EQN; k++)
             {
                 nbuxxave[i][k] *= 2.0;
                 nbuyyave[i][k] *= 2.0;
             }
         }

         // components that are local extrema get zero 3rd degree terms
         extrema_detec(uxxave,nbuxxave,is_bad_stenxx);
         extrema_detec(uxyave,nbuxyave,is_bad_stenxy);
         extrema_detec(uyyave,nbuyyave,is_bad_stenyy);

         // 2x2 stencil matrices for the neighbour pairs (i, i+1)
         for(i = 0; i < 3; i++)
         {
             Axx[i][0][0] = 6.0*(nbcent[i][0]-cent[0]);
             Axx[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axx[i][1][0] = 6.0*(nbcent[(i+1)%3][0]-cent[0]);
             Axx[i][1][1] = 2.0*(nbcent[(i+1)%3][1]-cent[1]);
             c_num_xx[i] = cond_num(Axx[i]);

             Axy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Axy[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axy[i][1][0] = 2.0*(nbcent[(i+1)%3][0]-cent[0]);
             Axy[i][1][1] = 2.0*(nbcent[(i+1)%3][1]-cent[1]);
             c_num_xy[i] = cond_num(Axy[i]);

             Ayy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Ayy[i][0][1] = 6.0*(nbcent[i][1]-cent[1]);
             Ayy[i][1][0] = 2.0*(nbcent[(i+1)%3][0]-cent[0]);
             Ayy[i][1][1] = 6.0*(nbcent[(i+1)%3][1]-cent[1]);
             c_num_yy[i] = cond_num(Ayy[i]);
         }

         for(k = 0; k < N_EQN; k++)
         {
             // tri, nb0, nb1 // tri, nb1, nb2 // tri, nb2, nb0
             for(i = 0; i < 3; i++)
             {
                 rside[0] = nbuxxave[i][k] - uxxave[k];
                 rside[1] = nbuxxave[(i+1)%3][k] - uxxave[k];
                 comp_coef(Axx[i],rside,coefxx[i]);

                 rside[0] = nbuxyave[i][k] - uxyave[k];
                 rside[1] = nbuxyave[(i+1)%3][k] - uxyave[k];
                 comp_coef(Axy[i],rside,coefxy[i]);

                 rside[0] = nbuyyave[i][k] - uyyave[k];
                 rside[1] = nbuyyave[(i+1)%3][k] - uyyave[k];
                 comp_coef(Ayy[i],rside,coefyy[i]);
             }
             /////////////////// WENO
             ///// u_xx polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefxx[i][0];
                 arryb[i] = coefxx[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_xx,3,sqr_diam,w);
             u6   = w[0]*coefxx[0][0] + w[1]*coefxx[1][0] + w[2]*coefxx[2][0];
             u7_0 = w[0]*coefxx[0][1] + w[1]*coefxx[1][1] + w[2]*coefxx[2][1];

             if(is_bad_stenxx[k] == YES)
             {
                 u6 = 0.0; u7_0 = 0.0;
             }

             ///// u_xy polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefxy[i][0];
                 arryb[i] = coefxy[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_xy,3,sqr_diam,w);
             u7_1 = w[0]*coefxy[0][0] + w[1]*coefxy[1][0] + w[2]*coefxy[2][0];
             u8_0 = w[0]*coefxy[0][1] + w[1]*coefxy[1][1] + w[2]*coefxy[2][1];

             if(is_bad_stenxy[k] == YES)
             {
                 u7_1 = 0.0; u8_0 = 0.0;
             }
             ///// u_yy polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefyy[i][0];
                 arryb[i] = coefyy[i][1];
             }
             // NOTE(review): WENO_mod here, WENO_mod_on_3rd for the xx and
             // xy stencils above -- confirm the difference is intended.
             WENO_mod(arrya,arryb,c_num_yy,3,sqr_diam,w);
             u8_1 = w[0]*coefyy[0][0] + w[1]*coefyy[1][0] + w[2]*coefyy[2][0];
             u9   = w[0]*coefyy[0][1] + w[1]*coefyy[1][1] + w[2]*coefyy[2][1];

             if(is_bad_stenyy[k] == YES)
             {
                 u8_1 = 0.0; u9 = 0.0;
             }

             // coefficients 7 and 8 each have two estimates; take a
             // minmod of (1.05 * minmod of the pair) and their mean
             u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             /////////////////// End WENO
             switch(k)
             {
             case 0:
                 dg_Dens(st)[6] = u6;
                 dg_Dens(st)[7] = u7;
                 dg_Dens(st)[8] = u8;
                 dg_Dens(st)[9] = u9;
             break;
             case 1:
                 dg_Mom(st)[0][6] = u6;
                 dg_Mom(st)[0][7] = u7;
                 dg_Mom(st)[0][8] = u8;
                 dg_Mom(st)[0][9] = u9;
             break;
             case 2:
                 dg_Mom(st)[1][6] = u6;
                 dg_Mom(st)[1][7] = u7;
                 dg_Mom(st)[1][8] = u8;
                 dg_Mom(st)[1][9] = u9;
             break;
             case 3:
                 dg_Energy(st)[6] = u6;
                 dg_Energy(st)[7] = u7;
                 dg_Energy(st)[8] = u8;
                 dg_Energy(st)[9] = u9;
             break;
             default:
             break;
             }
         }
         
         // compensate the change of the 3rd degree terms in coefficient 0
         degree3_term_Int(st,Lmass_matrix,new_total);
         area = Lmass_matrix[0][0];
         for(k = 0; k < N_EQN; k++)
         {
             old_total[k] -= new_total[k];
             switch(k)
             {
             case 0:
                 dg_Dens(st)[0] += old_total[k]/area;
             break;
             case 1:
                 dg_Mom(st)[0][0] += old_total[k]/area;
             break;
             case 2:
                 dg_Mom(st)[1][0] += old_total[k]/area;
             break;
             case 3:
                 dg_Energy(st)[0] += old_total[k]/area;
             break;
             default:
             break;
             }
         }

         if(debug == YES)
         {
             printf("New total[%g %g %g %g]\n", new_total[0], new_total[1], new_total[2], new_total[3]);
             verbose_print_state("after redo", st);
         }
}

// Integral of 3rd degree terms of polynomial
// (without differentiation.)
//
// For each conserved component, ave[] receives the sum over j = 6..9 of
// (coefficient j) * Lmass_matrix[0][j].  ave[0] is the density; when
// N_EQN != 1, ave[1] and ave[2] are the two momentum components and ave[3]
// the energy, so ave must then hold four entries.
LOCAL void degree3_term_Int(
         Locstate  st,
         double **Lmass_matrix,
         float     *ave)
{
         double   *m0 = Lmass_matrix[0];   // first row of the mass matrix

// weighted sum of coefficients 6..9 of one component
#define DEG3_TERM_SUM(c) (c[6]*m0[6] + c[7]*m0[7] + c[8]*m0[8] + c[9]*m0[9])

         ave[0] = DEG3_TERM_SUM(dg_Dens(st));

         if(N_EQN != 1)
         {
             ave[1] = DEG3_TERM_SUM(dg_Mom(st)[0]);
             ave[2] = DEG3_TERM_SUM(dg_Mom(st)[1]);
             ave[3] = DEG3_TERM_SUM(dg_Energy(st));
         }

#undef DEG3_TERM_SUM
}


/*
*       pp_send_interior_fields():
*
*       Packs the states of all triangles that lie in the send strip of
*       this subdomain (see tmp_set_send_domain()) and sends them to the
*       neighbouring processor on the given side in direction dir.
*
*       me         processor coordinates of this node
*       dir, side  which face of the subdomain is being communicated
*       extra_buf  width of the strip
*       front      supplies the interface, grids, mesh and state size
*       midsoln    intermediate Runge-Kutta states, indexed by tri id
*       rk_step    RK_STEP selects tri->st, any other value selects
*                  midsoln[id].st[rk_step]
*
*       Nothing is sent unless the face is a SUBDOMAIN_BOUNDARY.  Reflecting
*       faces and a neighbour that is this node itself are not implemented
*       and abort through clean_up(ERROR).
*
*       Wire format: one int (the record count, sent with MPI_Send only when
*       __MPI__ is defined), followed by the records, each consisting of
*       two sizeof(float) centroid coordinates and front->sizest bytes of
*       bundled state, cut into messages of BLOCK_SIZE bytes tagged
*       state_id(0), state_id(1), ...
*
*       BLOCK_SIZE is initialized to 0 at file scope.  With a zero block
*       size the chunking loop would never advance, so in that case the
*       whole buffer goes out as a single message.
*       NOTE(review): the centroid is copied as sizeof(float) bytes from
*       fg_centroid(), which is held as a double* elsewhere in this file;
*       that is only correct if float and double have the same size in
*       this build -- confirm.
*/
LOCAL   void    pp_send_interior_fields(
        int             *me,
        int             dir,
        int             side,
        float           extra_buf,
        Front           *front,
        Mid_soln *midsoln,
        int      rk_step)
{
        INTERFACE     *intfc = front->interf;
        PP_GRID       *pp_grid = front->pp_grid;
        RECT_GRID     *gr = front->rect_grid;
        float         L[MAXD], U[MAXD];
        int           i, c;
        int           him[MAXD];
        int           myid, dst_id, send_num;
        int           num_tris;
        size_t        len;
        byte          *storage = NULL, *buf, *ps;
        TRI           **senttris;
        Locstate      st;

        if (rect_boundary_type(intfc,dir,side) == REFLECTION_BOUNDARY)
        {
            printf("ERROR: pp_send_interior_fields\n");
            printf("Implement  reflect_fields_across_domain\n");
            clean_up(ERROR);
            return;
        }
        if (rect_boundary_type(intfc,dir,side) != SUBDOMAIN_BOUNDARY)
            return;

        myid = pp_mynode();
        dst_id = neighbor_id(him,me,dir,side,pp_grid);
        if (myid == dst_id)
        {
            printf("ERROR: pp_send_interior_fields\n");
            printf("Implement  copy_fields_across_domain\n");
            clean_up(ERROR);
            return;
        }

        tmp_set_send_domain(L,U,dir,side,gr,extra_buf);

        send_num = num_tris = count_num_of_tris_inside(L,U,front,&senttris);
        len = (sizeof(float)*2+front->sizest)*num_tris;
        scalar(&storage,len);
        buf = storage;

        /* pack one record per triangle: centroid x, centroid y, state */
        for(i = 0; i < num_tris; i++)
        {
            if(rk_step == RK_STEP)
                st = senttris[i]->st;
            else
                st = midsoln[senttris[i]->id].st[rk_step];

            for(c = 0; c < 2; c++)
            {
                assign((Locstate)buf,&(fg_centroid(senttris[i])[c]),sizeof(float));
                buf += sizeof(float);
            }

            bundle_single_st(st,front->sizest,buf);
            buf += front->sizest;
        }

#if defined(__MPI__)
        MPI_Send(&send_num,1,MPI_INT,dst_id,0, MPI_COMM_WORLD);
#endif // if defined(__MPI__) //

        /* full blocks first; skipped when no block size has been set */
        ps = storage;
        i = 0;
        if (BLOCK_SIZE > 0)
        {
            for (; len >= BLOCK_SIZE;
                        len -= BLOCK_SIZE, ps += BLOCK_SIZE, ++i)
            {
                pp_send(state_id(i),(POINTER)ps,BLOCK_SIZE,dst_id);
            }
        }
        /* remainder (or everything when BLOCK_SIZE is 0) */
        if (len != 0)
            pp_send(state_id(i),(POINTER)ps,len,dst_id);

        free(storage);
        free(senttris);
}

// TMP, copied from hscatter.
//
// Compute the corners L (lower) and U (upper) of the strip of this
// subdomain whose data is sent to the neighbour on the given side in
// direction dir.
//
// In direction dir the strip has width extra_buf and lies just inside the
// grid: [gr->L[dir], gr->L[dir]+extra_buf] for side 0 and
// [gr->U[dir]-extra_buf, gr->U[dir]] otherwise.  In every other direction
// it spans the grid enlarged by extra_buf on both ends.
//
// The side 0 branch indexes the grid with dir explicitly rather than with
// the loop counter left over from the preceding loop.
LOCAL   void    tmp_set_send_domain(
        float           *L,
        float           *U,
        int             dir,
        int             side,
        RECT_GRID       *gr,
        float           extra_buf)
{
        int             dim = gr->dim;
        int             j;

        for (j = 0; j < dir; ++j)
        {
            L[j] = gr->L[j]-extra_buf;
            U[j] = gr->U[j]+extra_buf;
        }
        if (side == 0)
        {
            L[dir] = gr->L[dir];
            U[dir] = gr->L[dir]+extra_buf;
        }
        else
        {
            L[dir] = gr->U[dir] - extra_buf;
            U[dir] = gr->U[dir];
        }
        for (j = dir+1; j < dim; ++j)
        {
            L[j] = gr->L[j]-extra_buf;
            U[j] = gr->U[j]+extra_buf;
        }
}               /*end set_send_domain*/


/*
*       pp_receive_interior_fields():
*
*       Receives the records sent by the neighbouring processor on the
*       given side in direction dir and copies each received state into the
*       local triangle whose centroid matches (see find_match_tri()).
*
*       me         processor coordinates of this node
*       dir, side  which face of the subdomain is being communicated
*       extra_buf  width of the receive strip (see tmp_set_receive_domain())
*       front      supplies the interface, grids, mesh and state size
*       midsoln    intermediate Runge-Kutta states, indexed by tri id
*       rk_step    RK_STEP selects tri->st as destination, any other value
*                  selects midsoln[id].st[rk_step]
*
*       Returns immediately unless the face is a SUBDOMAIN_BOUNDARY and the
*       neighbour is a different node.
*
*       The record count arrives first (MPI_Recv, only when __MPI__ is
*       defined), then the records in messages of BLOCK_SIZE bytes tagged
*       state_id(0), state_id(1), ...  Each record is two sizeof(float)
*       centroid coordinates followed by front->sizest bytes of state whose
*       Params field carries an index into the list returned by
*       return_params_list().
*
*       recv_num starts at 0 so that a build without __MPI__ does not size
*       the buffers from an uninitialized value; a non-positive count means
*       there is nothing to unpack.  BLOCK_SIZE is initialized to 0 at file
*       scope; with a zero block size the chunking loop would never
*       advance, so in that case the whole buffer is received as a single
*       message.
*
*       The unpacked records are kept in a static array that only grows.
*       Local triangles without a matching record are left untouched.
*/
LOCAL   void    pp_receive_interior_fields(
        int             *me,
        int             dir,
        int             side,
        float           extra_buf,
        Front           *front,
        Mid_soln *midsoln,
        int      rk_step)
{
        INTERFACE     *intfc = front->interf;
        PP_GRID       *pp_grid = front->pp_grid;
        RECT_GRID     *gr = front->rect_grid;
        float         L[MAXD], U[MAXD];
        int           him[MAXD];
        int           myid, src_id;
        int           i, num_tris;
        int           recv_num = 0;
        size_t        len;
        byte          *storage = NULL, *buf, *ps;
        TRI           **recvtris;
        Locstate      st;
        static Buf_soln    *buf_soln = NULL;
        static int     buf_alloc_num = 0;
        Buf_soln       *match;
#if defined(__MPI__)
        MPI_Status     status;
#endif // if defined(__MPI__) //
        Gas_param      **prmslst;

        if (rect_boundary_type(intfc,dir,side) != SUBDOMAIN_BOUNDARY)
        {
            return;
        }

        myid = pp_mynode();
        src_id = neighbor_id(him,me,dir,side,pp_grid);
        if (myid == src_id)
        {
            return; /* Already done */
        }

        tmp_set_receive_domain(L,U,dir,side,gr,extra_buf);

        num_tris = count_num_of_tris_inside(L,U,front,&recvtris);

#if defined(__MPI__)
        MPI_Recv(&recv_num,1,MPI_INT,src_id,0,MPI_COMM_WORLD,&status);
#endif // if defined(__MPI__) //

        if (recv_num <= 0)
        {
            /* nothing was (or could be) sent: no data messages follow */
            free(recvtris);
            return;
        }

        len = (sizeof(float)*2+front->sizest)*recv_num;
        scalar(&storage,len);
        buf = storage;

        /* full blocks first; skipped when no block size has been set */
        ps = storage;
        i = 0;
        if (BLOCK_SIZE > 0)
        {
            for (; len >= BLOCK_SIZE;
                        len -= BLOCK_SIZE, ps += BLOCK_SIZE,++i)
            {
                pp_recv(state_id(i),src_id,(POINTER)ps,BLOCK_SIZE);
            }
        }
        /* remainder (or everything when BLOCK_SIZE is 0) */
        if (len != 0)
            pp_recv(state_id(i),src_id,(POINTER)ps,len);

        (void) return_params_list(&prmslst);

        /* grow the static record array when this batch is larger */
        if(buf_soln == NULL || buf_alloc_num < recv_num)
        {
            if(buf_soln != NULL)
            {
                for(i = 0; i < buf_alloc_num; i++)
                    free(buf_soln[i].st);
                free(buf_soln);
            }
            vector(&buf_soln,recv_num,sizeof(Buf_soln));
            for(i = 0; i < recv_num; i++)
                scalar(&(buf_soln[i].st), front->sizest);
            buf_alloc_num = recv_num;
        }

        /* unpack: centroid x, centroid y, state; restore the Params pointer */
        for(i = 0; i < recv_num; i++)
        {
            assign(&(buf_soln[i].cent[0]), buf, sizeof(float));
            buf += sizeof(float);
            assign(&(buf_soln[i].cent[1]), buf, sizeof(float));
            buf += sizeof(float);

            assign(buf_soln[i].st, buf, front->sizest);
            Params(buf_soln[i].st) = prmslst[(size_t) Params(buf_soln[i].st)];
            buf += front->sizest;
        }

        /* copy each received state into the local triangle it belongs to */
        for(i = 0; i < num_tris; i++)
        {
            if(NULL != (match = find_match_tri(fg_centroid(recvtris[i]),buf_soln,recv_num)))
            {
                 if(rk_step == RK_STEP)
                     st = recvtris[i]->st;
                 else
                     st = midsoln[recvtris[i]->id].st[rk_step];
                 assign(st,match->st,front->sizest);
            }
        }

        free(storage); 
        free(recvtris);
}

/*
*       tmp_set_receive_domain():
*
*       Compute the corners L (lower) and U (upper) of the strip into which
*       data from the neighbour on the given side in direction dir is
*       received.
*
*       In direction dir the strip has width extra_buf and lies just
*       outside the grid: [gr->L[dir]-extra_buf, gr->L[dir]] for side 0 and
*       [gr->U[dir], gr->U[dir]+extra_buf] otherwise.  In every other
*       direction it spans the grid enlarged by extra_buf on both ends.
*/
LOCAL   void    tmp_set_receive_domain(
        float           *L,
        float           *U,
        int             dir,
        int             side,
        RECT_GRID       *gr,
        float           extra_buf)
{
        int             ndim = gr->dim;
        int             d;

        /* transverse directions below dir */
        for (d = 0; d < dir; ++d)
        {
            L[d] = gr->L[d]-extra_buf;
            U[d] = gr->U[d] + extra_buf;
        }

        /* the communication direction itself */
        if (side != 0)
        {
            L[dir] = gr->U[dir];
            U[dir] = gr->U[dir] + extra_buf;
        }
        else
        {
            L[dir] = gr->L[dir]-extra_buf;
            U[dir] = gr->L[dir];
        }

        /* transverse directions above dir */
        for (d = dir+1; d < ndim; ++d)
        {
            L[d] = gr->L[d]-extra_buf;
            U[d] = gr->U[d]+extra_buf;
        }
}

/*
*       count_num_of_tris_inside():
*
*       Collects every triangle of every surface in fr->mesh for which
*       tri_out_rect(tri,L,U) returns NO, i.e. that is not outside the
*       rectangle with corners L and U.
*
*       L, U       lower and upper corner of the rectangle
*       fr         front whose mesh surfaces are scanned
*       senttris   output: newly allocated array holding the collected
*                  triangle pointers; the caller releases it with free()
*
*       Returns the number of triangles stored in *senttris.
*
*       The array capacity is doubled whenever it fills up, so the total
*       copying work stays linear in the number of triangles instead of
*       growing quadratically with a fixed-size increment.
*/
LOCAL int count_num_of_tris_inside(
        float  *L,
        float  *U,
        Front  *fr,
        TRI    ***senttris)
{
        TRI       *tri, **stris;
        SURFACE   **s;
        int       num = 0, capacity = 5, i;

        vector(&stris, capacity, sizeof(TRI*));
 
        for (s = fr->mesh->surfaces; s && *s; ++s)
        {
            for (tri=first_tri(*s); !at_end_of_tri_list(tri,*s); tri=tri->next)
            {
                if (tri_out_rect(tri,L,U) != NO)
                    continue;

                if(num == capacity)
                {
                    /* full: move the entries into an array twice as large */
                    TRI **grown;
                    int new_capacity = 2*capacity;

                    vector(&grown, new_capacity, sizeof(TRI*));
                    for(i = 0; i < num; i++)
                        grown[i] = stris[i];
                    free(stris);
                    stris = grown;
                    capacity = new_capacity;
                }
                stris[num] = tri;
                num++;
            }
        }

        *senttris = stris;
        return num;
}

/*
*       bundle_single_st():
*
*       Copies sizest bytes of the state st into buf and replaces the
*       Params pointer of the copy by its position in the list returned by
*       return_params_list(), stored as an integer cast to a pointer.
*       When the pointer is not found in the list, the stored index equals
*       the number of entries in the list.
*/
LOCAL void bundle_single_st(
        Locstate        st,
        size_t          sizest,
        byte            *buf)
{
        Gas_param **prmslst;
        Locstate  packed = (Locstate)buf;
        size_t    idx = 0;
        int       num_params;

        num_params = return_params_list(&prmslst);
        assign(packed,st,sizest);

        /* linear search for the params pointer of the copied state */
        while (idx < (size_t)num_params && Params(packed) != prmslst[idx])
            ++idx;

        Params(packed) = (Gas_param *) idx;
}

/*
*       unbundle_single_st():
*
*       Copies sizest bytes from buf into the state st, then interprets the
*       Params field of the copy as an index and replaces it by the
*       corresponding entry of the list returned by return_params_list().
*       The index is not range checked.
*/
LOCAL void unbundle_single_st(
        Locstate        st,
        size_t          sizest,
        byte            *buf)
{
        Gas_param **prmslst;
        size_t    idx;

        return_params_list(&prmslst);
        assign(st,buf,sizest);

        idx = (size_t) Params(st);
        Params(st) = prmslst[idx];
}


/*
*       find_match_tri():
*
*       Linear search through buf_soln[0..num-1] for the first record whose
*       stored centroid agrees with cent in both coordinates to within
*       MACH_EPS*1000.  Returns a pointer to that record, or NULL when no
*       record matches.
*/
LOCAL Buf_soln *find_match_tri(
        float    *cent,
        Buf_soln *buf_soln,
        int      num)
{
        int      n = 0;

        while (n < num)
        {
            Buf_soln *rec = &(buf_soln[n]);

            if(fabs(cent[0]-rec->cent[0]) < MACH_EPS*1000.0)
            {
                if(fabs(cent[1]-rec->cent[1]) < MACH_EPS*1000.0)
                    return rec;
            }
            n++;
        }
        return NULL;
}


/*
*       tri_outside_db_Mach_boundary():
*
*       Returns YES when the centroid of tri lies outside the rectangle
*       0 <= x <= 1, 0 <= y <= 4, and NO otherwise.  The limits are hard
*       coded (going by the function name, for the double Mach problem).
*/
LOCAL int tri_outside_db_Mach_boundary(
        TRI *tri)
{
        double   *c = fg_centroid(tri);

        /* x range */
        if(c[0] < 0.0)
            return YES;
        if(c[0] > 1.0)
            return YES;

        /* y range */
        if(c[1] < 0.0)
            return YES;
        if(c[1] > 4.0)
            return YES;

        return NO;
}
#endif /* #if defined(TWOD) */
