/*
*                               gDG2.c
*
*       Copyright 1999 by The University at Stony Brook, All rights reserved.
*/

#include <ghyp/ghyp.h>
#include <gdecs/vecdecs.h>

/* Maps an index i to STATE_ID + (i + 1). The argument is parenthesised so that
 * compound arguments (conditional or bitwise expressions, for instance) expand
 * with the intended precedence. */
#define state_id(i)     (STATE_ID + ((i)+1)) // combine with other replications.
/* File-scope block size, initialised to 0 here; the code that sets or reads it
 * is not in this part of the file.
 * NOTE(review): the inline "TOLERANCE" remark reads like it was copied from a
 * floating-point tolerance declaration and does not obviously apply to a
 * size_t -- confirm and reword. */
LOCAL size_t BLOCK_SIZE = 0; /*TOLERANCE - TODO: what is a good value*/ // combine with other replications.

#if defined(TWOD)

/* File-scope (LOCAL) work storage and switches. Every pointer that has an
 * initialiser starts out NULL; where they are allocated is not visible in this
 * part of the file. */
LOCAL Tri_mass_1st_rows   *alltri_mass_1st_rows = NULL, *alltri_mass_1st_rows_grp = NULL;


/* mass_1st_row is used as scratch by limiting_1st_degree_ENO(): it is passed to
 * comp_mass_matrix_1st_row() and then to R_degree2_term_average_Liu(). */
LOCAL double      **mass_1st_row = NULL, **mass_1st_rows[30];
/* debug_flag defaults to NO; USE_2nd_degree_tech defaults to YES. */
LOCAL int         debug_flag = NO, USE_2nd_degree_tech = YES;
LOCAL Tri_HR_sten *alltri_HR_sten_2 = NULL, *alltri_HR_sten_2_grp = NULL;
LOCAL Dual_cell_sten *dual_cell_sten_2 = NULL;

// LOCAL Tri_HR_sten *alltri_HR_sten = NULL, *alltri_HR_sten_grp = NULL;
LOCAL CV_Soln     *cv_soln_2degree_tech1 = NULL;


// defined in ghp/gDG.c
IMPORT float    newdt; 
IMPORT TRI    *time_on_tri;

LOCAL int    find_nghbr_nghbr(TRI *nbtri[3],TRI*,TRI *nnb[9],int*,Front*);
LOCAL void   limiting_2nd_degree_ENO(TRI*, TRI *nbtri[3],TRI *nntri[9],int,int*,Mid_soln*,int);
LOCAL void   limiting_1st_degree_ENO(TRI*, TRI *nbtri[3],TRI *nntri[9],int,int*,Mid_soln*,int);
LOCAL TRI    *min_angle_tri(TRI*,TRI*,TRI**,int,int*);
LOCAL int    min_angle(float*, int);
// LOCAL void   set_HR_sten(TRI *nbtri[3],TRI*,TRI *nntri[30],int*);
LOCAL void   LARGE_STEN_limiting_3rd_degreeP3(TRI*,TRI *nbtri[30],int,TRI *edgetri[3],Mid_soln*,int,int);
LOCAL void   LARGE_STEN_limiting_2nd_degreeP3(TRI*,TRI *nbtri[30],int,TRI *edgetri[3],Mid_soln*,int);
LOCAL void   LARGE_STEN_limiting_1st_degreeP3(TRI*,TRI *nbtri[30],int,TRI *edgetri[3],Mid_soln*,int,int);
// LOCAL void   NEW_extrema_detec(float*,float nbuave[][8],int,int*);
LOCAL void   tris_around_pt(TRI *nbtri[3],TRI*,int,TRI**,int*);
LOCAL int    get_tri_cent_CV_stencil(TRI*,TRI *nbtri[3],int,int*,int*,TRI**,int*);
LOCAL int    get_tri_vert_CV_stencil(TRI*,TRI *nbtri[3],int,int*,int*,TRI**,int*);
LOCAL int    is_3tris_around_pt(TRI *nbtri[3],TRI*,int);
LOCAL void   Subcell_limiting_3rd_degreeP3(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int);
LOCAL void   Subcell_limiting_2nd_degreeP3(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int);

LOCAL void   Subcell_limiting_3rd_degreeP3_PNC(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,int);
LOCAL void   Subcell_limiting_3rd_degreeP3_PNC_grouping(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,int);

LOCAL void   Subcell_limiting_2nd_degreeP3_PNC(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,int);
LOCAL void   Subcell_limiting_2nd_degreeP3_PNC_multi_pt_limit(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,int);
LOCAL void   Subcell_limiting_2nd_degreeP3_PNC_grouping(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,int);

LOCAL void   Subcell_limiting_1st_degreeP3_PNC(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,int,int,int);
LOCAL void   Subcell_limiting_1st_degreeP3_PNC_multi_pt_limit(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,int,int,int);
LOCAL void   Subcell_limiting_1st_degreeP3_PNC_pt_limit(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,int,int,int);
LOCAL void   Subcell_limiting_1st_degreeP3_PNC_grouping(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,int,int,int);

LOCAL void   pre_Subcell_limiting_3rd_degreeP3_PNC(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,int);
LOCAL void   pre_Subcell_limiting_2nd_degreeP3_PNC(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,int);
LOCAL void   pre_Subcell_limiting_1st_degreeP3_PNC(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,int,int,int);
LOCAL void   CV_u_average_indx(TRI*,int,int,float*);
LOCAL void   CV_u_average_indx_from_store(TRI*,int,int,Limiting_store*,float*);
LOCAL void   CV_R_degree3_term_averageP3_store(TRI*,int,Limiting_store,float**,int,float*);
LOCAL void   CV_R_degree3_term_averageP3(TRI*,int,double**,int,float*);
LOCAL void   avg_st_on_cv_ver2(TRI*,int,Locstate,float*);
LOCAL void   avg_st_on_cv_from_store(TRI*,int,Locstate,Limiting_store*,float*);
LOCAL void   update_CVs(Front*,Mid_soln*,Limiting_store**,int);
LOCAL void   update_tri_CVs(TRI*,Mid_soln*,Limiting_store**,int);

LOCAL void   comput_Roe_ver2(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,Locstate*,float **L[],float **R[]);
LOCAL void   convert_cand_conu_charu(TRI*,TRI  *nbtri[3],Mid_soln*,Limiting_store**,int,float**);
LOCAL void   compute_Roemean(Locstate,Locstate,Locstate,float*);
// LOCAL void   compute_Roemean_MHD(Locstate,Locstate,float*,float*,float*);
LOCAL void   LR_matrix_in_dir(float*,Locstate,float**,float**);
LOCAL int    get_tri_edge_vert_CV_stencil(TRI*,TRI *nbtri[3],int,int*,int*,TRI**,int*);
LOCAL int    get_tri_center_CV_stencil(TRI*,TRI *nbtri[3],int,int*,int*,TRI**,int*);
LOCAL int    find_char_dir(TRI*,TRI *nbtri[3],Mid_soln*,int);
LOCAL void   update_coef_layers(TRI*,Mid_soln *,int, Front*,int*);
LOCAL void   pre_process_limiting_P3(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int);
LOCAL void   preprocess_P3_times_rearrange_order(Front*,Mid_soln*,Limiting_store**,TRI***,int,int,int,int,int);
LOCAL int    identify_limiting_candidates(Front*,Mid_soln*,Limiting_store**,int,int,int,TRI **row_tris[],int*);
LOCAL void   subcell_update_high_degree_terms(Front*,Mid_soln*,Limiting_store**,TRI***,int,int,int,int,int);
LOCAL void   update_buffer_of_test_problems(Front*,Mid_soln*,int,Limiting_store**,TRI***,int,int,int);

////////////// for Quadratic tech.
LOCAL void   Subcell_limiting_1st_degreeP3_PNC_2nd_degree_tech(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,
                               int, int, int, int);
LOCAL void   map_p2_part_of_p3_ploy(TRI**,int*,int*,int,Limiting_store**,int,double*);
LOCAL void   change_cent_of_p2_poly(TRI**,int*,int*,int,double*,CV_Soln*);
LOCAL void   change_cent_of_p3_poly(TRI**,int*,int*,int,double*,CV_Soln*);
LOCAL void   avg_st_on_cv_for_mapped_p2_poly(TRI*,int,double**,double*);
LOCAL void   R_quadr_term_averageP3(TRI*,Locstate,double**,double*);
LOCAL void   avg_st_for_mapped_p2(TRI*,CV_Soln,int,float*);
LOCAL void   degree1_above_term_average_tmp(TRI*,Locstate,double**,float*); 
LOCAL void   avg_st_on_tri_for_p2_poly(TRI*,Locstate,double**,float*);

LOCAL void   CV_du_indx(TRI*,int,int,double*,double*,float*);
LOCAL void   CV_du_indx_from_store(TRI*,int,int,Limiting_store*,double*,double*,float*);

// LOCAL void   WENO_FV_P2(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int,Front*);
LOCAL void   Subcell_limiting_2nd_degreeP2(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int);
LOCAL void   Subcell_limiting_1st_degreeP2(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int);
LOCAL void   Subcell_limiting_1st_degreeP2_multi_pt_limit(TRI*,TRI *nbtri[3],Mid_soln*,Limiting_store**,int);
LOCAL void   CV_R_degree2_term_average(TRI*,int,Locstate,double**,float*);

LOCAL void   CV_u_x_average_indx(TRI*,int,float*);
LOCAL void   CV_u_y_average_indx(TRI*,int,float*);
LOCAL void   CV_u_x_average_indx_from_store(TRI*,int,Limiting_store*,float*);
LOCAL void   CV_u_y_average_indx_from_store(TRI*,int,Limiting_store*,float*);
LOCAL void   limiting_1st_degreeP1(TRI*,TRI *nbtri[], Mid_soln*,int);
LOCAL void   Subcell_limiting_1st_degreeP1_MHD_FV(TRI*,TRI *nbtri[], Mid_soln*,int,Front*);
LOCAL void   avg_st_on_cv_ver3_MHD(TRI*,int,Locstate,float*);
LOCAL void   CV_u_x_average_indx_MHD(TRI*,int,Locstate,float*);
LOCAL void   CV_u_y_average_indx_MHD(TRI*,int,Locstate,float*);
LOCAL void   get_one_sided_sten_P2(TRI*,int,TRI *tris[],int*);
LOCAL void   get_reverse_sten_P2(TRI*,int,TRI *tris[],int*);
LOCAL void   get_reverse_sten_P1(TRI*,int,TRI *tris[],int*);
LOCAL void   p2_reverse_sten_5pt_vertex(TRI*,int,TRI  *tris[],int*);
LOCAL void   Compute_flattener(TRI*,TRI *nbtri[], Mid_soln*,Limiting_store**,int,Front*);
LOCAL void   tri_comput_P1_polynomials_from_avg_MHD_3sten(TRI*,TRI *nbtri[],int,Mid_soln*,Limiting_store**,int,int,double**);
LOCAL void   negative_therm_press_after_B_reconstruction(TRI*,TRI *nbtri[],Mid_soln*,Limiting_store**,int,Front*);
LOCAL void   limiting_1st_degreeP1_MHD_FV(TRI*,TRI  *nbtri[3],Mid_soln*,int,Front*);
LOCAL void   limiting_1st_degreeP1_MHD(TRI*,TRI  *sten_tris[][3],int, Mid_soln*,int,Front*);
// LOCAL void   limiting_P1_MHD(TRI*,TRI *sten_tri[],int,Mid_soln*,int,Front*);
// LOCAL void   Positivity_preserve(TRI*,TRI *nbtri[], Mid_soln*,Limiting_store**,int,Front*);
// LOCAL int    dens_out_range(TRI*,Locstate,float,float);
// LOCAL float  POLY_thermal_pressure_MHD(Locstate);

// LOCAL void   tri_comput_P2_polynomials_from_avg_MHD_6sten_one_side(TRI*,TRI *nbtri[],int,Mid_soln*,Limiting_store**,int,double**);
// LOCAL void   tri_comput_P2_polynomials_from_avg_MHD_7sten_reverse(TRI*,TRI *nbtri[],int,Mid_soln*,Limiting_store**,int,double**);

LOCAL void   all_neighboring_cells_ver2(TRI*,TRI *sten_tris[],int*);//Huijing 0514

/* Prototype of the Fortran routine dgglse, given C linkage when this file is
 * compiled as C++. In LAPACK, DGGLSE solves the linear equality-constrained
 * least squares problem: minimise || c - A*x ||_2 subject to B*x = d, with A
 * M-by-N and B P-by-N. All arguments are passed by address, as the pointer
 * parameters below show; the trailing comments name each argument in order. */
#if defined(__cplusplus)
extern "C" {
#endif /* defined(__cplusplus) */
FORTRAN     void    FORTRAN_NAME(dgglse)(int*,int*,int*,double*,int*, ///// int M,int N,int P, double array A, int LDA
                                double*,int*,double*,double*,double*, ///// double array B, int LDB, C, D, X,
                                double*,int*,int*);                   ///// double array WORK, int LWORK, int INFO
#if defined(__cplusplus)
}
#endif /* defined(__cplusplus) */


/* Lax 22, just one under shoot
 */
////// group 1-------------------
/**
LOCAL int   grouping_n[9] = {3, 2, 2, 3, 2, 2, 3, 2, 2},  grouping_indx[9][3] = {{0, 1, 11},
                                                                              {0, 1, 0},
                                                                              {3, 4, 0},
                                                                              {3, 4, 5},
                                                                              {4, 5, 0},
                                                                              {7, 8, 0},
                                                                              {7, 8, 9},
                                                                              {8, 9, 0},
                                                                              {0, 11, 0}
                                                                             };
LOCAL int   gnum_CVs = 9;
**/
/// together with preprocess, makes current best result-- same as p2
////// group 2------------------------------
/* Active grouping table (the other variants around it are commented out).
 * There are gnum_CVs = 9 groups. Row g of grouping_indx lists member indices
 * (values in 0..11 here) and grouping_n[g] gives how many leading entries of
 * that row are meaningful; with grouping_n[g] == 2 the third column is 0.
 * NOTE(review): the third column looks like padding, and the indices
 * presumably select sub-cells (CVs) of a triangle -- the consumers are not in
 * this part of the file, so confirm against them. */
LOCAL int   grouping_n[9] = {2, 2, 2, 2, 2, 2, 2, 2, 2},  grouping_indx[9][3] = {{1, 11, 0},
                                                                              {0, 1, 0},
                                                                              {3, 4, 0},
                                                                              {3, 5, 0},
                                                                              {4, 5, 0},
                                                                              {7, 8, 0},
                                                                              {7, 9, 0},
                                                                              {8, 9, 0},
                                                                              {0, 11, 0}
                                                                             };
LOCAL int    gnum_CVs = 9;
//////// group 3---------------------
///// large overshoots
/***
LOCAL int   grouping_n[9] = {3, 2, 2, 3, 2, 2, 3, 2, 2},  grouping_indx[9][3] = {{0, 1, 11},
                                                                              {1, 2, 0},
                                                                              {2, 3, 0},
                                                                              {3, 4, 5},
                                                                              {5, 6, 0},
                                                                              {6, 7, 0},
                                                                              {7, 8, 9},
                                                                              {9, 10, 0},
                                                                              {10, 11, 0}
                                                                             };
LOCAL int  gnum_CVs = 9;
****/
//////// group 5---------------------
/***
LOCAL int   grouping_n[9] = {3, 1, 1, 3, 1, 1, 3, 1, 1},  grouping_indx[9][3] = {{0, 1, 11},
                                                                              {1, 0, 0},
                                                                              {3, 0, 0},
                                                                              {3, 4, 5},
                                                                              {5, 0, 0},
                                                                              {7, 0, 0},
                                                                              {7, 8, 9},
                                                                              {9, 0, 0},
                                                                              {11, 0, 0}
                                                                             };
LOCAL int      gnum_CVs = 9;
***/
/*
LOCAL int   grouping_n[9] = {2, 1, 1, 2, 1, 1, 2, 1, 1},  grouping_indx[9][3] = {{1, 11, 0},
                                                                              {1, 0, 0},
                                                                              {3, 0, 0},
                                                                              {3, 5, 0},
                                                                              {5, 0, 0},
                                                                              {7, 0, 0},
                                                                              {7, 9, 0},
                                                                              {9, 0, 0},
                                                                              {11, 0, 0}
                                                                             };
LOCAL int      gnum_CVs = 9;
*/
// no good, large overshoot
/* 
LOCAL int   grouping_n[9] = {3, 3, 3, 3, 3, 3, 3, 3, 3},  grouping_indx[9][3] = {{0, 1, 11},
                                                                              {1, 2, 0},
                                                                              {2, 3, 4},
                                                                              {3, 4, 5},
                                                                              {5, 6, 4},
                                                                              {6, 7, 8},
                                                                              {7, 8, 9},
                                                                              {9, 10, 8},
                                                                              {10, 11, 0}
                                                                             };
LOCAL int   gnum_CVs = 9;
*/
// no good, large over/under shoot
//// group4-----------------------
/***
LOCAL int  grouping_n[6] = {3, 3, 3, 3, 3, 3},  grouping_indx[6][3] = {{0, 1, 11},
                                                                              {1, 2, 3},
                                                                              {3, 4, 5},
                                                                              {5, 6, 7},
                                                                              {7, 8, 9},
                                                                              {9, 10, 11}};
LOCAL int  gnum_CVs = 6;
***/
/*
LOCAL int   grouping_n[6] = {3, 2, 3, 2, 3, 2},  grouping_indx[6][3] = {{0, 1, 11},
                                                                              {1, 3, 0},
                                                                              {3, 4, 5},
                                                                              {5, 7, 0},
                                                                              {7, 8, 9},
                                                                              {9, 11, 0}};
LOCAL int  gnum_CVs = 6;
*/
/*
LOCAL int   grouping_n[6] = {3, 1, 3, 1, 3, 1},  grouping_indx[6][3] = {{0, 1, 11},
                                                                              {2, 0, 0},
                                                                              {3, 4, 5},
                                                                              {6, 0, 0},
                                                                              {7, 8, 9},
                                                                              {10, 0, 0}};
LOCAL int   gnum_CVs = 6;
*/
//// P2 SV unstable partition suggested by yingjie. Partial cells from current triangle
//// will be completely used.
/**
LOCAL int   grouping_n[6] = {2, 2, 2, 2, 2, 2},  grouping_indx[9][3] = {{0, 3, 0},
                                                                              {1, 2, 0},
                                                                              {4, 7, 0},
                                                                              {5, 6, 0},
                                                                              {8, 11, 0},
                                                                              {9, 10, 0}
                                                                             };
LOCAL int  gnum_CVs = 6;
***/
//// P2 SV unstable partition suggested by yingjie. Partial cells from current triangle
////// will be completely used.
/*
LOCAL int   grouping_n[6] = {1, 1, 1, 1, 1, 1},  grouping_indx[9][3] = {{3, 0, 0},
                                                                              {2, 0, 0},
                                                                              {7, 0, 0},
                                                                              {6, 0, 0},
                                                                              {11,0, 0},
                                                                              {10,0, 0}
                                                                             };
LOCAL int  gnum_CVs = 6;
*/

 
// LOCAL int    area_WENO_mod_on_3rd(float*,float*,float*,int,float,float,float*);
// LOCAL int    area_WENO_mod_on_2nd(float*,float*,float*,int,float,float,float*);
// LOCAL int    area_WENO_mod_1(float*,float*,float*,int,float,float*);
LOCAL void     set_WENO_combined_sten(TRI*,TRI *sten_tris[][3],int*);
LOCAL void     NEW_extrema_detec_on_WENO_sten(float*,float nbuave[][2][8],int,int*);
LOCAL void     CV_u_average_indx_MHD(TRI*,int,int,Locstate,float*);
LOCAL void     impose_loc_divergence_free(TRI*,Locstate);
LOCAL void     B_at_dual_cell_cent(POLYGON*,Front*,Mid_soln*,int,double*);
// LOCAL void     Bn_at_dual_cell_edges(POLYGON*,Mid_soln*,Dual_cell_Mid_soln*,int,double Bn[][N_COEF_EDGE]);
LOCAL void     trace_of_dual_cell_P1(POLYGON*,int,Dual_cell_Mid_soln*,Mid_soln*);
LOCAL void     trace_of_dual_cell_P1_with_quad_bubbles(POLYGON*,int,Dual_cell_Mid_soln*,Mid_soln*);
LOCAL POLYGON  *tmp_construct_rect(void);
LOCAL POLYGON  *tmp_construct_rect_DG(void);
LOCAL POLYGON  *tmp_construct_rect_DG_closed_config(void);
LOCAL void     Project_dual_Mag_to_tri_skeleton(Front*,Mid_soln*,Dual_cell_Mid_soln*,int);
LOCAL void     check_global_divgerence_free_on_tri_mesh(Front*,Mid_soln*,Dual_cell_Mid_soln*,int);

LOCAL void     pp_receive_interior_tri_edge_B_fields_on_Periodic_Side(int*,int,int,float,Front*,Mid_soln*,int);
LOCAL void     pp_receive_interior_tri_edge_B_fields(int*,int,int,float,Front*,Mid_soln*,int,Limiting_store**,int);
#if defined(__MPI__)
LOCAL void     pp_send_interior_tri_edge_B_fields_on_Periodic_Side(int*,int,int,float,Front*,Mid_soln*,int,MPI_Request*);
LOCAL void     pp_send_interior_tri_edge_B_fields(int*,int,int,float,Front*,Mid_soln*,int,MPI_Request*,int);
#endif // #if defined(__MPI__)

#if defined(__MPI__)
LOCAL void     pp_send_interior_dual_cell_edge_B_fields(int*,int,int,float,Front*,
                         Dual_cell_Mid_soln*,int,MPI_Request*,int);
LOCAL void     pp_receive_interior_dual_cell_edge_B_fields(int*,int,int,float,Front*,
                         Dual_cell_Mid_soln*,int,int);
LOCAL void     pp_send_interior_dual_cell_edge_B_fields_on_Periodic_Side(int*,int,int,float,Front*,
                         Dual_cell_Mid_soln*,int,MPI_Request*);
LOCAL void     pp_receive_interior_dual_cell_edge_B_fields_on_Periodic_Side(int*,int,int,float,Front*,
                         Dual_cell_Mid_soln*,int,MPI_Request*);
#endif // if defined(__MPI__) //
LOCAL void     print_dual_cell_match(Dual_cell_Buf_soln*);
LOCAL bool     check_dual_cell_on_subdomain(Front*,DUAL_CELL_PAIR*,int);
LOCAL void     Zero_out_edge_B_variance_on_dual_cell(Front*,Mid_soln*,Dual_cell_Mid_soln*,int);
LOCAL void     Build_B_on_dual_cell_second_time(Front*,Mid_soln*,Dual_cell_Mid_soln*,int);
LOCAL int      add_dual_cells_on_vert_tri_in_dual_pairs(Front*,DUAL_CELL_PAIR*,int);


/*
*       all_neighboring_cells():
*
*       Gathers into sten_tris[] the cells collected around tri, one side at
*       a time. For each of the three sides, tris_between_edge_neighbrs()
*       fills a candidate list; its entries from index 2 onward are appended,
*       followed by the triangle across that side, Tri_on_side(tri,side).
*       No NULL or duplicate filtering is done here.
*
*       The number of entries written is returned through *nn_num; the caller
*       must provide a sten_tris[] large enough to hold them.
*/
EXPORT void all_neighboring_cells(
         TRI       *tri,
         TRI       *sten_tris[],
         int       *nn_num)
{
         TRI       *between[3][60];
         int       n_between[3];
         int       s, j, count = 0;

         for (s = 0; s < 3; ++s)
         {
             tris_between_edge_neighbrs(tri, NULL, s, between[s], &n_between[s]);

             /* the first two candidates of each list are not copied */
             j = 2;
             while (j < n_between[s])
                 sten_tris[count++] = between[s][j++];

             /* the edge neighbor on this side always closes the run */
             sten_tris[count++] = Tri_on_side(tri,s);
         }

         *nn_num = count;
}

/*
*       set_WENO_combined_sten():
*
*       Fills sten_tris[][3] with three-triangle stencils built around tri
*       and returns how many were written in *out_N_sten (9 as coded).
*
*       Stencils 0-5 (two per side): {tri, nb, x}, where nb is the triangle
*       across the side and x is the triangle across side (b+1)%3, then
*       (b+2)%3, of nb; b is the side of nb across which tri lies.
*       Stencils 6-8 (one per side): the first three triangles returned by
*       get_reverse_sten_P1() for that side.
*
*       nb is dereferenced without a NULL check, so every side of tri must
*       have a neighbor.
*/
LOCAL void set_WENO_combined_sten(
        TRI      *tri,
        TRI      *sten_tris[][3],
        int      *out_N_sten)
{
        TRI      *rev_tris[20];
        TRI      *nb;
        int      s, back, m, n_rev, count = 0;

        /* one-sided stencils: two for every side of tri */
        for (s = 0; s < 3; ++s)
        {
             nb = Tri_on_side(tri,s);

             /* side of nb that leads back to tri (stays 3 if none matches) */
             back = 0;
             while (back < 3 && tri != Tri_on_side(nb,back))
                 ++back;

             sten_tris[count][0] = tri;
             sten_tris[count][1] = nb;
             sten_tris[count][2] = Tri_on_side(nb,(back+1)%3);
             ++count;

             sten_tris[count][0] = tri;
             sten_tris[count][1] = nb;
             sten_tris[count][2] = Tri_on_side(nb,(back+2)%3);
             ++count;
        }

        /* reverse-sided stencils: one for every side of tri */
        for (s = 0; s < 3; ++s)
        {
             get_reverse_sten_P1(tri, s, rev_tris, &n_rev);
             for (m = 0; m < 3; ++m)
                 sten_tris[count][m] = rev_tris[m];
             ++count;
        }

        *out_N_sten = count;
}

/*
*       set_HR_sten():
*
*       Builds the list nntri[] of triangles surrounding tri (at most 9
*       entries; per the original note, added in counter-clockwise order as
*       in the 3rd order WENO reconstruction on triangles). With f(m) the
*       side of nbtri[m] across which tri lies, the list is assembled, for
*       m = 0, 1, 2, as
*
*           nbtri[m],
*           the triangle across side (f(m)+2)%3 of nbtri[m], if not NULL,
*           the triangle across side (f(n)+1)%3 of nbtri[n], n = (m+1)%3,
*               if not NULL and not already in the list.
*
*       Only the last kind of entry is checked against the list for
*       duplicates. nbtri[0..2] are dereferenced and must not be NULL.
*       The number of entries is returned in *nn_num.
*/
EXPORT void set_HR_sten(
        TRI       *nbtri[3],
        TRI       *tri,
        TRI       *nntri[],
        int       *nn_num)
{
        TRI       *cand, *nxt;
        int       m, s, i, seen, count = 0;

        for (m = 0; m < 3; ++m)
        {
            /* the edge neighbor itself */
            nntri[count++] = nbtri[m];

            /* its neighbor two sides on from the side shared with tri */
            s = 0;
            while (s < 3 && Tri_on_side(nbtri[m],s) != tri)
                ++s;
            cand = Tri_on_side(nbtri[m],(s+2)%3);
            if (cand != NULL)
                nntri[count++] = cand;

            /* the following edge neighbor's neighbor one side on from the */
            /* side shared with tri; only this entry is tested for repeats */
            nxt = nbtri[(m+1)%3];
            s = 0;
            while (s < 3 && Tri_on_side(nxt,s) != tri)
                ++s;
            cand = Tri_on_side(nxt,(s+1)%3);
            if (cand == NULL)
                continue;

            seen = 0;
            for (i = 0; i < count; ++i)
            {
                if (nntri[i] == cand)
                {
                    seen = 1;
                    break;
                }
            }
            if (!seen)
                nntri[count++] = cand;
        }

        *nn_num = count;
}

/*
*       find_nghbr_nghbr():
*
*       Collects the second-layer neighbors of tri. For every non-NULL
*       nbtri[i], each of its three side neighbors other than tri itself is
*       appended to nntri[] when it is non-NULL and has at least one vertex
*       (compared by POINT pointer) in common with tri.
*
*       nn_num[i] receives the number of triangles contributed by nbtri[i]
*       (0 when nbtri[i] is NULL); the return value is the total count.
*       Entries are not checked for duplicates. fr is not used.
*/
LOCAL int find_nghbr_nghbr(
        TRI       *nbtri[3],
        TRI       *tri,
        TRI       *nntri[9],
        int       *nn_num,
        Front     *fr)
{
        POINT     *vert[3];
        TRI       *cand;
        int       nb, s, m, touches, hits, total = 0;

        for (m = 0; m < 3; ++m)
            vert[m] = Point_of_tri(tri)[m];

        for (nb = 0; nb < 3; ++nb)
        {
            hits = 0;
            for (s = 0; nbtri[nb] != NULL && s < 3; ++s)
            {
                cand = Tri_on_side(nbtri[nb],s);
                if (cand == tri || cand == NULL)
                    continue;

                /* scan the 3x3 vertex pairs, stopping at the first match */
                touches = 0;
                for (m = 0; m < 9 && !touches; ++m)
                {
                    if (Point_of_tri(cand)[m%3] == vert[m/3])
                        touches = 1;
                }

                if (touches)
                {
                    nntri[total++] = cand;
                    ++hits;
                }
            }
            nn_num[nb] = hits;
        }
        return total;
}

/*
*       limiting_1st_degree_ENO():
*
*       Rebuilds, for every equation k, the three lowest coefficients
*       (u0, u1, u2) of the solution on tri and stores them in
*       midsoln[tri->id].st[0] (st2); the cell averages of st2 are then
*       overwritten with those of the current state st.
*
*       tri       triangle being limited.
*       nbtri     its three edge neighbors (all dereferenced, none may be NULL).
*       nntri     second-layer neighbors, N_nn of them (at most 9).
*       nn_side   nn_side[i] != 0 if nbtri[i] contributed entries to nntri.
*       midsoln   per-triangle Runge-Kutta stage storage, indexed by tri->id.
*       rk_iter   stage to read from; RK_STEP selects the states held on the
*                 triangles themselves (->st).
*
*       Steps:
*         1. Pick the states of tri, nbtri[] and nntri[]. Triangles with a
*            negative id have no midsoln entry and fall back to ->st.
*         2. Form "L" averages = cell average minus the "R" average returned
*            by R_degree2_term_average()/R_degree2_term_average_Liu()
*            (presumably the contribution of the degree >= 2 terms of st2,
*            measured about the centroid of tri -- confirm in those routines).
*         3. For each neighbor i, form a 2x2 system from the centroid offsets
*            and L-average differences of nbtri[i] and of a second triangle
*            chosen by min_angle_tri(), and pass it to comp_coef() to obtain
*            a candidate coefficient pair coef[i].
*         4. Apply minmod to the three candidates, component by component.
*
*       Assumes N_EQN <= 4: the work arrays have 4 entries and only
*       Dens, Mom[0], Mom[1] and Energy are loaded.
*/
LOCAL void limiting_1st_degree_ENO(
         TRI       *tri,
         TRI       *nbtri[3],
         TRI       *nntri[9],
         int       N_nn,
         int       *nn_side,
         Mid_soln  *midsoln,
         int       rk_iter)
{
         Locstate st, nbst[3], st2, nnst[9], tmpst;
         float    uavg[4], nbuavg[3][4], nnuavg[9][4], tmpavg[4];
         float    Ravg[4], nbRavg[3][4], nnRavg[9][4], tmpRavg[4];
         float    Lavg[4], nbLavg[3][4], nnLavg[9][4], tmpLavg[4];
         int      i, dim = 2, k;
         double   *cent, *nbcent[3], *tmpcent, *nncent[9];
         float    rside[2], A[2][2];
         float    coef[3][2];
         float    u0, u1, u2;
         int      ipos;
         TRI      *minangle;

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < 3; i++)
                 nbst[i] = nbtri[i]->st;
             for(i = 0; i < N_nn; i++)
                 nnst[i] = nntri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
             {
                 if(nbtri[i]->id >= 0)
                     nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
                 else
                     nbst[i] = nbtri[i]->st;
             }
             for(i = 0; i < N_nn; i++)
             {
                 // Same guard as for nbtri[] above and minangle below: a
                 // negative id must not be used to index midsoln[].
                 if(nntri[i]->id >= 0)
                     nnst[i] = midsoln[nntri[i]->id].st[rk_iter];
                 else
                     nnst[i] = nntri[i]->st;
             }
         }

         // Destination of the reconstructed coefficients.
         st2 = midsoln[tri->id].st[0];

         uavg[0] = Dens(st);
         uavg[1] = Mom(st)[0];
         uavg[2] = Mom(st)[1];
         uavg[3] = Energy(st);

         R_degree2_term_average(tri,st2,Ravg);

         // 3 neighbor tris
         for(i = 0; i < 3; i++)
         {
             nbuavg[i][0] = Dens(nbst[i]);
             nbuavg[i][1] = Mom(nbst[i])[0];
             nbuavg[i][2] = Mom(nbst[i])[1];
             nbuavg[i][3] = Energy(nbst[i]);

             comp_mass_matrix_1st_row(MAX_N_COEF,nbtri[i],dim,fg_centroid(tri),mass_1st_row);
             R_degree2_term_average_Liu(nbtri[i],st2,mass_1st_row,nbRavg[i]);
         }

         // neighbor's neighbors
         for(i = 0; i < N_nn; i++)
         {
             nnuavg[i][0] = Dens(nnst[i]);
             nnuavg[i][1] = Mom(nnst[i])[0];
             nnuavg[i][2] = Mom(nnst[i])[1];
             nnuavg[i][3] = Energy(nnst[i]);

             comp_mass_matrix_1st_row(MAX_N_COEF,nntri[i],dim,fg_centroid(tri),mass_1st_row);
             R_degree2_term_average_Liu(nntri[i],st2,mass_1st_row,nnRavg[i]);
         }

         // "L" averages: cell average with the "R" average removed.
         for(k = 0; k < N_EQN; k++)
         {
             Lavg[k] = uavg[k]-Ravg[k];
             for(i = 0; i < 3; i++)
                 nbLavg[i][k] = nbuavg[i][k]-nbRavg[i][k];
             for(i = 0; i < N_nn; i++)
                 nnLavg[i][k] = nnuavg[i][k]-nnRavg[i][k];
         }

         cent = fg_centroid(tri);
         for(i = 0; i < 3; i++)
             nbcent[i] = fg_centroid(nbtri[i]);
         for(i = 0; i < N_nn; i++)
             nncent[i] = fg_centroid(nntri[i]);

         for(k = 0; k < N_EQN; k++)
         {
             // linear part of polynomial
             // tri, nbtri[i], + nghbr of nbtri[i] with min angle
             for(i = 0; i < 3; i++)
             {
                 if(nn_side[i] != 0)
                 {
                     // ipos is the position of the selected triangle inside
                     // nntri[], so both its centroid and its average must be
                     // taken from the nn* arrays. (The original read
                     // nbLavg[ipos], which has only 3 rows.)
                     minangle = min_angle_tri(tri, nbtri[i], nntri, N_nn, &ipos);
                     rside[0] = nbLavg[i][k] - Lavg[k];
                     rside[1] = nnLavg[ipos][k] - Lavg[k];
                     A[0][0] = (nbcent[i][0]-cent[0]);
                     A[0][1] = (nbcent[i][1]-cent[1]);
                     A[1][0] = (nncent[ipos][0]-cent[0]);
                     A[1][1] = (nncent[ipos][1]-cent[1]);

                     comp_coef(A,rside,coef[i]);
                 }
                 else
                 {
                     // nghbr does not possess nghbrs: choose the second
                     // triangle among the other edge neighbors and all of
                     // nntri[] (at most 2 + 9 = 11 candidates).
                     TRI *tmparray[12];
                     int jj, ll = 0;
                     for(jj = 0; jj < 3; jj++)
                     {
                         if(nbtri[jj] != nbtri[i])
                         {
                             tmparray[ll] = nbtri[jj];
                             ll++;
                         }
                     }
                     for(jj = 0; jj < N_nn; jj++)
                     {
                         tmparray[ll] = nntri[jj];
                         ll++;
                     }
                     minangle = min_angle_tri(tri, nbtri[i], tmparray, ll, &ipos);
                     if(rk_iter == RK_STEP)
                     {
                         tmpst = minangle->st;
                     }
                     else
                     {
                         if(minangle->id >= 0)
                             tmpst = midsoln[minangle->id].st[rk_iter];
                         else
                             tmpst = minangle->st;
                     }
                     tmpavg[0] = Dens(tmpst);
                     tmpavg[1] = Mom(tmpst)[0];
                     tmpavg[2] = Mom(tmpst)[1];
                     tmpavg[3] = Energy(tmpst);

                     comp_mass_matrix_1st_row(MAX_N_COEF,minangle,dim,fg_centroid(tri),mass_1st_row);
                     R_degree2_term_average_Liu(minangle,st2,mass_1st_row,tmpRavg);
                     for(jj = 0; jj < N_EQN; jj++)
                         tmpLavg[jj] = tmpavg[jj]-tmpRavg[jj];
                     tmpcent = fg_centroid(minangle);

                     rside[0] = nbLavg[i][k] - Lavg[k];
                     rside[1] = tmpLavg[k] - Lavg[k];
                     A[0][0] = (nbcent[i][0]-cent[0]);
                     A[0][1] = (nbcent[i][1]-cent[1]);
                     A[1][0] = (tmpcent[0]-cent[0]);
                     A[1][1] = (tmpcent[1]-cent[1]);

                     comp_coef(A,rside,coef[i]);
                 }
             }

             // minmod over the three candidates, one component at a time.
             // (A disabled variant in the original additionally applied
             // minmod between (1+eps)*u and the mean of the three candidates.)
             u1 = minmod(coef[0][0],coef[1][0]);
             u1 = minmod(coef[2][0],u1);

             u2 = minmod(coef[0][1],coef[1][1]);
             u2 = minmod(coef[2][1],u2);

             u0 = Lavg[k];

             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             }
         } // End of: for(k = 0; k < N_EQN; k++)

         // The cell averages of the result are those of the current state.
         Dens(st2) = Dens(st);
         Mom(st2)[0] = Mom(st)[0];
         Mom(st2)[1] = Mom(st)[1];
         Energy(st2) = Energy(st);
}

// Reconstructed coeffs. are stored in RK_STEP[0]
/*
 * Limit the second-degree DG coefficients (slots 3, 4, 5 of dg_Dens,
 * dg_Mom and dg_Energy) of `tri`.
 *
 * For every equation k and every edge neighbour nbtri[i], two 2x2 systems
 * are assembled from centroid offsets (tri -> nbtri[i] and tri -> a second
 * triangle chosen by min_angle_tri()) with right-hand sides taken from the
 * differences of the u_x_average()/u_y_average() values.  comp_coef()
 * solves each system, which gives three candidate pairs per derivative.
 * The candidates are combined with minmod() and written into
 * midsoln[tri->id].st[0].
 *
 *   tri      triangle being limited
 *   nbtri    its three edge neighbours
 *   nntri    neighbours of the neighbours, N_nn valid entries
 *   nn_side  nn_side[i] != 0 selects the second triangle from nntri;
 *            otherwise it is selected from the other two edge neighbours
 *            plus nntri
 *   midsoln  per-triangle intermediate states, indexed by tri->id
 *   rk_iter  input states come from TRI::st when rk_iter == RK_STEP,
 *            else from midsoln[].st[rk_iter]
 *
 * NOTE(review): the diagnostic dump for tri->id == 110 is hard-wired and
 * toggles the file-level debug_flag for the duration of the call.
 */
LOCAL void limiting_2nd_degree_ENO(
         TRI       *tri,
         TRI       *nbtri[3],
         TRI       *nntri[9],
         int       N_nn,
         int       *nn_side,
         Mid_soln  *midsoln,
         int       rk_iter)
{
         Locstate st, nbst[3], st2, nnst[9], tmpst;
         float    uxavg[4], nbuxavg[3][4], nnuxavg[9][4];
         float    uyavg[4], nbuyavg[3][4], nnuyavg[9][4];
         float    tmpuxavg[4], tmpuyavg[4];
         float    *ux2nd, *uy2nd;    // averages on the second stencil tri
         int      i, k, ipos;
         double   *cent, *nbcent[3], *nncent[9], *cent2nd;
         double   dx1, dy1, dx2, dy2; // centroid offsets of the two stencil tris
         float    rside[2], A[2][2];
         float    coefx[3][2], coefy[3][2];
         float    u3, u4, u5;
         float    dirx[3], diry[3]; // cos of the angle
         TRI      *minangle;

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < 3; i++)
                 nbst[i] = nbtri[i]->st;
             for(i = 0; i < N_nn; i++)
                 nnst[i] = nntri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
             {
                 if(nbtri[i]->id >= 0)
                     nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
                 else
                 {
                     // attached buffer
                     nbst[i] = nbtri[i]->st;
                 }
             }
             for(i = 0; i < N_nn; i++)
             {
                 // Same negative-id guard as for nbst[] above and tmpst
                 // below; a negative id must never index midsoln[].
                 if(nntri[i]->id >= 0)
                     nnst[i] = midsoln[nntri[i]->id].st[rk_iter];
                 else
                     nnst[i] = nntri[i]->st;
             }
         }

         st2 = midsoln[tri->id].st[0];
         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         cent = fg_centroid(tri);
         for(i = 0; i < 3; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         for(i = 0; i < N_nn; i++)
             nncent[i] = fg_centroid(nntri[i]);

         u_x_average(tri,st,uxavg);
         u_y_average(tri,st,uyavg);

         for(i = 0; i < 3; i++)
         {
             u_x_average(nbtri[i],nbst[i],nbuxavg[i]);
             u_y_average(nbtri[i],nbst[i],nbuyavg[i]);
         }
         for(i = 0; i < N_nn; i++)
         {
             u_x_average(nntri[i],nnst[i],nnuxavg[i]);
             u_y_average(nntri[i],nnst[i],nnuyavg[i]);
         }

         if(tri->id == 110)
         {
             printf("\n-------------------------------------\n");
             printf("limiting_2nd_degree_ENO New soln of tri (%d): at iteration %d\n", tri->id, rk_iter);
             printf("tri neigh:0, 1, 2\n");
             printf("neighbr tri id (%d %d %d)\n", nbtri[0]->id, nbtri[1]->id, nbtri[2]->id);

             printf("tri input state:");
             verbose_print_state("tri:",st);
             printf("print nb0, 1, 2, states\n");
             debug_flag = YES;
         }

         for(k = 0; k < N_EQN; k++)
         {
             // d_x u and d_y u polynomial
             // tri, nbtri[i], + nghbr of nbtri[i] with min angle
             for(i = 0; i < 3; i++)
             {
                 if(nn_side[i] != 0)
                 {
                     minangle = min_angle_tri(tri, nbtri[i], nntri, N_nn, &ipos);
                     ux2nd   = nnuxavg[ipos];
                     uy2nd   = nnuyavg[ipos];
                     cent2nd = nncent[ipos];
                 }
                 else
                 {
                     // nghbr does not possess nghbrs: choose among the
                     // other edge neighbours and all of nntri instead.
                     TRI *tmparray[12];
                     int jj, ll = 0;
                     for(jj = 0; jj < 3; jj++)
                     {
                         if(nbtri[jj] != nbtri[i])
                         {
                             tmparray[ll] = nbtri[jj];
                             ll++;
                         }
                     }
                     for(jj = 0; jj < N_nn; jj++)
                     {
                         tmparray[ll] = nntri[jj];
                         ll++;
                     }
                     minangle = min_angle_tri(tri, nbtri[i], tmparray, ll, &ipos);
                     if(rk_iter == RK_STEP)
                         tmpst = minangle->st;
                     else
                     {
                         if(minangle->id >= 0)
                             tmpst = midsoln[minangle->id].st[rk_iter];
                         else
                             tmpst = minangle->st;
                     }
                     u_x_average(minangle,tmpst,tmpuxavg);
                     u_y_average(minangle,tmpst,tmpuyavg);
                     ux2nd   = tmpuxavg;
                     uy2nd   = tmpuyavg;
                     cent2nd = fg_centroid(minangle);
                 }

                 dx1 = nbcent[i][0]-cent[0];
                 dy1 = nbcent[i][1]-cent[1];
                 dx2 = cent2nd[0]-cent[0];
                 dy2 = cent2nd[1]-cent[1];

                 // d_x u
                 rside[0] = nbuxavg[i][k] - uxavg[k];
                 rside[1] = ux2nd[k] - uxavg[k];
                 A[0][0] = 2.0*dx1;
                 A[0][1] = dy1;
                 A[1][0] = 2.0*dx2;
                 A[1][1] = dy2;
                 comp_coef(A,rside,coefx[i]);

                 // d_y u
                 rside[0] = nbuyavg[i][k] - uyavg[k];
                 rside[1] = uy2nd[k] - uyavg[k];
                 A[0][0] = dx1;
                 A[0][1] = 2.0*dy1;
                 A[1][0] = dx2;
                 A[1][1] = 2.0*dy2;
                 comp_coef(A,rside,coefy[i]);

                 // Cosine of the angle between the two stencil directions.
                 // It is taken from the raw centroid offsets so the value
                 // does not depend on how A happens to be scaled, and it is
                 // stored in both dirx and diry because the d_x and d_y
                 // systems share the same pair of triangles.  Diagnostic
                 // output only.
                 dirx[i] = (dx1*dx2 + dy1*dy2)/
                         ( sqrt(dx1*dx1 + dy1*dy1)*sqrt(dx2*dx2 + dy2*dy2) );
                 diry[i] = dirx[i];
                 if(debug_flag == YES)
                 {
                     printf("nbtri[%d] use nntri[%d], cos_angle %g\n",nbtri[i]->id, minangle->id, dirx[i]);
                 }
             }

             // u_xx: minmod over the three d_x candidates
             u3 = minmod(coefx[0][0],coefx[1][0]);
             u3 = minmod(coefx[2][0],u3);

             // u_xy: appears in both the d_x and the d_y fit, so all six
             // candidates take part in the minmod
             u4 = minmod(coefx[0][1],coefx[1][1]);
             u4 = minmod(coefx[2][1],u4);
             u4 = minmod(coefy[0][0],u4);
             u4 = minmod(coefy[1][0],u4);
             u4 = minmod(coefy[2][0],u4);

             // u_yy: minmod over the three d_y candidates
             u5 = minmod(coefy[0][1],coefy[1][1]);
             u5 = minmod(coefy[2][1],u5);

             if(debug_flag == YES && k == 0)
             {
                 printf("Uxx cand[%g %g %g] cos(%g %g %g)\n", coefx[0][0], coefx[1][0], coefx[2][0],
                                      dirx[0], dirx[1], dirx[2]);
                 printf("Uxy cand[%g %14.13g %g, %g %14.13g %g]\n", coefx[0][1], coefx[1][1], coefx[2][1],
                                            coefy[0][0], coefy[1][0], coefy[2][0]);
                 printf("Uyy cand[%g %g %g] cos(%g %g %g)\n", coefy[0][1],coefy[1][1],coefy[2][1],
                                     diry[0], diry[1], diry[2]);
             }

             switch(k)
             {
             case 0:
                 dg_Dens(st2)[3] = u3;
                 dg_Dens(st2)[4] = u4;
                 dg_Dens(st2)[5] = u5;
             break;
             case 1:
                 dg_Mom(st2)[0][3] = u3;
                 dg_Mom(st2)[0][4] = u4;
                 dg_Mom(st2)[0][5] = u5;
             break;
             case 2:
                 dg_Mom(st2)[1][3] = u3;
                 dg_Mom(st2)[1][4] = u4;
                 dg_Mom(st2)[1][5] = u5;
             break;
             case 3:
                 dg_Energy(st2)[3] = u3;
                 dg_Energy(st2)[4] = u4;
                 dg_Energy(st2)[5] = u5;
             break;
             default:
             break;
             }
             if(debug_flag == YES && k == 0)
             {
                 printf("for[%d] state, coeff(3, 4, 5) = (%g, %g, %g), final(%g, %g, %g), orign(%g %g %g)\n",                               k, u3, u4, u5, dg_Dens(st2)[3], dg_Dens(st2)[4], dg_Dens(st2)[5],
                        dg_Dens(st)[3], dg_Dens(st)[4], dg_Dens(st)[5]);
             }

         } // End of: for(k = 0; k < N_EQN; k++)

         debug_flag = NO;

}

/*
 * Among the candidate triangles nntri[0..N_nn-1], return the one whose
 * centroid, seen from the centroid of `tri`, lies in the direction closest
 * to the direction from `tri` to `nbtri` (largest cosine between the two
 * unit vectors).  On ties the first candidate wins, which is the same rule
 * min_angle() applies.
 *
 *   tri    reference triangle (origin of both directions)
 *   nbtri  triangle defining the reference direction
 *   nntri  candidate triangles
 *   N_nn   number of candidates; any positive count is accepted
 *   ipos   out: index of the chosen candidate in nntri, or -1 if N_nn <= 0
 *
 * Returns the chosen triangle, or NULL when there are no candidates.
 *
 * The running maximum replaces fixed 9-entry scratch arrays, so the number
 * of candidates is no longer limited by a local buffer size.
 */
TRI *min_angle_tri(
	TRI  *tri,
        TRI  *nbtri,
        TRI  **nntri,
        int  N_nn,
        int  *ipos)
{
        double *cent, *ccent;
        float  dir[2], cdir[2], len;
        float  cos_th, max_cth = 0.0;
        int    i, j, dim = 2, best = 0;

        if(N_nn <= 0)
        {
            *ipos = -1;
            return NULL;
        }

        cent = fg_centroid(tri);
        ccent = fg_centroid(nbtri);

        // unit vector tri -> nbtri
        for(i = 0; i < dim; i++)
            dir[i] = ccent[i] - cent[i];
        len = sqrt(sqr(dir[0]) + sqr(dir[1]) );
        for(i = 0; i < dim; i++)
            dir[i] = dir[i]/len;

        for(j = 0; j < N_nn; j++)
        {
            // unit vector tri -> candidate j
            ccent = fg_centroid(nntri[j]);
            for(i = 0; i < dim; i++)
                cdir[i] = ccent[i] - cent[i];
            len = sqrt(sqr(cdir[0]) + sqr(cdir[1]) );
            for(i = 0; i < dim; i++)
                cdir[i] = cdir[i]/len;

            cos_th = dir[0]*cdir[0] + dir[1]*cdir[1];
            // strict '>' keeps the earliest candidate on equal cosines
            if(j == 0 || cos_th > max_cth)
            {
                max_cth = cos_th;
                best = j;
            }
        }

        *ipos = best;
        return nntri[best];
}

/*
 * Return the index of the largest entry of cos_th[0..n-1].  The largest
 * cosine corresponds to the smallest angle, hence the name.  When several
 * entries share the maximum, the lowest index is returned; for n <= 1 the
 * result is 0.
 */
LOCAL int min_angle(float *cos_th, int n)
{
        int best = 0;
        int idx;

        for(idx = 1; idx < n; ++idx)
        {
            // strictly greater: an equal value never displaces an earlier one
            if(cos_th[idx] > cos_th[best])
                best = idx;
        }
        return best;
}

// Limiting the soln in interior
LIB_LOCAL void LARGE_STEN_local_limiting_soln_with_buffer_tris_multiple_times(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        int      HR_times,
        Limiting_store **store)
{
        TRI       *tri, *crsp_tri, *nbtri[3], *sten_tri[30];
        SURFACE   **surf;
        int       dim = 2, i, j, side, sten_tri_num;
        double     *cent;
        size_t    sizest = fr->sizest;
        Locstate  st2;
        float     max_dt;
        TRI       **limit_tris, **row_limit_tris[500];
        int       N_alloc = 800, N_row, N_use =0, N;

        TRI       **limit_tris2, **row_limit_tris2[500];
        int       N_alloc2 = 800, N_row2, N_use2 =0, N2;
        int       loop_num = 0, detect_extr = YES, comput_time = NO;
        vector(&limit_tris, N_alloc, sizeof(TRI*));
        row_limit_tris[0] = limit_tris;
        N_row = 1;

        if(mass_1st_row == NULL)
        {
            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));
            for(i = 0; i < 30; i++)
                matrix(&mass_1st_rows[i], 1, MAX_N_COEF,sizeof(double));
        }

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(debugging("twod_riemann"))
                        attach_buffer_states(fr,midsoln,rk_step,tri,store);
                    if(debugging("db_Mach"))
                        db_Mach_attach_buffer_states(fr,midsoln,rk_step,tri,store);
                    if(debugging("shock_vort"))
                        shock_vort_attach_buffer_states(fr,midsoln,rk_step,tri,store);
                    if(debugging("Sod"))
                        shock_vort_attach_buffer_states(fr,midsoln,rk_step,tri,store);
                }
                if(overshoot_state_Shu_V(tri,midsoln,rk_step) == YES)
                {
                    limit_tris[N_use] = tri;
                    N_use++;
                    if(N_use == N_alloc)
                    {
                        if(N_row +1 >= 500)
                        {
                            printf("ERROR: local_limiting_soln_with_buffer_tris, exceed alloc. limit\n");
                            clean_up(ERROR);
                        }
                        vector(&limit_tris, N_alloc, sizeof(TRI*));
                        row_limit_tris[N_row] = limit_tris;
                        N_row++;
                        N_use = 0;
                    }
                }
                else
                {
                    if(rk_step == RK_STEP)
                    {
                        max_dt = (*fr->_time_step_on_tri)(fr, tri);
                        if(max_dt < newdt)
                               time_on_tri = tri;
                        newdt = min(newdt, max_dt);
                        // states are saved there for redo highest degree
                        // if(!Boundary_tri(tri) && !tri_on_phy_bdry(tri))
                        assign(midsoln[tri->id].st[0],tri->st,fr->sizest);
                    }
                    else
                    {
                        // states are saved there for redo highest degree
                        // if(!Boundary_tri(tri) && !tri_on_phy_bdry(tri))
                        assign(midsoln[tri->id].st[0],midsoln[tri->id].st[rk_step],fr->sizest);
                    }
                }
            }
        }

redo_HR:
        loop_num++;

        // printf("---------------------------------\n");
        // printf("loop number = %d,"
       // " LARGE_STEN_local_limiting_soln_with_buffer_tris_multiple_times, HR_times = %d, comput time = %d\n",
       //                  loop_num, HR_times, comput_time);

        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1)
                N = N_use;
            else
                N = N_alloc;
            for(j = 0; j < N; j++)
            {
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                if(MAX_N_COEF == 6)
                {
                    // limiting_2nd_degree(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                    // limiting_1st_degree(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                }
                else if(MAX_N_COEF == 10)
                {
                    // limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,detect_extr);
                    // limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                    // limiting_1st_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,NO);
                    /** 
                    if(row_limit_tris[i][j]->id == 3)
                    {
                        printf("print tri[%d]' neighbor\n", row_limit_tris[i][j]->id);
                        print_tri_crds(nbtri[0]);
                        print_tri_crds(nbtri[1]);
                        print_tri_crds(nbtri[2]);
                    } 
                    **/

                    set_HR_sten(nbtri, row_limit_tris[i][j],sten_tri,&sten_tri_num);
                    LARGE_STEN_limiting_3rd_degreeP3(row_limit_tris[i][j],sten_tri,
                             sten_tri_num,nbtri,midsoln,rk_step,detect_extr);
                    LARGE_STEN_limiting_2nd_degreeP3(row_limit_tris[i][j],sten_tri,
                             sten_tri_num,nbtri,midsoln,rk_step);
                    LARGE_STEN_limiting_1st_degreeP3(row_limit_tris[i][j],sten_tri,
                             sten_tri_num,nbtri,midsoln,rk_step,NO);
                }
            }
        }

        //////////////* do scaling at the end of HR multiple iteration
        if(loop_num == HR_times)
        {
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1)
                    N = N_use;
                else
                    N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(row_limit_tris[i][j]->redo_limiting == YES)
                        fix_unphysical_st(row_limit_tris[i][j],midsoln,0,fr);
                }
            }
            comput_time = YES;
        }

        if(debugging("Sod") || debugging("Lax"))
        {
            update_buffer_x_peri(fr,midsoln,0,store);
        }
        else if(debugging("shock_vort"))
        {
            update_buffer_x_ref(fr,midsoln,0,store);
        }
        else if(debugging("v_evo") || debugging("Burgers"))
        {
            update_buffer(fr,midsoln,0,store);
        }

        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1)
                N = N_use;
            else
                N = N_alloc;
            for(j = 0; j < N; j++)
            {
                // if(row_limit_tris[i][j]->redo_limiting == YES)
                //     fix_unphysical_st(row_limit_tris[i][j],midsoln,0,fr);
                // update coefficient
                update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,comput_time);
            }
        }

        if(loop_num < HR_times)
        {
            // if(loop_num == HR_times -1)
            //     comput_time = YES;

#if defined(__MPI__)
            if(debugging("db_Mach"))
                update_db_Mach_buffer(fr,midsoln,rk_step,store);
#endif // if defined(__MPI__)

            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(Boundary_tri(row_limit_tris[i][j]) ||
                       tri_on_phy_bdry(row_limit_tris[i][j]))
                    {
                        if(debugging("twod_riemann"))
                            attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],store);
                        if(debugging("db_Mach"))
                            db_Mach_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],store);
                        if(debugging("shock_vort"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],store);
                        if(debugging("Sod"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],store);
                    }
                }
            }
            goto redo_HR;
        }

        for(i = 0; i < N_row; i++)
            free(row_limit_tris[i]);
}

// Reconstructed coeffs. are stored in RK_STEP[0]
/*
 * Rebuild the third-degree DG coefficients (slots 6..9 of dg_Dens, dg_Mom
 * and dg_Energy) of `tri` from the second-degree data of a large stencil.
 *
 * For every pair of consecutive stencil triangles (i, (i+1) % sten_n) a
 * 2x2 system is built from centroid offsets for each of the u_xx, u_xy and
 * u_yy averages (obtained with u_average_indx() for slots 3, 4, 5; the xx
 * and yy averages are scaled by 2).  comp_coef() solves the systems,
 * WENO_mod_on_3rd() turns the candidates and the stencil condition numbers
 * into weights w[], and the weighted sums give
 *     u6           from the u_xx fit,
 *     u7_0 / u7_1  from the u_xx / u_xy fits,
 *     u8_0 / u8_1  from the u_xy / u_yy fits,
 *     u9           from the u_yy fit.
 * u7 and u8 are each reconciled from their two estimates with minmod2().
 * When detect_extr == YES and NEW_extrema_detec() flags equation k, the
 * corresponding sums are zeroed.
 *
 *   tri          triangle being limited
 *   nbtri        stencil triangles, sten_n valid entries
 *   sten_n       stencil size; the local arrays hold at most 30 entries
 *   edgetri      edge neighbours of tri (not used by the active code path)
 *   midsoln      per-triangle intermediate states; the result is written
 *                to midsoln[tri->id].st[0]
 *   rk_iter      input states come from TRI::st when rk_iter == RK_STEP,
 *                else from midsoln[].st[rk_iter]
 *   detect_extr  YES to apply the extrema detector
 *
 * NOTE(review): unlike limiting_2nd_degree_ENO, stencil triangles are
 * looked up in midsoln[] without an id >= 0 check - confirm that every
 * stencil triangle reaching this routine has a valid midsoln entry.
 */
LOCAL void LARGE_STEN_limiting_3rd_degreeP3(
         TRI       *tri,
         TRI       *nbtri[30],
         int       sten_n,
	 TRI       *edgetri[3],
         Mid_soln  *midsoln,
         int       rk_iter,
         int       detect_extr)
{
         Locstate st, nbst[30], st2;
         float    uxxave[4], nbuxxave[30][8];
         float    uxyave[4], nbuxyave[30][8];
         float    uyyave[4], nbuyyave[30][8];
         int      i, k, nxt;
         double   *cent, *nbcent[30];
         float    rside[3], Axx[30][2][2], Axy[30][2][2], Ayy[30][2][2];
         float    coefxx[30][2], coefxy[30][2], coefyy[30][2];

         float    u6, u7, u8, u9, u7_0, u7_1, u8_0, u8_1,
                   arrya[30], arryb[30], w[30];
         float    c_num_xx[30], c_num_xy[30], c_num_yy[30]; // condition number of stencils
         float    diam;
         int      is_bad_stenxx[30], is_bad_stenxy[30], is_bad_stenyy[30];
         int      debug = NO;   // set to YES by hand to enable the dumps below
         float    area = fg_area(tri); // not read by the active WENO_mod_on_3rd path

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < sten_n; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < sten_n; i++)
                 nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }

         st2 = midsoln[tri->id].st[0];
         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         diam = fg_diam(tri);

         cent = fg_centroid(tri);
         for(i = 0; i < sten_n; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         u_average_indx(tri,st,3,uxxave);
         u_average_indx(tri,st,4,uxyave);
         u_average_indx(tri,st,5,uyyave);
         for(k = 0; k < N_EQN; k++)
         {
             uxxave[k] *= 2.0;
             uyyave[k] *= 2.0;
         }

         // The copy of the edge-neighbour averages into 3-row scratch
         // arrays was dropped: nothing read it, and its write index had
         // no bound check.
         for(i = 0; i < sten_n; i++)
         {
             u_average_indx(nbtri[i],nbst[i],3,nbuxxave[i]);
             u_average_indx(nbtri[i],nbst[i],4,nbuxyave[i]);
             u_average_indx(nbtri[i],nbst[i],5,nbuyyave[i]);

             for(k = 0; k < N_EQN; k++)
             {
                 nbuxxave[i][k] *= 2.0;
                 nbuyyave[i][k] *= 2.0;
             }
         }

         if(detect_extr == YES)
         {
             NEW_extrema_detec(uxxave,nbuxxave,sten_n,is_bad_stenxx);
             NEW_extrema_detec(uxyave,nbuxyave,sten_n,is_bad_stenxy);
             NEW_extrema_detec(uyyave,nbuyyave,sten_n,is_bad_stenyy);
         }
         else
         {
             for(k = 0; k < N_EQN; k++)
                 is_bad_stenxx[k] = is_bad_stenxy[k] = is_bad_stenyy[k] = NO;
         }

         // Geometry-only part: one 2x2 matrix per consecutive stencil pair
         // and per derivative, plus its condition number.
         for(i = 0; i < sten_n; i++)
         {
             nxt = (i+1)%sten_n;

             Axx[i][0][0] = 6.0*(nbcent[i][0]-cent[0]);
             Axx[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axx[i][1][0] = 6.0*(nbcent[nxt][0]-cent[0]);
             Axx[i][1][1] = 2.0*(nbcent[nxt][1]-cent[1]);
             c_num_xx[i] = cond_num(Axx[i]);

             Axy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Axy[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axy[i][1][0] = 2.0*(nbcent[nxt][0]-cent[0]);
             Axy[i][1][1] = 2.0*(nbcent[nxt][1]-cent[1]);
             c_num_xy[i] = cond_num(Axy[i]);

             Ayy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Ayy[i][0][1] = 6.0*(nbcent[i][1]-cent[1]);
             Ayy[i][1][0] = 2.0*(nbcent[nxt][0]-cent[0]);
             Ayy[i][1][1] = 6.0*(nbcent[nxt][1]-cent[1]);
             c_num_yy[i] = cond_num(Ayy[i]);

             if(debug == YES)
             {
                printf("sten[%d], k component of cross product= %g\n", i,
                    Axx[i][0][0]*Axx[i][1][1] - Axx[i][0][1]*Axx[i][1][0]);
             }
         }

         for(k = 0; k < N_EQN; k++)
         {
             // candidate coefficients from every consecutive stencil pair
             for(i = 0; i < sten_n; i++)
             {
                 nxt = (i+1)%sten_n;

                 rside[0] = nbuxxave[i][k] - uxxave[k];
                 rside[1] = nbuxxave[nxt][k] - uxxave[k];
                 comp_coef(Axx[i],rside,coefxx[i]);

                 rside[0] = nbuxyave[i][k] - uxyave[k];
                 rside[1] = nbuxyave[nxt][k] - uxyave[k];
                 comp_coef(Axy[i],rside,coefxy[i]);

                 rside[0] = nbuyyave[i][k] - uyyave[k];
                 rside[1] = nbuyyave[nxt][k] - uyyave[k];
                 comp_coef(Ayy[i],rside,coefyy[i]);
             }

             /////////////////// WENO
             ///// u_xx polynomial
             for(i = 0; i < sten_n; i++)
             {
                 arrya[i] = coefxx[i][0];
                 arryb[i] = coefxx[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_xx,sten_n,diam,w);
             if(debug == YES && k == 0)
             {
                 for(i = 0; i < sten_n; i++)
                     printf("weight w[%d] = %g, u_xxx candidate = %g, u_xxy candidate = %g\n",
                          i, w[i], coefxx[i][0], coefxx[i][1]);
             }

             u7_0 = u6 = 0.0;
             for(i = 0; i < sten_n; i++)
             {
                 u6   += w[i]*coefxx[i][0];
                 u7_0 += w[i]*coefxx[i][1];
             }
             if(is_bad_stenxx[k] == YES && detect_extr == YES)
             {
                 u6 = 0.0; u7_0 = 0.0;
             }

             ///// u_xy polynomial
             for(i = 0; i < sten_n; i++)
             {
                 arrya[i] = coefxy[i][0];
                 arryb[i] = coefxy[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_xy,sten_n,diam,w);
             u7_1 = u8_0 = 0.0;
             for(i = 0; i < sten_n; i++)
             {
                 u7_1 += w[i]*coefxy[i][0];
                 u8_0 += w[i]*coefxy[i][1];
             }
             if(is_bad_stenxy[k] == YES && detect_extr == YES)
             {
                 u7_1 = 0.0; u8_0 = 0.0;
             }

             ///// u_yy polynomial
             for(i = 0; i < sten_n; i++)
             {
                 arrya[i] = coefyy[i][0];
                 arryb[i] = coefyy[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_yy,sten_n,diam,w);
             u8_1 = u9 = 0.0;
             for(i = 0; i < sten_n; i++)
             {
                 u8_1 += w[i]*coefyy[i][0];
                 u9   += w[i]*coefyy[i][1];
             }
             if(is_bad_stenyy[k] == YES && detect_extr == YES)
             {
                 u8_1 = 0.0; u9 = 0.0;
             }

             // u7 and u8 each have two estimates; reconcile them
             u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             /////////////////// End WENO

             switch(k)
             {
             case 0:
                 dg_Dens(st2)[6] = u6;
                 dg_Dens(st2)[7] = u7;
                 dg_Dens(st2)[8] = u8;
                 dg_Dens(st2)[9] = u9;
                 if(debug == YES)
                 {
                     printf("reconstructed 3rd degree coef %g %g %g %g\n", 
                             u6, u7, u8, u9);
                 }
             break;
             case 1:
                 dg_Mom(st2)[0][6] = u6;
                 dg_Mom(st2)[0][7] = u7;
                 dg_Mom(st2)[0][8] = u8;
                 dg_Mom(st2)[0][9] = u9;
             break;
             case 2:
                 dg_Mom(st2)[1][6] = u6;
                 dg_Mom(st2)[1][7] = u7;
                 dg_Mom(st2)[1][8] = u8;
                 dg_Mom(st2)[1][9] = u9;
             break;
             case 3:
                 dg_Energy(st2)[6] = u6;
                 dg_Energy(st2)[7] = u7;
                 dg_Energy(st2)[8] = u8;
                 dg_Energy(st2)[9] = u9;
             break;
             default:
             break;
             }
         }

}

// LARGE_STEN_limiting_2nd_degreeP3():
//
// Recomputes the coefficients with index 3, 4 and 5 of each of the N_EQN
// variables on tri from the sten_n stencil triangles in nbtri[].
// Reconstructed coeffs. are stored in midsoln[tri->id].st[0] (st2 below).
//
//   tri      triangle being limited
//   nbtri    stencil triangles; entries i and (i+1)%sten_n are paired, so
//            the list is treated as cyclic
//   sten_n   number of entries of nbtri in use; the per-stencil local arrays
//            and the file-scope mass_1st_rows[] hold at most 30
//   edgetri  three triangles compared against nbtri[]; only used to fill
//            tmpnbLxave/tmpnbLyave, which nothing below reads
//   midsoln  per-triangle intermediate states, indexed by tri->id
//   rk_iter  selects the input state: tri->st when rk_iter == RK_STEP,
//            midsoln[].st[rk_iter] otherwise (same rule for the neighbours)
//
// The arrays indexed by the equation number k are sized for N_EQN <= 4.
LOCAL void LARGE_STEN_limiting_2nd_degreeP3(
         TRI       *tri,
         TRI       *nbtri[30],
         int       sten_n,
         TRI       *edgetri[3],
         Mid_soln  *midsoln,
         int       rk_iter)
{
         // NOTE(review): nbst2, indx, rside2, avg1, avg2, wx, wy,
         // Lmass_matrix, eps, idir and area are never read in this function.
         Locstate st, nbst[30], st2, nbst2[30];
         float    uxave[4], nbuxave[30][8], uyave[4], nbuyave[30][8];
         float    Rxave[4], nbRxave[30][8], Ryave[4], nbRyave[30][8];
         float    Lxave[4], nbLxave[30][8], Lyave[4], nbLyave[30][8];
         int      i, dim = 2, indx, k, j, l;
         double    *cent, *nbcent[30];
         float    rside[3], Ax[30][2][2], Ay[30][2][2], rside2[3];
         float    coefx[30][2], coefy[30][2];
         float    u3, u4, u5, u4_0, u4_1, avg1, avg2, wx[90], wy[90], arrya[90], arryb[90], w[90];
         double **Lmass_matrix = tri->Lmass_matrix;
         float    eps = 1.5, diam; 
         // float    sqr_diam, cub_diam; // 0.5, 0.8 for all_cent
         int      idir, is_bad_stenx[30], is_bad_steny[30];
         float    c_num_x[30], c_num_y[30];
         int      debug = NO;
         float    tmpnbLxave[3][4], tmpnbLyave[3][4];
         float    area = fg_area(tri);

         // Pick the input states for tri and for every stencil triangle.
         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < sten_n; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < sten_n; i++)
                 nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }

         // Disabled debugging hook: enabling it turns on the printf in the
         // k == 0 case at the bottom for one chosen triangle.
         /**
         if(3 == tri->id)
         {
             printf("\n\n---------*******************--------------\n");
             printf("TRI[%d] entered LARGE_STEN_limiting_2nd_degreeP3\n",tri->id);
             // print_tri_crds(tri);
             debug = YES;
         }
         **/

         // st2 is both an input (its contribution is removed from the
         // averages below) and the output of this routine.
         st2 = midsoln[tri->id].st[0];
         diam = fg_diam(tri);
         // sqr_diam = sqr(diam);
         // cub_diam = cub(diam);

         // Averages on tri from the input state st: u_average_indx() with
         // index 1 and 2, plus the R_degree3_term_averageP3() term of st.
         u_average_indx(tri,st,1,uxave);
         u_average_indx(tri,st,2,uyave);
         R_degree3_term_averageP3(tri,st,tri->Lmass_matrix,0,1.0,Rxave);
         R_degree3_term_averageP3(tri,st,tri->Lmass_matrix,1,1.0,Ryave);
         for(k = 0; k < N_EQN; k++)
         {
             uxave[k] += Rxave[k];
             uyave[k] += Ryave[k];
         }
         // Rxave/Ryave are now overwritten with the same term evaluated from
         // st2; it is subtracted from uxave/uyave further down.
         // Presumably st2 already holds the limited degree-3 coefficients: the
         // caller in this file runs LARGE_STEN_limiting_3rd_degreeP3() first.
         R_degree3_term_averageP3(tri,st2,tri->Lmass_matrix,0,1.0,Rxave);
         R_degree3_term_averageP3(tri,st2,tri->Lmass_matrix,1,1.0,Ryave);

         // sten_n neighbor tris
         for(i = 0; i < sten_n; i++)
         {
             // Same construction as on tri, using the neighbour's own state
             // and its own Lmass_matrix.
             u_average_indx(nbtri[i],nbst[i],1,nbuxave[i]);
             u_average_indx(nbtri[i],nbst[i],2,nbuyave[i]);
             R_degree3_term_averageP3(nbtri[i],nbst[i],nbtri[i]->Lmass_matrix,0,1.0,nbRxave[i]);
             R_degree3_term_averageP3(nbtri[i],nbst[i],nbtri[i]->Lmass_matrix,1,1.0,nbRyave[i]);
             for(k = 0; k < N_EQN; k++)
             {
                 nbuxave[i][k] += nbRxave[i][k];
                 nbuyave[i][k] += nbRyave[i][k];
             }

             // nbRxave/nbRyave are overwritten with the term of tri's st2
             // taken over nbtri[i].  The row mass_1st_rows[i] is computed for
             // nbtri[i] with tri's centroid passed in, and is left in the
             // file-scope array afterwards.
             comp_mass_matrix_1st_row(MAX_N_COEF,nbtri[i],dim,fg_centroid(tri),mass_1st_rows[i]);
             R_degree3_term_averageP3(nbtri[i],st2,mass_1st_rows[i],0,1.0,nbRxave[i]);
             R_degree3_term_averageP3(nbtri[i],st2,mass_1st_rows[i],1,1.0,nbRyave[i]);
         }
         // L* = (average from the input state) - (st2 term), on tri and on
         // every stencil triangle.
         for(k = 0; k < N_EQN; k++)
         {
             Lxave[k] = uxave[k]-Rxave[k];
             Lyave[k] = uyave[k]-Ryave[k];
             for(i = 0; i < sten_n; i++)
             {
                 nbLxave[i][k] = nbuxave[i][k]-nbRxave[i][k];
                 nbLyave[i][k] = nbuyave[i][k]-nbRyave[i][k];
             }
         }
         // Collect the values of the stencil entries that are also in
         // edgetri[].  The result only feeds the commented-out
         // NEW_extrema_detec() calls below.
         // NOTE(review): l is not bounded; more than three matches would
         // write past tmpnbLxave[3]/tmpnbLyave[3].
         l = 0;
         for(i = 0; i < sten_n; i++)
         {
             for(j = 0; j < 3; j++)
             {
                 if(nbtri[i] == edgetri[j])
                 {
                     for(k = 0; k < N_EQN; k++)
                     {
                         tmpnbLxave[l][k] = nbLxave[i][k];
                         tmpnbLyave[l][k] = nbLyave[i][k];
                     }
                     l++;
                 }
             }
         }
         // Per equation: YES when the value on tri is not strictly inside
         // the range spanned by the stencil values (see NEW_extrema_detec()).
         NEW_extrema_detec(Lxave,nbLxave,sten_n,is_bad_stenx);
         NEW_extrema_detec(Lyave,nbLyave,sten_n,is_bad_steny);
         // for(k = 0; k < N_EQN; k++)
         //    is_bad_stenx[k] = is_bad_steny[k] = NO;
         // NEW_extrema_detec(Lxave,tmpnbLxave,3,is_bad_stenx);
         // NEW_extrema_detec(Lyave,tmpnbLyave,3,is_bad_steny);

         cent = fg_centroid(tri);
         for(i = 0; i < sten_n; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         // One 2x2 matrix per pair of consecutive stencil triangles.  With
         // (dx,dy) the centroid offset from tri, a row of Ax is (2*dx, dy)
         // and a row of Ay is (dx, 2*dy); row 0 uses triangle i, row 1 uses
         // triangle (i+1)%sten_n.  cond_num() of each matrix is handed to the
         // weight routine below.
         for(i = 0; i < sten_n; i++)
         {
             Ax[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Ax[i][0][1] =     (nbcent[i][1]-cent[1]);
             Ax[i][1][0] = 2.0*(nbcent[(i+1)%sten_n][0]-cent[0]);
             Ax[i][1][1] =     (nbcent[(i+1)%sten_n][1]-cent[1]);
             c_num_x[i] = cond_num(Ax[i]); // c_num_x[i] = 1.0;
             Ay[i][0][0] =     (nbcent[i][0]-cent[0]);
             Ay[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Ay[i][1][0] =     (nbcent[(i+1)%sten_n][0]-cent[0]);
             Ay[i][1][1] = 2.0*(nbcent[(i+1)%sten_n][1]-cent[1]);
             c_num_y[i] = cond_num(Ay[i]); // c_num_y[i] = 1.0;
         }

         for(k = 0; k < N_EQN; k++)
         {
             // Candidate coefficients from every pair: the right-hand sides
             // are the differences of the L values between the two stencil
             // triangles and tri.  comp_coef() presumably solves the 2x2
             // system -- confirm against its definition.
             for(i = 0; i < sten_n; i++)
             {
                 rside[0] = nbLxave[i][k] - Lxave[k];
                 rside[1] = nbLxave[(i+1)%sten_n][k] - Lxave[k];
                 comp_coef(Ax[i],rside,coefx[i]);

                 rside[0] = nbLyave[i][k] - Lyave[k];
                 rside[1] = nbLyave[(i+1)%sten_n][k] - Lyave[k];
                 comp_coef(Ay[i],rside,coefy[i]);
             }
             // x system: weighted combination of the candidates gives u3 and
             // a first estimate u4_0 of u4.
             for(i = 0; i < sten_n; i++)
             {
                 arrya[i] = coefx[i][0];
                 arryb[i] = coefx[i][1];
             }
             WENO_mod_on_2nd(arrya,arryb,c_num_x,sten_n,1.0,w);
             // area_WENO_mod_on_2nd(arrya,arryb,c_num_x,sten_n,1.0,area,w);
             u3 = u4_0 = 0.0;
             for(i = 0; i < sten_n; i++)
             {
                 u3   += w[i]*coefx[i][0];
                 u4_0 += w[i]*coefx[i][1];
             }
             // Flagged equations get zero coefficients from this system.
             if(is_bad_stenx[k] == YES)
             {
                 u3 = 0.0; u4_0 = 0.0;
             }

             // y system: second estimate u4_1 of u4, and u5.  w is reused.
             for(i = 0; i < sten_n; i++)
             {
                 arrya[i] = coefy[i][0];
                 arryb[i] = coefy[i][1];
             }
             WENO_mod_on_2nd(arrya,arryb,c_num_y,sten_n,1.0,w);
             // area_WENO_mod_on_2nd(arrya,arryb,c_num_y,sten_n,1.0,area,w);
             u4_1 = u5 = 0.0;
             for(i = 0; i < sten_n; i++)
             {
                 u4_1 += w[i]*coefy[i][0];
                 u5   += w[i]*coefy[i][1];
             }

             if(is_bad_steny[k] == YES)
             {
                 u4_1 = 0.0; u5 = 0.0;
             }
             // Combine the two estimates of u4: minmod2 of 1.05 times their
             // minmod2 and their arithmetic mean.
             u4 = minmod2((1+0.05)*minmod2(u4_0,u4_1), 0.5*(u4_0 + u4_1)); // 0.05, 0.01
             // Store into st2: k = 0 density, 1 and 2 the two momentum
             // components, 3 energy.  Any other k stores nothing.
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[3] = u3;
                 dg_Dens(st2)[4] = u4;
                 dg_Dens(st2)[5] = u5;
                 // TMP
                 // printf("fixed u_xx, u_xy, u_yy origin coeff: %g %g %g\n\n",
                 //    dg_Dens(st2)[3], dg_Dens(st2)[4],dg_Dens(st2)[5]);
                 // END TMP
                 if(debug == YES)
                 {
                     printf("fixed 2nd degree coeff = %g, %g, %g\n", u3, u4, u5);
                 }
             break;
             case 1:
                 dg_Mom(st2)[0][3] = u3;
                 dg_Mom(st2)[0][4] = u4;
                 dg_Mom(st2)[0][5] = u5;
             break;
             case 2:
                 dg_Mom(st2)[1][3] = u3;
                 dg_Mom(st2)[1][4] = u4;
                 dg_Mom(st2)[1][5] = u5;
             break;
             case 3:
                 dg_Energy(st2)[3] = u3;
                 dg_Energy(st2)[4] = u4;
                 dg_Energy(st2)[5] = u5;
             break;
             }
         }
}

// LARGE_STEN_limiting_1st_degreeP3():
//
// Recomputes the coefficients with index 0, 1 and 2 of each of the N_EQN
// variables on tri from the sten_n stencil triangles in nbtri[], and stores
// them in midsoln[tri->id].st[0] (st2 below).  It then copies Dens, Mom and
// Energy of the input state into st2 and sets tri->redo_limiting.
//
//   tri         triangle being limited
//   nbtri       stencil triangles; entries i and (i+1)%sten_n are paired, so
//               the list is treated as cyclic
//   sten_n      number of entries of nbtri in use (local arrays hold 30)
//   edgetri     three triangles compared against nbtri[]; only used to fill
//               tmpnbLave, which nothing below reads
//   midsoln     per-triangle intermediate states, indexed by tri->id
//   rk_iter     selects the input state: tri->st when rk_iter == RK_STEP,
//               midsoln[].st[rk_iter] otherwise (same rule for neighbours)
//   comput_mat  YES: recompute mass_1st_rows[i] for every stencil triangle.
//               Otherwise the rows already present in the file-scope
//               mass_1st_rows[] are used as they are; the caller in this file
//               passes NO right after LARGE_STEN_limiting_2nd_degreeP3(),
//               which fills them for the same tri and stencil.
//
// The arrays indexed by the equation number k are sized for N_EQN <= 4.
LOCAL void LARGE_STEN_limiting_1st_degreeP3(
         TRI       *tri,
         TRI       *nbtri[30],
         int       sten_n,
         TRI       *edgetri[3],
         Mid_soln  *midsoln,
         int       rk_iter,
         int       comput_mat)
{
         // NOTE(review): nbst2, indx, rside2, avg1, avg2, Lmass_matrix, eps,
         // diam and area are assigned at most once and never read.
         Locstate st, nbst[30], st2, nbst2[30];
         float    uave[4], nbuave[30][4];
         float    Rave[4], nbRave[30][4];
         float    Lave[4], nbLave[30][8];
         int      i, dim = 2, indx, k, is_bad_sten[40], j, l;
         double    *cent, *nbcent[30];
         float    rside[3], A[30][2][2], rside2[3];
         float    coef[30][2];
         float    u0, u1, u2, avg1, avg2, w[30], arrya[30], arryb[30];
         double **Lmass_matrix = tri->Lmass_matrix;
         float    eps = 0.7; // 0.5, 0.1: over-smooth???, 0.7 for all cent
         float    c_num[30], diam;
         int      debug = NO;
         float    tmpnbLave[3][4];
         float    area= fg_area(tri);

         // Pick the input states for tri and for every stencil triangle.
         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < sten_n; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < sten_n; i++)
                 nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }
         // st2 is both an input (its contribution is removed from the
         // averages below) and the output of this routine.
         st2 = midsoln[tri->id].st[0];

         // Disabled debugging hook: enabling it turns on the two debug
         // printouts below for one chosen triangle.
         /**
         if(3 == tri->id)
         {
             printf("\n\n---------*******************--------------\n");
             printf("TRI[%d] entered LARGE_STEN_limiting_1st_degreeP3\n",tri->id);
             // print_tri_crds(tri);
             debug = YES;
         }
         **/

         // Values of the input state on tri, and the
         // R_degree2_above_term_averageP3() term of st2 on tri.
         uave[0] = Dens(st);
         uave[1] = Mom(st)[0];
         uave[2] = Mom(st)[1];
         uave[3] = Energy(st);
         R_degree2_above_term_averageP3(tri,st2,tri->Lmass_matrix,Rave);

         // sten_n neighbor tris
         if(comput_mat == YES)
         {
             for(i = 0; i < sten_n; i++)
                 comp_mass_matrix_1st_row(MAX_N_COEF,nbtri[i],dim,fg_centroid(tri),mass_1st_rows[i]);
         }

         // Neighbour values come from the neighbour's own input state; the
         // subtracted term is evaluated from tri's st2 with mass_1st_rows[i].
         for(i = 0; i < sten_n; i++)
         {
             nbuave[i][0] = Dens(nbst[i]);
             nbuave[i][1] = Mom(nbst[i])[0];
             nbuave[i][2] = Mom(nbst[i])[1];
             nbuave[i][3] = Energy(nbst[i]);
             R_degree2_above_term_averageP3(nbtri[i],st2,mass_1st_rows[i],nbRave[i]);
         }
         // L = (value from the input state) - (st2 term), on tri and on
         // every stencil triangle.
         for(k = 0; k < N_EQN; k++)
         {
             Lave[k] = uave[k]-Rave[k];
             for(i = 0; i < sten_n; i++)
                 nbLave[i][k] = nbuave[i][k]-nbRave[i][k];
         }
         // Collect the values of the stencil entries that are also in
         // edgetri[].  The result only feeds the commented-out
         // NEW_extrema_detec() call below.
         // NOTE(review): l is not bounded; more than three matches would
         // write past tmpnbLave[3].
         l = 0;
         for(i = 0; i < sten_n; i++)
         {
             for(j = 0; j < 3; j++)
             {
                 if(nbtri[i] == edgetri[j])
                 {
                     for(k = 0; k < N_EQN; k++)
                         tmpnbLave[l][k] = nbLave[i][k];
                     l++;
                 }
             }
         }
         // NEW_extrema_detec(Lave,tmpnbLave,3,is_bad_sten);

         // Per equation: YES when the value on tri is not strictly inside
         // the range spanned by the stencil values (see NEW_extrema_detec()).
         NEW_extrema_detec(Lave,nbLave,sten_n,is_bad_sten);
         // for(k = 0; k < N_EQN; k++)
         //    is_bad_sten[k] = NO;

         cent = fg_centroid(tri);
         for(i = 0; i < sten_n; i++)
             nbcent[i] = fg_centroid(nbtri[i]);
         diam = (fg_diam(tri));

         // One 2x2 matrix per pair of consecutive stencil triangles: each
         // row is the centroid offset (dx, dy) from tri, row 0 for triangle
         // i and row 1 for triangle (i+1)%sten_n.  cond_num() of each matrix
         // is handed to the weight routine below.
         for(i = 0; i < sten_n; i++)
         {
             A[i][0][0] = (nbcent[i][0]-cent[0]);
             A[i][0][1] = (nbcent[i][1]-cent[1]);
             A[i][1][0] = (nbcent[(i+1)%sten_n][0]-cent[0]);
             A[i][1][1] = (nbcent[(i+1)%sten_n][1]-cent[1]);
             c_num[i] = cond_num(A[i]); // c_num[i] =1.0;
         }

         for(k = 0; k < N_EQN; k++)
         {
             // linear part of polynomial
             // Candidate (u1, u2) from every pair; comp_coef() presumably
             // solves the 2x2 system -- confirm against its definition.
             for(i = 0; i < sten_n; i++)
             {
                 rside[0] = nbLave[i][k] - Lave[k];
                 rside[1] = nbLave[(i+1)%sten_n][k] - Lave[k];
                 comp_coef(A[i],rside,coef[i]);
             }
             for(i = 0; i < sten_n; i++)
             {
                 arrya[i] = coef[i][0];
                 arryb[i] = coef[i][1];
             }
             // w[] is filled by WENO_mod_1() and used as the weights of the
             // candidates.
             WENO_mod_1(arrya, arryb, c_num, sten_n, w);
             // area_WENO_mod_1(arrya, arryb, c_num, sten_n, area, w);
             u1 = u2 = 0.0;
             for(i = 0; i < sten_n; i++)
             {
                 u1 += w[i]*coef[i][0];
                 u2 += w[i]*coef[i][1];
             }
             // Flagged equations get zero u1 and u2.
             if(is_bad_sten[k] == YES)
             {
                 u1 = u2 = 0.0;
             }
             ////////// Zero moment //////////////
             // Taken directly from Lave; no weighting or extrema test.
             u0 = Lave[k];

             // Store into st2: k = 0 density, 1 and 2 the two momentum
             // components, 3 energy.  Any other k stores nothing.
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
                 if(debug == YES)
                 {
                     printf("fixed 0th & 1st degree ceof = %g %g %g\n",
                           u0, u1, u2); 
                 }
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             }
         }
         // Carry Dens, Mom and Energy of the input state over to st2.
         Dens(st2) = Dens(st);
         Mom(st2)[0] = Mom(st)[0];
         Mom(st2)[1] = Mom(st)[1];
         Energy(st2) = Energy(st);

         // Record whether unphysical_st_at_quadrature() rejects the result;
         // the check is only made when N_EQN == 4.
         if(N_EQN == 4 && YES == unphysical_st_at_quadrature(tri, st2))
             tri->redo_limiting = YES;
         else
             tri->redo_limiting = NO;

         if(debug == YES)
         {
             verbose_print_state("fixed state", st2);
         }
}

// NEW_extrema_detec():
//
// For each of the N_EQN components c, sets is_bad[c] to YES when uave[c]
// is not strictly inside the range of the N values nbuave[0..N-1][c]
// (i.e. it is >= all of them or <= all of them), and to NO otherwise.
// With N == 0 every component is flagged.
EXPORT void NEW_extrema_detec(
         float     *uave,
         float     nbuave[][8],
         int       N,
         int       *is_bad)
{
         int      c, nb;
         float    lo, hi;

         for(c = 0; c < N_EQN; c++)
         {
             // Running bounds start at the centre value, so they end up as
             // the min/max over the centre and all N neighbours.
             lo = hi = uave[c];
             for(nb = 0; nb < N; nb++)
             {
                 if(nbuave[nb][c] > hi)
                     hi = nbuave[nb][c];
                 if(nbuave[nb][c] < lo)
                     lo = nbuave[nb][c];
             }

             if(uave[c] >= hi || uave[c] <= lo)
                 is_bad[c] = YES;
             else
                 is_bad[c] = NO;
         }
}

// Limiting the soln in interior
//
// LARGE_STEN_limiting_soln_with_buffer_tris():
//
// Works on every triangle of fr->mesh whose BC_type is not SUBDOMAIN, in
// three stages:
//
//  1. Fill midsoln[tri->id].st[0].  Triangles for which Boundary_tri() or
//     tri_on_phy_bdry() holds get a plain copy of their input state and are
//     not limited, except under debugging("twod_riemann"), where
//     attach_buffer_states() is called and the triangle is limited like an
//     interior one.  When MAX_N_COEF == 10 the limited triangles go through
//     set_HR_sten() and the 3rd, 2nd and 1st degree LARGE_STEN limiters, in
//     that order.  For any other MAX_N_COEF no limiter is called.
//  2. Update the buffer with the routine selected by the debugging() flags.
//  3. Boundary triangles: st[0] is optionally passed to zero_moments() and
//     copied back into the input state; on the pass with
//     rk_step == RK_STEP the file-level newdt / time_on_tri are updated.
//     All other triangles go through update_coef().
//
//   fr           front; supplies the mesh, sizest and _time_step_on_tri
//   midsoln      per-triangle intermediate states, indexed by tri->id
//   rk_step      the input state is tri->st when rk_step == RK_STEP and
//                midsoln[].st[rk_step] otherwise
//   detect_extr  forwarded to LARGE_STEN_limiting_3rd_degreeP3()
//   comput_time  forwarded to update_coef()
//   store        forwarded to attach_buffer_states() and update_buffer*()
LIB_LOCAL void LARGE_STEN_limiting_soln_with_buffer_tris(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        int      detect_extr,
        int      comput_time,
        Limiting_store **store)
{
        TRI       *tri, *nbtri[3], *sten_tri[30];
        SURFACE   **surf;
        int       i, sten_tri_num;
        Locstate  st;
        float     max_dt;

        // First call only: allocate the file-scope scratch rows that the
        // limiters fill through comp_mass_matrix_1st_row().
        if(mass_1st_row == NULL)
        {
            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));
            for(i = 0; i < 30; i++)
                matrix(&mass_1st_rows[i], 1, MAX_N_COEF,sizeof(double));
        }

        // Stage 1: fill st[0] of every triangle.
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(!debugging("twod_riemann"))
                    {
                        // Unlimited copy of the input state.
                        if(rk_step == RK_STEP)
                            st = tri->st;
                        else
                            st = midsoln[tri->id].st[rk_step];
                        assign(midsoln[tri->id].st[0],st,fr->sizest);
                        continue;
                    }
                    attach_buffer_states(fr,midsoln,rk_step,tri,store);
                }

                for(i = 0; i < 3; i++)
                    nbtri[i] = Tri_on_side(tri,i);

                if(MAX_N_COEF == 10)
                {
                    // Highest degree first: the lower-degree limiters read
                    // the coefficients already written to st[0].
                    set_HR_sten(nbtri,tri,sten_tri,&sten_tri_num);
                    LARGE_STEN_limiting_3rd_degreeP3(tri,sten_tri,
                             sten_tri_num,nbtri,midsoln,rk_step,detect_extr);
                    LARGE_STEN_limiting_2nd_degreeP3(tri,sten_tri,
                             sten_tri_num,nbtri,midsoln,rk_step);
                    LARGE_STEN_limiting_1st_degreeP3(tri,sten_tri,
                             sten_tri_num,nbtri,midsoln,rk_step,NO);
                }
                // MAX_N_COEF == 6: the P2 limiters are not called here.
            }
        }

        // Stage 2: buffer update, selected by the test problem flags.
        if(debugging("Sod") || debugging("Lax"))
        {
            update_buffer_x_peri(fr,midsoln,0,store);
        }
        else if(debugging("shock_vortex"))
        {
            update_buffer_x_ref(fr,midsoln,0,store);
        }
        else if(debugging("v_evo") || debugging("Burgers"))
        {
            update_buffer(fr,midsoln,0,store);
        }

        // Stage 3: write the result back.
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(!(debugging("g_sine") || debugging("v_evo")
                         || debugging("Burgers") || debugging("twod_riemann")))
                    {
                        zero_moments(midsoln[tri->id].st[0]);
                    }

                    if(!debugging("twod_riemann"))
                    {
                        if(rk_step == RK_STEP)
                            st = tri->st;
                        else
                            st = midsoln[tri->id].st[rk_step];
                        assign(st,midsoln[tri->id].st[0],fr->sizest);

                        if(rk_step == RK_STEP)
                        {
                            // Track the smallest per-triangle time step and
                            // the triangle that produced it.
                            max_dt = (*fr->_time_step_on_tri)(fr, tri);
                            if(max_dt < newdt)
                                time_on_tri = tri;
                            newdt = min(newdt, max_dt);
                        }
                        continue;
                    }
                }
                // update coefficient
                update_coef(tri,midsoln,rk_step,fr,fr->sizest,comput_time);
            }
        }
}

// area_WENO_mod_on_3rd():
//
// Fills w[0..n-1] with weights that sum to one:
//
//     w[i] = q[i] / (q[0] + ... + q[n-1]),
//     q[i] = (1/angle[i]) / sqr(eps + area*(a[i]^2 + b[i]^2)),  eps = 1.0e-4
//
//   a, b    the two candidate coefficients per stencil entry
//   angle   per-entry positive scale; its reciprocal multiplies q[i]
//   n       number of entries; any n is accepted, w itself is the only
//           scratch storage
//   h       not used
//   area    factor applied to a[i]^2 + b[i]^2
//   w       output, at least n entries
//
// Returns n, the number of weights written.
LIB_LOCAL int area_WENO_mod_on_3rd(
        float  *a,
        float  *b,
        float  *angle,
        int    n,
        float  h,
        float  area,
        float  *w)
{
        float  sum = 0.0, inv_angle;
        float  eps = 0.0001;
        int    i;

        // Unnormalized weights go straight into w so that no fixed-size
        // temporary limits n.
        for(i = 0; i < n; i++)
        {
            inv_angle = 1.0/(angle[i]);
            w[i] = inv_angle/sqr(eps + area*(sqr(a[i]) + sqr(b[i])));
            sum += w[i];
        }
        for(i = 0; i < n; i++)
            w[i] = w[i]/sum;

        return n;
}

// area_WENO_mod_on_2nd():
//
// Fills w[0..n-1] with weights that sum to one:
//
//     w[i] = q[i] / (q[0] + ... + q[n-1]),
//     q[i] = (1/angle[i]) / sqr(eps + area*(a[i]^2 + b[i]^2)),  eps = 1.0e-4
//
//   a, b    the two candidate coefficients per stencil entry
//   angle   per-entry positive scale; its reciprocal multiplies q[i]
//   n       number of entries; any n is accepted, w itself is the only
//           scratch storage
//   h       not used
//   area    factor applied to a[i]^2 + b[i]^2
//   w       output, at least n entries
//
// Returns n, the number of weights written.
LIB_LOCAL int area_WENO_mod_on_2nd(
        float  *a,
        float  *b,
        float  *angle,
        int    n,
        float  h,
        float  area,
        float  *w)
{
        float  sum = 0.0, inv_angle;
        float  eps = 0.0001;
        int    i;

        // Unnormalized weights go straight into w so that no fixed-size
        // temporary limits n.
        for(i = 0; i < n; i++)
        {
            inv_angle = 1.0/(angle[i]);
            w[i] = inv_angle/sqr(eps + area*(sqr(a[i]) + sqr(b[i])));
            sum += w[i];
        }
        for(i = 0; i < n; i++)
            w[i] = w[i]/sum;

        return n;
}

// area_WENO_mod_1():
//
// Fills w[0..n-1] with weights that sum to one:
//
//     w[i] = q[i] / (q[0] + ... + q[n-1]),
//     q[i] = (1/angle[i]) / sqr(eps + area*(a[i]^2 + b[i]^2)),  eps = 1.0e-6
//
//   a, b    the two candidate coefficients per stencil entry
//   angle   per-entry positive scale; its reciprocal multiplies q[i]
//   n       number of entries; any n is accepted, w itself is the only
//           scratch storage
//   area    factor applied to a[i]^2 + b[i]^2
//   w       output, at least n entries
//
// Returns n, the number of weights written.
LIB_LOCAL int area_WENO_mod_1(
        float  *a,
        float  *b,
        float  *angle,
        int     n,
        float  area,
        float  *w)
{
        float  sum = 0.0, inv_angle;
        float  eps = 0.000001;
        int    i;

        // Unnormalized weights go straight into w so that no fixed-size
        // temporary limits n.
        for(i = 0; i < n; i++)
        {
            inv_angle = 1.0/(angle[i]);
            w[i] = (inv_angle)/sqr(eps + area*(sqr(a[i]) + sqr(b[i])));
            sum += w[i];
        }
        for(i = 0; i < n; i++)
            w[i] = w[i]/sum;

        return n;
}


// map_poly_SV_to_CV_p3() applies to the P3 case (coefficients 0..9).
//
// For every control volume cv_indx < N_PART of tri, updates coefficients
// 0..5 of the four rows
//     tri->cv_soln->cv_dg_rho[cv_indx],  tri->cv_soln->cv_dg_e[cv_indx],
//     tri->cv_soln->cv_dg_m[0][cv_indx], tri->cv_soln->cv_dg_m[1][cv_indx]
// in place, using the offset (xdiff, ydiff) of tri->CVcent[cv_indx] from the
// centroid of tri.  Coefficients 6..9 are read but not modified.
//
// With the coefficient order 1, x, y, x^2, xy, y^2, x^3, x^2 y, x y^2, y^3
// the update formulas are those of moving the expansion point of a cubic
// from the centroid to the control-volume centre.

// SHIFT_P3_COEFS(c, dx, dy): the in-place update of one coefficient row c.
// Each statement reads only higher-index entries that have not been updated
// yet, so the order of the statements must be kept.  It is a macro rather
// than a function so that it applies to the cv_dg_* rows whatever their
// declared element type; c, dx and dy are expanded several times and must be
// free of side effects.
#define SHIFT_P3_COEFS(c, dx, dy)                                           \
         do {                                                                \
             (c)[0] += (c)[1]*(dx) +                                         \
                       (c)[2]*(dy) +                                         \
                       (c)[3]*sqr(dx) +                                      \
                       (c)[4]*(dx)*(dy) +                                    \
                       (c)[5]*sqr(dy);                                       \
             (c)[0] += (c)[6]*cub(dx);                                       \
             (c)[0] += (c)[7]*sqr(dx)*(dy);                                  \
             (c)[0] += (c)[8]*sqr(dy)*(dx);                                  \
             (c)[0] += (c)[9]*cub(dy);                                       \
             (c)[1] += 2.0*(c)[3]*(dx) +                                     \
                       (c)[4]*(dy);                                          \
             (c)[1] += 3.0*(c)[6]*sqr(dx);                                   \
             (c)[1] += 2.0*(c)[7]*(dx)*(dy);                                 \
             (c)[1] += (c)[8]*sqr(dy);                                       \
             (c)[2] += (c)[4]*(dx) +                                         \
                       2.0*(c)[5]*(dy);                                      \
             (c)[2] += (c)[7]*sqr(dx);                                       \
             (c)[2] += 2.0*(c)[8]*(dx)*(dy);                                 \
             (c)[2] += 3.0*(c)[9]*sqr(dy);                                   \
             (c)[3] += 3.0*(c)[6]*(dx);                                      \
             (c)[3] += (c)[7]*(dy);                                          \
             (c)[4] += 2.0*(c)[7]*(dx);                                      \
             (c)[4] += 2.0*(c)[8]*(dy);                                      \
             (c)[5] += (c)[8]*(dx);                                          \
             (c)[5] += 3.0*(c)[9]*(dy);                                      \
         } while (0)

EXPORT void map_poly_SV_to_CV_p3(
         TRI       *tri)
{
         int         cv_indx;
         float xdiff, ydiff;
         float *cent = fg_centroid(tri);

         for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
         {
             xdiff = tri->CVcent[cv_indx][0] - cent[0];
             ydiff = tri->CVcent[cv_indx][1] - cent[1];

             // density
             SHIFT_P3_COEFS(tri->cv_soln->cv_dg_rho[cv_indx], xdiff, ydiff);

             // energy
             SHIFT_P3_COEFS(tri->cv_soln->cv_dg_e[cv_indx], xdiff, ydiff);

             // mom[0]
             SHIFT_P3_COEFS(tri->cv_soln->cv_dg_m[0][cv_indx], xdiff, ydiff);

             // mom[1]
             SHIFT_P3_COEFS(tri->cv_soln->cv_dg_m[1][cv_indx], xdiff, ydiff);
         }
}

#undef SHIFT_P3_COEFS

/*
 * map_poly_SV_to_CV_p2(): six-coefficient (indices 0..5) counterpart of
 * map_poly_SV_to_CV(), which applies to the P3 case.
 *
 * For each of the N_PART CVs of tri, the coefficient sets held in
 * tri->cv_soln (cv_dg_rho, cv_dg_e, cv_dg_m[0], cv_dg_m[1] and, when
 * N_EQN == 8, also cv_dg_m[2] and cv_dg_B[0..2]) are modified in place.
 * The offset used is tri->CVcent[cv_indx] minus fg_centroid(tri), divided
 * by sqrt(fg_area(tri)).  Only coefficients 0, 1 and 2 are written;
 * coefficients 3, 4 and 5 are read but left unchanged.
 *
 * Presumably this re-expands the SV polynomial about each CV centre
 * (inferred from the function name) -- TODO confirm.
 *
 * NOTE(review): coefficient 0 is advanced with the area-scaled offsets,
 * while the updates of coefficients 1 and 2 divide by sqrt(area) once
 * more.  Confirm this matches the scaling of the basis functions.
 */
#define SHIFT_P2_COEF(coef, xs, ys, len)                                  \
         do {                                                              \
             (coef)[0] += (coef)[1]*(xs) + (coef)[2]*(ys) +                \
                          (coef)[3]*sqr(xs) +                              \
                          (coef)[4]*(xs)*(ys) +                            \
                          (coef)[5]*sqr(ys);                               \
             (coef)[1] += (2.0*(coef)[3]*(xs) + (coef)[4]*(ys))/(len);     \
             (coef)[2] += ((coef)[4]*(xs) + 2.0*(coef)[5]*(ys))/(len);     \
         } while (0)

EXPORT void map_poly_SV_to_CV_p2(
         TRI       *tri)
{
         float  *cent = fg_centroid(tri);
         double area = fg_area(tri);
         double sqrt_area = sqrt(area);
         float  xdiff, ydiff;
         int    cv_indx, dir;

         for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
         {
             /* offset of this CV centre from the SV centroid, scaled by sqrt(area) */
             xdiff = (tri->CVcent[cv_indx][0] - cent[0])/sqrt_area;
             ydiff = (tri->CVcent[cv_indx][1] - cent[1])/sqrt_area;

             SHIFT_P2_COEF(tri->cv_soln->cv_dg_rho[cv_indx], xdiff, ydiff, sqrt_area);
             SHIFT_P2_COEF(tri->cv_soln->cv_dg_e[cv_indx], xdiff, ydiff, sqrt_area);
             for(dir = 0; dir < 2; dir++)
                 SHIFT_P2_COEF(tri->cv_soln->cv_dg_m[dir][cv_indx], xdiff, ydiff, sqrt_area);

             if(N_EQN != 8)
                 continue;

             /* eight-equation case: third cv_dg_m component and the three cv_dg_B components */
             SHIFT_P2_COEF(tri->cv_soln->cv_dg_m[2][cv_indx], xdiff, ydiff, sqrt_area);
             for(dir = 0; dir < 3; dir++)
                 SHIFT_P2_COEF(tri->cv_soln->cv_dg_B[dir][cv_indx], xdiff, ydiff, sqrt_area);
         }
}

#undef SHIFT_P2_COEF

/*
 * map_poly_SV_to_CV_ver2_p2(): same arithmetic as map_poly_SV_to_CV_p2(),
 * but applied to the coefficient sets stored in limit_store[tri->id]
 * instead of tri->cv_soln.
 *
 * For each of the N_PART CVs, coefficients 0, 1 and 2 of cv_dg_rho,
 * cv_dg_e, cv_dg_m[0], cv_dg_m[1] (and, when N_EQN == 8, cv_dg_m[2] and
 * cv_dg_B[0..2]) are updated in place from coefficients 1..5.  The offset
 * used is tri->CVcent[cv_indx] minus fg_centroid(tri), divided by
 * sqrt(fg_area(tri)).
 *
 * NOTE(review): coefficient 0 uses the area-scaled offsets, while the
 * updates of coefficients 1 and 2 divide by sqrt(area) once more.
 * Confirm this matches the scaling of the basis functions.
 */
#define SHIFT_P2_STORE_COEF(coef, xs, ys, len)                            \
         do {                                                              \
             (coef)[0] += (coef)[1]*(xs) + (coef)[2]*(ys) +                \
                          (coef)[3]*sqr(xs) +                              \
                          (coef)[4]*(xs)*(ys) +                            \
                          (coef)[5]*sqr(ys);                               \
             (coef)[1] += (2.0*(coef)[3]*(xs) + (coef)[4]*(ys))/(len);     \
             (coef)[2] += ((coef)[4]*(xs) + 2.0*(coef)[5]*(ys))/(len);     \
         } while (0)

EXPORT void map_poly_SV_to_CV_ver2_p2(
         TRI       *tri,
         Limiting_store *limit_store)
{
         float  *cent = fg_centroid(tri);
         double area = fg_area(tri);
         double sqrt_area = sqrt(area);
         Limiting_store *slot = &limit_store[tri->id]; /* entry belonging to this triangle */
         float  xdiff, ydiff;
         int    cv_indx, dir;

         for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
         {
             /* offset of this CV centre from the SV centroid, scaled by sqrt(area) */
             xdiff = (tri->CVcent[cv_indx][0] - cent[0])/sqrt_area;
             ydiff = (tri->CVcent[cv_indx][1] - cent[1])/sqrt_area;

             SHIFT_P2_STORE_COEF(slot->cv_dg_rho[cv_indx], xdiff, ydiff, sqrt_area);
             SHIFT_P2_STORE_COEF(slot->cv_dg_e[cv_indx], xdiff, ydiff, sqrt_area);
             for(dir = 0; dir < 2; dir++)
                 SHIFT_P2_STORE_COEF(slot->cv_dg_m[dir][cv_indx], xdiff, ydiff, sqrt_area);

             if(N_EQN != 8)
                 continue;

             /* eight-equation case: third cv_dg_m component and the three cv_dg_B components */
             SHIFT_P2_STORE_COEF(slot->cv_dg_m[2][cv_indx], xdiff, ydiff, sqrt_area);
             for(dir = 0; dir < 3; dir++)
                 SHIFT_P2_STORE_COEF(slot->cv_dg_B[dir][cv_indx], xdiff, ydiff, sqrt_area);
         }
}

#undef SHIFT_P2_STORE_COEF

/*
 * map_poly_SV_to_CV_ver2_p3(): ten-coefficient (indices 0..9) update of
 * the coefficient sets stored in limit_store[tri->id].
 *
 * For each of the N_PART CVs, with (xdiff, ydiff) the unscaled offset of
 * tri->CVcent[cv_indx] from fg_centroid(tri), coefficients 0..5 of
 * cv_dg_rho, cv_dg_e, cv_dg_m[0] and cv_dg_m[1] are updated in place;
 * coefficients 6..9 are only read.  The increments are those obtained by
 * expanding 1, x, y, x^2, xy, y^2, x^3, x^2 y, x y^2, y^3 about the
 * shifted origin.  Lower-order coefficients are updated first, so every
 * right-hand side still sees the not-yet-modified higher-order values.
 *
 * Unlike the _p2 variants, there is no N_EQN == 8 branch here, so
 * cv_dg_m[2] and cv_dg_B are not touched.
 */
#define SHIFT_P3_STORE_COEF(coef, xs, ys)                                 \
         do {                                                              \
             (coef)[0] += (coef)[1]*(xs) + (coef)[2]*(ys) +                \
                          (coef)[3]*sqr(xs) +                              \
                          (coef)[4]*(xs)*(ys) +                            \
                          (coef)[5]*sqr(ys);                               \
             (coef)[0] += (coef)[6]*cub(xs);                               \
             (coef)[0] += (coef)[7]*sqr(xs)*(ys);                          \
             (coef)[0] += (coef)[8]*sqr(ys)*(xs);                          \
             (coef)[0] += (coef)[9]*cub(ys);                               \
                                                                           \
             (coef)[1] += 2.0*(coef)[3]*(xs) + (coef)[4]*(ys);             \
             (coef)[1] += 3.0*(coef)[6]*sqr(xs);                           \
             (coef)[1] += 2.0*(coef)[7]*(xs)*(ys);                         \
             (coef)[1] += (coef)[8]*sqr(ys);                               \
                                                                           \
             (coef)[2] += (coef)[4]*(xs) + 2.0*(coef)[5]*(ys);             \
             (coef)[2] += (coef)[7]*sqr(xs);                               \
             (coef)[2] += 2.0*(coef)[8]*(xs)*(ys);                         \
             (coef)[2] += 3.0*(coef)[9]*sqr(ys);                           \
                                                                           \
             (coef)[3] += 3.0*(coef)[6]*(xs);                              \
             (coef)[3] += (coef)[7]*(ys);                                  \
                                                                           \
             (coef)[4] += 2.0*(coef)[7]*(xs);                              \
             (coef)[4] += 2.0*(coef)[8]*(ys);                              \
                                                                           \
             (coef)[5] += (coef)[8]*(xs);                                  \
             (coef)[5] += 3.0*(coef)[9]*(ys);                              \
         } while (0)

EXPORT void map_poly_SV_to_CV_ver2_p3(
         TRI       *tri,
         Limiting_store *limit_store)
{
         float  *cent = fg_centroid(tri);
         Limiting_store *slot = &limit_store[tri->id]; /* entry belonging to this triangle */
         float  xdiff, ydiff;
         int    cv_indx;

         for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
         {
             /* unscaled offset of this CV centre from the SV centroid */
             xdiff = tri->CVcent[cv_indx][0] - cent[0];
             ydiff = tri->CVcent[cv_indx][1] - cent[1];

             SHIFT_P3_STORE_COEF(slot->cv_dg_rho[cv_indx], xdiff, ydiff);   /* rho    */
             SHIFT_P3_STORE_COEF(slot->cv_dg_e[cv_indx], xdiff, ydiff);     /* energy */
             SHIFT_P3_STORE_COEF(slot->cv_dg_m[0][cv_indx], xdiff, ydiff);  /* mom[0] */
             SHIFT_P3_STORE_COEF(slot->cv_dg_m[1][cv_indx], xdiff, ydiff);  /* mom[1] */
         }
}

#undef SHIFT_P3_STORE_COEF

/*
 * con_u_at_CV_pt(): accumulate four values at the point crds from the
 * coefficient sets of CV number cv_indx stored in tri->cv_soln.
 *
 *   con_u[0] = sum_k cv_dg_rho[cv_indx][k]  * vh_val(crds,cent,k)
 *   con_u[1] = sum_k cv_dg_m[0][cv_indx][k] * vh_val(crds,cent,k)
 *   con_u[2] = sum_k cv_dg_m[1][cv_indx][k] * vh_val(crds,cent,k)
 *   con_u[3] = sum_k cv_dg_e[cv_indx][k]    * vh_val(crds,cent,k)
 *
 * with k running over 0 .. MAX_N_COEF-1.  vh_val() is presumably the k-th
 * basis function evaluated at crds relative to cent -- TODO confirm.
 * con_u must have room for four floats; it is overwritten.
 */
EXPORT void con_u_at_CV_pt(
         TRI      *tri,
         int      cv_indx,
         float    *crds,
         float  *cent,
         float    *con_u)
{
         int      k;
         float    basis;

         con_u[0] = con_u[1] = con_u[2] = con_u[3] = 0.0;

         k = 0;
         while(k < MAX_N_COEF)
         {
             basis = vh_val(crds,cent,k);

             con_u[0] += tri->cv_soln->cv_dg_rho[cv_indx][k]*basis;
             con_u[1] += tri->cv_soln->cv_dg_m[0][cv_indx][k]*basis;
             con_u[2] += tri->cv_soln->cv_dg_m[1][cv_indx][k]*basis;
             con_u[3] += tri->cv_soln->cv_dg_e[cv_indx][k]*basis;

             k++;
         }
}

/*
 * con_u_at_CV_pt_from_store(): same accumulation as con_u_at_CV_pt(), but
 * the coefficient sets are read from store[tri->id] instead of
 * tri->cv_soln.
 *
 *   con_u[0] <- cv_dg_rho,  con_u[1] <- cv_dg_m[0],
 *   con_u[2] <- cv_dg_m[1], con_u[3] <- cv_dg_e,
 *
 * each summed over k = 0 .. MAX_N_COEF-1 with weight vh_val(crds,cent,k)
 * (presumably the k-th basis function at crds relative to cent -- TODO
 * confirm).  con_u must have room for four floats; it is overwritten.
 */
EXPORT void con_u_at_CV_pt_from_store(
         TRI      *tri,
         int      cv_indx,
         float    *crds,
         float  *cent,
         Limiting_store *store,
         float    *con_u)
{
         Limiting_store *slot = &store[tri->id]; /* entry belonging to this triangle */
         int      k;
         float    basis;

         con_u[0] = con_u[1] = con_u[2] = con_u[3] = 0.0;

         k = 0;
         while(k < MAX_N_COEF)
         {
             basis = vh_val(crds,cent,k);

             con_u[0] += slot->cv_dg_rho[cv_indx][k]*basis;
             con_u[1] += slot->cv_dg_m[0][cv_indx][k]*basis;
             con_u[2] += slot->cv_dg_m[1][cv_indx][k]*basis;
             con_u[3] += slot->cv_dg_e[cv_indx][k]*basis;

             k++;
         }
}

// CV_side_on_SV_side(): map side cv_side of CV number cv_indx to a side of the SV.
// return -1: CV side is in the interior of the SV
// return the SV side index if cv_side is on the SV boundary
//
// Mapping for cv_indx 0, 1, 2:
//     cv_side 0 -> SV side cv_indx
//     cv_side 1 -> -1 (interior)
//     cv_side 2 -> SV side (cv_indx+2)%3
// Any other cv_indx yields -1.
//
// The previous code tested cv_side == 3 for cv_indx == 2.  A triangular CV
// only has sides 0..2, so (cv_indx 2, cv_side 2) fell through and was
// reported as interior, unlike cv_indx 0 and 1.  The test is now cv_side == 2.
EXPORT int CV_side_on_SV_side(
        int cv_indx,
        int cv_side)
{
        switch(cv_indx)
        {
        case 0:
            if(cv_side == 0)
                return 0;
            if(cv_side == 2)
                return 2;
            break;
        case 1:
            if(cv_side == 0)
                return 1;
            if(cv_side == 1)
                return -1;
            // kept from the original: every remaining side value maps to SV side 0
            return 0;
        case 2:
            if(cv_side == 0)
                return 2;
            if(cv_side == 2)
                return 1;
            break;
        default:
            break;
        }
        // cv_side 1, unknown sides of CV 0/2, and any other cv_indx
        return -1;
}

/// Get the tri-type CV stencil: the edge-adjacent neighbors of the CV and
/// the edge-adjacent neighbors of those neighbors (which are also vertex
/// adjacent to the CV), in the counter-clockwise direction.
///
/// The work is delegated according to cv_indx:
///     cv_indx < 3  -> get_tri_vert_CV_stencil()
///     otherwise    -> get_tri_cent_CV_stencil()
/// All arguments are forwarded unchanged; the helper fills nbcv_indx,
/// nbcv_side, tris and *n_tri.  The helper's own return value is ignored
/// and the value left in *n_tri is returned.
///
/// Four locals that were never read (including a vertex pointer that was
/// fetched and then unused) and the commented-out code have been removed.
EXPORT int get_tri_CV_stencil(
         TRI       *tri,
         TRI       *nbtri[3],
         int       cv_indx,
         int       *nbcv_indx,
         int       *nbcv_side,
         TRI       **tris,
         int       *n_tri)
{
         if(cv_indx < 3)
             get_tri_vert_CV_stencil(tri,nbtri,cv_indx,nbcv_indx,nbcv_side,tris,n_tri);
         else
             get_tri_cent_CV_stencil(tri,nbtri,cv_indx,nbcv_indx,nbcv_side,tris,n_tri);

         return *n_tri;
}

/// Return the index (0..2) of the side of `from` whose neighbor is
/// `target`; returns 3 when none of the three sides matches.
LOCAL int cv_sten_side_facing(
         TRI       *from,
         TRI       *target)
{
         int    side;

         for(side = 0; side < 3; side++)
         {
             if(Tri_on_side(from,side) == target)
                 break;
         }
         return side;
}

/// Store one stencil entry at position pos and return the next free position.
LOCAL int cv_sten_append(
         TRI       **tris,
         int       *nbcv_indx,
         int       *nbcv_side,
         int       pos,
         TRI       *owner,
         int       indx,
         int       side)
{
         tris[pos] = owner;
         nbcv_indx[pos] = indx;
         nbcv_side[pos] = side;
         return pos + 1;
}

/// Get tri-type edge-adjacent CV neighbors; the vertex-adjacent CVs are
/// also included in the stencil.  A tri is divided into 4 self-similar
/// CVs by connecting the edge midpoints.
///
/// The ring is assembled by visiting nbtri[0], nbtri[1], nbtri[2] in turn.
/// For each edge neighbor nb (sharing its side s with tri) the entries are
///   1. (k > 0 only) the tri across side (s+1)%3 of nb, unless it is NULL
///      or equal to the last "trailing" tri recorded for the previous
///      neighbor,
///   2. two CVs of nb itself: index (s+1)%3 and index s, both on side s,
///   3. the "trailing" tri across side (s+2)%3 of nb, unless it is NULL.
/// Finally the tri across side (s+1)%3 of nbtri[0] closes the ring, again
/// skipped when NULL or equal to the last trailing tri.
///
/// cv_indx is not used.  The entry count is stored in *n_tri and returned.
LOCAL int get_tri_edge_vert_CV_stencil(
         TRI       *tri,
         TRI       *nbtri[3],
         int       cv_indx,
         int       *nbcv_indx,
         int       *nbcv_side,
         TRI       **tris,
         int       *n_tri)
{
         TRI    *edge_nb, *outer, *last_trailing = NULL;
         int    k, own_side, far_side, count = 0;

         for(k = 0; k < 3; k++)
         {
             edge_nb = nbtri[k];
             own_side = cv_sten_side_facing(edge_nb,tri);

             // outer tri ahead of this neighbor; for k == 0 it is added last
             if(k > 0)
             {
                 outer = Tri_on_side(edge_nb,(own_side+1)%3);
                 if(outer != NULL && outer != last_trailing)
                 {
                     far_side = cv_sten_side_facing(outer,edge_nb);
                     count = cv_sten_append(tris,nbcv_indx,nbcv_side,count,
                                            outer,(far_side+1)%3,far_side);
                 }
             }

             // the two partial cells of the edge neighbor itself
             count = cv_sten_append(tris,nbcv_indx,nbcv_side,count,
                                    edge_nb,(own_side+1)%3,own_side);
             count = cv_sten_append(tris,nbcv_indx,nbcv_side,count,
                                    edge_nb,own_side,own_side);

             // outer tri behind this neighbor; remembered for the next check
             last_trailing = outer = Tri_on_side(edge_nb,(own_side+2)%3);
             if(outer != NULL)
             {
                 far_side = cv_sten_side_facing(outer,edge_nb);
                 count = cv_sten_append(tris,nbcv_indx,nbcv_side,count,
                                        outer,far_side,far_side);
             }
         }

         // close the ring with the outer tri ahead of nbtri[0]
         edge_nb = nbtri[0];
         own_side = cv_sten_side_facing(edge_nb,tri);
         outer = Tri_on_side(edge_nb,(own_side+1)%3);
         if(outer != NULL && outer != last_trailing)
         {
             far_side = cv_sten_side_facing(outer,edge_nb);
             count = cv_sten_append(tris,nbcv_indx,nbcv_side,count,
                                    outer,(far_side+1)%3,far_side);
         }

         *n_tri = count;
         return *n_tri;
}


/// Build the CV stencil around a vertex CV (cv_indx = 0, 1 or 2) of tri.
///
/// Output layout (one entry per stencil CV in tris[], nbcv_indx[],
/// nbcv_side[]):
///   [0..2]  three CVs of tri itself: index (cv_indx+1)%3 on side cv_indx,
///           index 3 with side -1, index (cv_indx+2)%3 on that same side;
///   [3..4]  two CVs of nbtri[(cv_indx+2)%3]: index 3 with side -1, and the
///           CV whose index equals the side shared with tri;
///   [5]     (optional) the next tri across side (s+2)%3 of that neighbor,
///           skipped when NULL or equal to nbtri[cv_indx];
///   then the entries found walking the other way from nbtri[cv_indx],
///           appended in reverse order of discovery.
/// CV index 3 is presumably the central CV of a tri (get_tri_CV_stencil
/// routes cv_indx >= 3 to get_tri_cent_CV_stencil) -- confirm.
///
/// The case of exactly 3 tris around the vertex is not implemented; the
/// routine prints an error and calls clean_up(ERROR).
///
/// The number of entries is stored in *n_tri and also returned.
LOCAL int get_tri_vert_CV_stencil(
         TRI       *tri,
         TRI       *nbtri[3],
         int       cv_indx,
         int       *nbcv_indx,
         int       *nbcv_side,
         TRI       **tris,
         int       *n_tri)
{
         int    on_SV_side, nbSV_side, next_side;
         TRI    *nntri[40], *tmptri, *prevtri = NULL;
         int    cv_count, i;
         int    tmpnbcv_indx[30];
         int    tmpnbcv_side[30], tmpcv_count;

         // check if # of tris around the vertex of the tri of the CV
         // = 3. We need to implement this case separately.
         if(YES == is_3tris_around_pt(nbtri, tri, cv_indx))
         {
             printf("ERROR: 3 tris case in get_tri_vert_CV_stencil, implement\n");
             clean_up(ERROR);
         }

         // CVs that belong to tri itself
         tris[0] = tri;
         nbcv_indx[0] = (cv_indx+1)%3;
         nbcv_side[0] = cv_indx;
         tris[1] = tri;
         nbcv_indx[1] = 3;
         nbcv_side[1] = -1;
         tris[2] = tri;
         nbcv_indx[2] = (cv_indx+2)%3;
         nbcv_side[2] = (cv_indx+2)%3;

         // 1st neighbor in the counter-clock-wise direction,
         // which is the one in nbtri
         on_SV_side = (cv_indx+2)%3; // w. r. t. tri
         for(nbSV_side = 0; nbSV_side < 3; nbSV_side++)
         {
             if(tri == Tri_on_side(nbtri[on_SV_side],nbSV_side))
                 break;
         }
         tris[3] = nbtri[on_SV_side];
         nbcv_indx[3] = 3;
         nbcv_side[3] = -1;
         tris[4] = nbtri[on_SV_side];
         nbcv_indx[4] = nbSV_side;
         nbcv_side[4] = nbSV_side;

         // get the neighbor of the previous nbtri in the
         // counter-clock-wise direction, which shares the vertex
         next_side = (nbSV_side+2)%3;
         prevtri = tmptri = Tri_on_side(nbtri[on_SV_side],next_side);
         cv_count = 5;
         if(tmptri != NULL && tmptri != nbtri[cv_indx])
         {
             // get the side of tmptri which is adjacent to the previous nbtri
             for(nbSV_side = 0; nbSV_side < 3; nbSV_side++)
             {
                 if(nbtri[on_SV_side] == Tri_on_side(tmptri,nbSV_side))
                     break;
             }
             tris[cv_count] = tmptri;
             nbcv_indx[cv_count] = nbSV_side;
             nbcv_side[cv_count] = nbSV_side;
             cv_count++;
         }

         // Now do the clock-wise direction to add CVs to the stencil.
         // We start with the 1st neighbor of tri which contains cv_indx.
         // These entries are buffered and appended in reverse below so the
         // whole stencil stays in one rotational order.
         tmpcv_count = 0;
         on_SV_side = cv_indx; // w. r. t. tri in the clock-wise direction
         for(nbSV_side = 0; nbSV_side < 3; nbSV_side++)
         {
             if(tri == Tri_on_side(nbtri[on_SV_side],nbSV_side))
                 break;
         }
         nntri[tmpcv_count] = nbtri[on_SV_side];
         tmpnbcv_indx[tmpcv_count] = 3;
         tmpnbcv_side[tmpcv_count] = -1;
         tmpcv_count++;

         nntri[tmpcv_count] = nbtri[on_SV_side];
         tmpnbcv_indx[tmpcv_count] = (nbSV_side+1)%3;
         tmpnbcv_side[tmpcv_count] = nbSV_side;
         tmpcv_count++;

         // get the neighbor of the previous nbtri
         // in the clock-wise direction, which shares the vertex
         next_side = (nbSV_side+1)%3;
         tmptri = Tri_on_side(nbtri[on_SV_side],next_side);

         // skip it when the counter-clock-wise walk already reached it
         if(tmptri != NULL && tmptri != prevtri)
         {
             // get the side of tmptri which is adjacent to the previous nbtri
             for(nbSV_side = 0; nbSV_side < 3; nbSV_side++)
             {
                 if(nbtri[on_SV_side] == Tri_on_side(tmptri,nbSV_side))
                     break;
             }
             nntri[tmpcv_count] = tmptri;
             tmpnbcv_indx[tmpcv_count] = (nbSV_side+1)%3;
             tmpnbcv_side[tmpcv_count] = (nbSV_side+1)%3;
             tmpcv_count++;
         }

         // Append the buffered clock-wise entries in reverse order.
         // tmpcv_count is a count, so the last valid slot is tmpcv_count-1;
         // starting at tmpcv_count would copy an uninitialized entry.
         for(i = tmpcv_count-1; i >= 0; i--)
         {
             tris[cv_count] = nntri[i];
             nbcv_indx[cv_count] = tmpnbcv_indx[i];
             nbcv_side[cv_count] = tmpnbcv_side[i];
             cv_count++;
         }

         *n_tri = cv_count;
         return *n_tri;
}

/// Build the 9-entry stencil used when cv_indx selects the CV handled by
/// the "cent" branch of get_tri_CV_stencil.
///
/// For k = 0, 1, 2 the three consecutive entries starting at 3*k are
///   [3k]   CV k of tri itself, recorded with side k,
///   [3k+1] CV (s+1)%3 of nbtri[k], recorded with side s,
///   [3k+2] CV s of nbtri[k], recorded with side s,
/// where s is the side of nbtri[k] across which tri lies (s stays 3 if no
/// side matches).  cv_indx is not used.
///
/// Stores 9 in *n_tri and returns 9.
LOCAL int get_tri_cent_CV_stencil(
         TRI       *tri,
         TRI       *nbtri[3],
         int       cv_indx,
         int       *nbcv_indx,
         int       *nbcv_side,
         TRI       **tris,
         int       *n_tri)
{
         int    k, slot, shared;

         for(k = 0, slot = 0; k < 3; k++, slot += 3)
         {
             // CV of tri that sits next to side k
             tris[slot] = tri;
             nbcv_indx[slot] = k;
             nbcv_side[slot] = k;

             // locate tri from the point of view of nbtri[k]
             shared = 0;
             while(shared < 3 && tri != Tri_on_side(nbtri[k],shared))
                 shared++;

             // the two CVs of nbtri[k] lying along the shared side
             tris[slot+1] = nbtri[k];
             nbcv_indx[slot+1] = (shared + 1)%3;
             nbcv_side[slot+1] = shared;

             tris[slot+2] = nbtri[k];
             nbcv_indx[slot+2] = shared;
             nbcv_side[slot+2] = shared;
         }

         *n_tri = 9;
         return 9;
}
// Collect the tris that share vertex `vert` of `tri`, for CVs [0, 1, 2].
//
//   nbtri   the three side neighbors of tri (entries may be NULL)
//   tri     the starting tri; always stored as nntri[0]
//   vert    index (0..2) of the vertex of tri to walk around
//   nntri   output list of tris; the caller provides the storage
//   nn_num  output: number of valid entries in nntri
//
// The walk first goes counter-clock-wise starting from nbtri[(vert+2)%3].
// If it comes back to tri the ring is closed and the list is complete.
// Otherwise (a NULL neighbor stopped the walk) a second walk starts from
// nbtri[vert] in the clock-wise direction and its tris are appended in
// reverse order, so the combined list keeps one rotational order.
LOCAL void tris_around_pt(
        TRI       *nbtri[3],
        TRI       *tri,
        int       vert, 
        TRI       **nntri,
        int       *nn_num)
{
        int       N_nn = 0;
        int       i, side, tmpvert, tmpN_nn = 0;
        TRI       *tmp, *tmpnbtri[3];
        TRI       *tmpnntri[30];

        // counter-clock-wise: 1st element is the tri
        nntri[N_nn] = tri;
        N_nn++;

        // counter-clock-wise: 2nd element is tri's neighbor
        tmp = nntri[N_nn] = nbtri[(vert+2)%3];
        if(tmp != NULL)
            N_nn++;

        ////// 3rd and more element in
        ////// the counter-clock-wise direction
        for(; tmp != NULL; )
        {
            for(side = 0; side < 3; side++)
                tmpnbtri[side] = Tri_on_side(tmp,side);

            // find which vertex of tmp is the vertex we are circling
            for(tmpvert = 0; tmpvert < 3; tmpvert++)
            {
                if(Point_of_tri(tmp)[tmpvert] == Point_of_tri(tri)[vert])   
                    break;
            }
            tmp = nntri[N_nn] = tmpnbtri[(tmpvert+2)%3];
            if(tmp != NULL)
            {
                // The ring closed on tri: every tri is already listed.
                // Report the count before leaving; previously *nn_num
                // was left unset on this path.
                if(tmp == tri)
                {
                    *nn_num = N_nn;
                    return;
                }
                N_nn++;
            }
        } 

        ///////// 3rd and more element in
        ///////// the clock-wise direction
        tmp = tmpnntri[tmpN_nn] = nbtri[vert];
        if(tmp != NULL)
            tmpN_nn++;
        for(; tmp != NULL; )
        {
            for(side = 0; side < 3; side++)
                tmpnbtri[side] = Tri_on_side(tmp,side);

            for(tmpvert = 0; tmpvert < 3; tmpvert++)
            {
                if(Point_of_tri(tmp)[tmpvert] == Point_of_tri(tri)[vert])
                    break;
            }
            tmp = tmpnntri[tmpN_nn] = tmpnbtri[tmpvert];
            if(tmp != NULL)
            {
                // check whether tri is included again
                if(tmp == tri)
                    break;
                tmpN_nn++;
            }
        }

        //// put counter-clock-wise and clock-wise list together
        for(i = tmpN_nn-1; i >= 0; i--)
        {
            nntri[N_nn] = tmpnntri[i];  
            N_nn++;
        }
        *nn_num = N_nn;
}

// Test used to detect the "3 tris around a vertex" configuration.
//
// Takes the neighbor of tri across side (vert+2)%3, finds the side of that
// neighbor across which tri lies, steps to the side two positions further
// (mod 3), and checks whether the tri across that side is the neighbor of
// tri across side (vert+1)%3.
//
// Returns YES when they are the same tri, NO otherwise.
//
// NOTE(review): tris_around_pt and get_tri_vert_CV_stencil treat
// nbtri[vert] as the clock-wise neighbor around the vertex, while this
// routine compares against nbtri[(vert+1)%3] -- confirm which is intended.
LOCAL int is_3tris_around_pt(
        TRI       *nbtri[3],
        TRI       *tri,
        int       vert)
{
        TRI    *ccw_nb = nbtri[(vert+2)%3]; // w. r. t. tri
        TRI    *cw_nb = nbtri[(vert+1)%3];  // w. r. t. tri
        int    shared = 0;

        // side of ccw_nb that faces tri (stays 3 when there is none)
        while(shared < 3 && tri != Tri_on_side(ccw_nb,shared))
            shared++;

        return (Tri_on_side(ccw_nb,(shared+2)%3) == cw_nb) ? YES : NO;
}

/*
   Scan every tri of every surface in fr->mesh (tris with BC_type ==
   SUBDOMAIN are skipped) and split them into two groups.

   - Tris for which overshoot_state() returns YES are recorded as limiting
     candidates.  They are stored in rows of N_alloc pointers each;
     row_limit_tris[r] points to row r.  A fresh row is allocated as soon
     as the current one is full, and the run is aborted through
     clean_up(ERROR) when that would reach Row_alloc rows.

   - All other tris are finalized here.  For locally divergence-free MHD
     the divergence-free constraint is re-enforced on the current state,
     the state is copied into midsoln[tri->id].st[0], and on the last
     stage (rk_step == RK_STEP) the globals newdt / time_on_tri are
     updated from the per-tri time step.

   For boundary tris the buffer states of the test problem selected with
   debugging() are attached before the classification.

   Returns the number of rows in use; *return_N_use receives the number of
   entries used in the last row.
*/
LOCAL int identify_limiting_candidates(
        Front    *fr,
        Mid_soln *midsoln,
        Limiting_store **limit_store,
        int      rk_step,
        int      N_alloc,
        int      Row_alloc,
        TRI      **row_limit_tris[],
        int      *return_N_use)
{
        TRI       **limit_tris;  // row currently being filled
        TRI       *tri;
        SURFACE   **surf;
        Locstate  tmpst;
        double    max_dt;
        int       N_row = 1, N_use = 0;

        vector(&limit_tris, N_alloc, sizeof(TRI*));
        row_limit_tris[0] = limit_tris;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                // problem specific buffer states next to boundaries
                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(debugging("twod_riemann"))
                        twod_riemann_attach_buffer_states(fr,midsoln,rk_step,tri,limit_store);
                    if(debugging("db_Mach"))
                        db_Mach_attach_buffer_states(fr,midsoln,rk_step,tri,limit_store);
                    if(debugging("shock_vort"))
                        shock_vort_attach_buffer_states(fr,midsoln,rk_step,tri,limit_store);
                    if(debugging("Sod"))
                        shock_vort_attach_buffer_states(fr,midsoln,rk_step,tri,limit_store);
                }

                if(overshoot_state(tri,midsoln,rk_step) != YES)
                {
                    // no limiting needed: this is for the locally
                    // divergence-free case
                    if(rk_step == RK_STEP)
                        tmpst = tri->st;
                    else
                        tmpst = midsoln[tri->id].st[rk_step];

                    impose_loc_divergence_free(tri, tmpst);

                    if(rk_step != RK_STEP)
                    {
                        assign(midsoln[tri->id].st[0],midsoln[tri->id].st[rk_step],fr->sizest);
                        continue;
                    }

                    // last stage: track the tri with the smallest time step
                    max_dt = (*fr->_time_step_on_tri)(fr, tri);
                    if(max_dt < newdt)
                        time_on_tri = tri;
                    newdt = min(newdt, max_dt);
                    assign(midsoln[tri->id].st[0],tri->st,fr->sizest);
                    continue;
                }

                // limiting candidate: remember it in the current row
                limit_tris[N_use] = tri;
                N_use++;
                if(N_use != N_alloc)
                    continue;

                // current row is full, start a new one
                if(N_row +1 >= Row_alloc)
                {
                    printf("ERROR: identify_limiting_candidates, exceed alloc. limit\n");
                    clean_up(ERROR);
                }
                vector(&limit_tris, N_alloc, sizeof(TRI*));
                row_limit_tris[N_row] = limit_tris;
                N_row++;
                N_use = 0;
            }
        }

        *return_N_use = N_use;
        return N_row;
}

/*
   Re-enforce the locally divergence-free relations between the
   coefficients dg_B(st2)[0][*] and dg_B(st2)[1][*], then shift the two
   leading coefficients so that the averages stay equal to Mag(st2)[0/1].

   Every pair (a, b) with the relation  b = -c*a  is treated the same way:
   a keeps its sign while its magnitude is clipped to min(|a|, |b|/c),
   after which b is reset to -c*a.

   Pairs handled, depending on MAX_N_COEF:
     3, 6, 10 : ([0][1],[1][2]) with c = 1
     6, 10    : ([0][3],[1][4]) with c = 2,   ([0][4],[1][5]) with c = 1/2
     10       : ([0][6],[1][7]) with c = 3,   ([0][7],[1][8]) with c = 1,
                ([0][8],[1][9]) with c = 1/3

   For MAX_N_COEF == 3 the routine returns right after the first pair.
   Any value other than 3, 6, 10 ends in clean_up(ERROR) after the average
   correction has been applied.
*/
LOCAL void impose_loc_divergence_free(
	TRI      *tri,
        Locstate st2)
{
        double   **mass = tri->Bmass_matrix;
        double   lim, mean;
        int      comp, k;

        if(MAX_N_COEF == 3 || MAX_N_COEF == 6 || MAX_N_COEF == 10)
        {
            lim = min(fabs(dg_B(st2)[0][1]), fabs(dg_B(st2)[1][2]));
            dg_B(st2)[0][1] = (dg_B(st2)[0][1] >= 0.0) ? lim : -lim;
            dg_B(st2)[1][2] = -dg_B(st2)[0][1];
        }

        if(MAX_N_COEF == 3)
            return;

        if(MAX_N_COEF == 6 || MAX_N_COEF == 10)
        {
            // re-enforce divergence-free, also need to be conservative.
            lim = min(fabs(dg_B(st2)[0][3]), 0.5*fabs(dg_B(st2)[1][4]));
            dg_B(st2)[0][3] = (dg_B(st2)[0][3] >= 0.0) ? lim : -lim;
            dg_B(st2)[1][4] = -2.0*dg_B(st2)[0][3];

            lim = min(fabs(dg_B(st2)[0][4]), 2.0*fabs(dg_B(st2)[1][5]));
            dg_B(st2)[0][4] = (dg_B(st2)[0][4] >= 0.0) ? lim : -lim;
            dg_B(st2)[1][5] = -0.5*dg_B(st2)[0][4];
        }

        if(MAX_N_COEF == 10)
        {
            // re-enforce divergence-free, also need to be conservative.
            lim = min(fabs(dg_B(st2)[0][6]), fabs(dg_B(st2)[1][7])/3.0);
            dg_B(st2)[0][6] = (dg_B(st2)[0][6] >= 0.0) ? lim : -lim;
            dg_B(st2)[1][7] = -3.0*dg_B(st2)[0][6];

            lim = min(fabs(dg_B(st2)[0][7]), fabs(dg_B(st2)[1][8]));
            dg_B(st2)[0][7] = (dg_B(st2)[0][7] >= 0.0) ? lim : -lim;
            dg_B(st2)[1][8] = -dg_B(st2)[0][7];

            lim = min(fabs(dg_B(st2)[0][8]), 3.0*fabs(dg_B(st2)[1][9]));
            dg_B(st2)[0][8] = (dg_B(st2)[0][8] >= 0.0) ? lim : -lim;
            dg_B(st2)[1][9] = -dg_B(st2)[0][8]/3.0;
        }

        // restore the averages: weight the coefficients with the first row
        // of the mass matrix and move the difference into coefficient 0
        for(comp = 0; comp < 2; comp++)
        {
            mean = 0.0;
            for(k = 0; k < MAX_N_COEF; k++)
                mean += dg_B(st2)[comp][k]*mass[0][k];
            mean /= mass[0][0];
            dg_B(st2)[comp][0] += (Mag(st2)[comp] - mean);
        }

        if(MAX_N_COEF != 6 && MAX_N_COEF != 10)
        {
            printf("ERROR: impose_loc_divergence_free(), implement\n");
            clean_up(ERROR);
        }
}

EXPORT void Subcell_limiting_soln_with_buffer_tris_multiple_times(
        Front    *fr,
        Mid_soln *midsoln,
        Limiting_store **limit_store,
        int      rk_step,
        int      HR_times)
{
        TRI       *tri, *crsp_tri, *nbtri[3];
        SURFACE   **surf;
        int       dim = 2, i, j, side;
        double     *cent;
        size_t    sizest = fr->sizest;
        Locstate  st2;
        float     max_dt;
        // TRI       **limit_tris, **row_limit_tris[1500];
        TRI       **row_limit_tris[1500];
        int       N_alloc = 800, N_row, N_use =0, N, local_iter_N = 4, tmpi, Row_alloc = 1500;
        int       loop_num = 0, detect_extr = NO, comput_time = NO, check_quadr = NO;
        static Locstate Roe_st[3] = {NULL, NULL, NULL}, st;  //Roe mean value
        static float  **L[3], **R[3];
        float     conu[3][4][MAX_N_COEF];  // [side][# eqn][coef]
        float     outcome[4][MAX_N_COEF];
        char      s[256];
        int       debug = NO, bigHR_round, update_layer[3] = {NO, YES, YES};
        Locstate  nst;

        if(Roe_st[0] == NULL)
        {
             for(i = 0; i < 3; i++)
             {
                 matrix(&L[i], 4, 4, sizeof(float));
                 matrix(&R[i], 4, 4, sizeof(float));
                 g_alloc_state(&Roe_st[i], fr->sizest);
             }
             g_alloc_state(&st, fr->sizest);
        }
        if(mass_1st_row == NULL)
        {
            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));
            for(i = 0; i < 30; i++)
                matrix(&mass_1st_rows[i], 1, MAX_N_COEF,sizeof(double));
        }

        N_row = identify_limiting_candidates(fr,midsoln,limit_store,rk_step,N_alloc,
                                    Row_alloc,row_limit_tris,&N_use);

        for(bigHR_round = 0;  bigHR_round < 2; bigHR_round++)
        {
            preprocess_P3_times_rearrange_order(fr,midsoln,limit_store,
                  row_limit_tris,N_alloc,N_row,N_use,rk_step,HR_times);
        }

redo_HR:
        loop_num++;
        if(loop_num == HR_times) check_quadr = YES;

	/*******************************************/
	// do HR on big tris to preprocess
	/*******************************************/
        for(bigHR_round = 0;  bigHR_round < 0; bigHR_round++)
        {
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    for(side = 0; side < 3; side++)
                        nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                    if(debugging("char_re"))
                    {
                            // side = find_char_dir(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                            // comput_Roe_ver2(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,Roe_st,L,R);
                            // convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,L[side]);
                    }

                    pre_process_limiting_P3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);

                        // limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,YES);
                        // limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,NO);
	                // limiting_1st_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,NO);    
	                
                        // Subcell_limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                        // Subcell_limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);

                    if(debugging("char_re"))
                    {
                            // convert reconstructed char variables to conserv. variables.
                            // convert_con_char(midsoln[row_limit_tris[i][j]->id].st[0], R[side],
                            //                  midsoln[row_limit_tris[i][j]->id].st[0]);
                            // save_reconstruct_st(row_limit_tris[i][j],midsoln,rk_step, conu[side]);

                            // convert candidate char variables to conserv. variables.
                            // convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,R[side]);
                    }
                }
            }
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,NO);
                    // update_coef_layers(row_limit_tris[i][j],midsoln,rk_step,fr,update_layer);
                    update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                }
            }

#if defined(__MPI__)
            if(debugging("db_Mach"))
                update_db_Mach_buffer(fr,midsoln,rk_step,limit_store);
#endif // if defined(__MPI__)

            if(debugging("Sod") || debugging("Lax"))
            {
                // update_buffer_x_peri(fr,midsoln,0,limit_store);
                // update_buffer_x_peri(fr,midsoln,rk_step,limit_store);
                update_buffer_x_ref(fr,midsoln,rk_step,limit_store);
            }
            else if(debugging("shock_vort"))
                update_buffer_x_ref(fr,midsoln,rk_step,limit_store);
            else if(debugging("v_evo") || debugging("Burgers"))
                update_buffer(fr,midsoln,rk_step,limit_store);

            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(Boundary_tri(row_limit_tris[i][j]) ||
                       tri_on_phy_bdry(row_limit_tris[i][j]))
                    {
                        if(debugging("twod_riemann"))
                            twod_riemann_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                        if(debugging("db_Mach"))
                            db_Mach_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                        if(debugging("shock_vort"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                        if(debugging("Sod"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                    }
                }
            }
        }
	/*******************************************/
	// END: do HR on big tris to preprocess
	/*******************************************/

        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                if(MAX_N_COEF == 6)
                {
                    printf("ERROR: implement  Subcell_limiting_soln_with_buffer_tris_multiple_times, 6\n");
                    clean_up(ERROR);
                }
                else if(MAX_N_COEF == 10)
                {
                    if(debugging("char_re"))
                    {
			// side = find_char_dir(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                        comput_Roe_ver2(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,Roe_st,L,R);

                        side = fr->step %3;
                        convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,L[side]);

                        Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                           midsoln,limit_store,rk_step,YES);
                        Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                           midsoln,limit_store,rk_step,NO);
                        Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                           midsoln,limit_store,rk_step,NO,NO,NO);

                        // convert reconstructed char variables to conserv. variables.
                        convert_con_char(midsoln[row_limit_tris[i][j]->id].st[0], R[side],
                                         midsoln[row_limit_tris[i][j]->id].st[0]);
                        convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,R[side]);
                        /*
                        for(side = 0; side < 3; side++)
                        {
                            convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,L[side]);

                            Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);
                            Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,NO);
                            Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,NO,NO,NO);

                            // convert reconstructed char variables to conserv. variables.
                            convert_con_char(midsoln[row_limit_tris[i][j]->id].st[0], R[side],
                                         midsoln[row_limit_tris[i][j]->id].st[0]);

                            save_reconstruct_st(row_limit_tris[i][j],midsoln,rk_step, conu[side]);
                            convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,R[side]);
                        }

                        // use largest jump direction
                        weno_combine_P3(row_limit_tris[i][j],conu,outcome);
                        save_weno_st_to_midsoln(row_limit_tris[i][j],midsoln,rk_step,outcome);
                        if(N_EQN == 4 && YES == unphysical_st_at_quadrature(row_limit_tris[i][j], 
                                                      midsoln[row_limit_tris[i][j]->id].st[0]))
                            row_limit_tris[i][j]->redo_limiting = YES;
                        else
                            row_limit_tris[i][j]->redo_limiting = NO;
                        */
                    }
                    else
                    {
                        if(debugging("old_way"))
                        {
                            ///// save states before HR
                            if(rk_step == RK_STEP)
                                assign(st, row_limit_tris[i][j]->st, fr->sizest);
                            else
                                assign(st, midsoln[row_limit_tris[i][j]->id].st[rk_step], fr->sizest);
                            ///// end:::::save states before HR
                            for(tmpi = 0; tmpi < local_iter_N; tmpi++)
                            {
                                Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                    midsoln,limit_store,rk_step,YES);
      
                                Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                    midsoln,limit_store,rk_step,YES);

                                Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                   midsoln,limit_store,rk_step,NO,YES, (tmpi == local_iter_N-1?YES:NO));

                                update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,NO);
                                update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                            }
                            ///// restore original (before HR) states
                            if(rk_step == RK_STEP)
                                assign(row_limit_tris[i][j]->st, st, fr->sizest);
                            else
                                assign(midsoln[row_limit_tris[i][j]->id].st[rk_step], st, fr->sizest);
                            update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step); 
                            ///// end:::: restore original (before HR) states

                            // Subcell_limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                            // Subcell_limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                        }
                        else
                        {
                            // Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                            //     midsoln,limit_store,rk_step,YES);

                            Subcell_limiting_3rd_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);
      
                            Subcell_limiting_2nd_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);

                            Subcell_limiting_1st_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,NO,YES,YES);
                        }
                    }
                }
            }
        }

        if(loop_num == HR_times && rk_step == RK_STEP)
            comput_time = YES;

        if(loop_num == HR_times)
        {
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(row_limit_tris[i][j]->redo_limiting == YES)
                         fix_unphysical_st(row_limit_tris[i][j],midsoln,0,fr);
                }
            }
        }

        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,comput_time);
                update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
            }
        }

        if(loop_num < HR_times)
        {
#if defined(__MPI__)
            if(debugging("db_Mach"))
                update_db_Mach_buffer(fr,midsoln,rk_step,limit_store);
#endif // if defined(__MPI__)

            if(debugging("Sod") || debugging("Lax"))
            {
                // update_buffer_x_peri(fr,midsoln,0,limit_store);
                // update_buffer_x_peri(fr,midsoln,rk_step,limit_store);
                update_buffer_x_ref(fr,midsoln,rk_step,limit_store);
            }
            else if(debugging("shock_vort"))
                update_buffer_x_ref(fr,midsoln,rk_step,limit_store);
            else if(debugging("v_evo") || debugging("Burgers"))
                update_buffer(fr,midsoln,rk_step,limit_store);

            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(Boundary_tri(row_limit_tris[i][j]) ||
                       tri_on_phy_bdry(row_limit_tris[i][j]))
                    {
                        if(debugging("twod_riemann"))
                            // attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                            twod_riemann_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                        if(debugging("db_Mach"))
                            db_Mach_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                        if(debugging("shock_vort"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                        if(debugging("Sod"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                    }
                }
            }
            goto redo_HR;
        }

        for(i = 0; i < N_row; i++)
            free(row_limit_tris[i]);

        ////////////////////// TMP
        debug_flag = NO;
}

EXPORT void Subcell_limiting_soln_with_separating_orders(
        Front    *fr,
        Mid_soln *midsoln,
        Limiting_store **limit_store,
        int      rk_step,
        int      HR_times)
{
        TRI       *tri, *crsp_tri, *nbtri[3];
        SURFACE   **surf;
        int       dim = 2, i, j, side;
        double     *cent;
        size_t    sizest = fr->sizest;
        Locstate  st2, st, nst;
        float     max_dt;
        // TRI       **limit_tris, **row_limit_tris[1500];
        TRI       **row_limit_tris[1500];
        int       N_alloc = 800, N_row, N_use =0, N, local_iter_N = 4, tmpi, Row_alloc = 1500;
        int       loop_num = 0, detect_extr = NO, comput_time = NO, check_quadr = NO;
        static Locstate Roe_st[3] = {NULL, NULL, NULL};  //Roe mean value
        static float  **L[3], **R[3];
        float     conu[3][4][MAX_N_COEF];  // [side][# eqn][coef]
        float     outcome[4][MAX_N_COEF], old_avg[4], new_avg[4];
        char      s[256];
        int       debug = NO, bigHR_round, update_layer[3] = {NO, YES, YES};

        if(Roe_st[0] == NULL)
        {
             for(i = 0; i < 3; i++)
             {
                 matrix(&L[i], 4, 4, sizeof(float));
                 matrix(&R[i], 4, 4, sizeof(float));
                 g_alloc_state(&Roe_st[i], fr->sizest);
             }
             // g_alloc_state(&st, fr->sizest);
        }
        if(mass_1st_row == NULL)
        {
            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));
            for(i = 0; i < 30; i++)
                matrix(&mass_1st_rows[i], 1, MAX_N_COEF,sizeof(double));
        }

        N_row = identify_limiting_candidates(fr,midsoln,limit_store,rk_step,N_alloc,
                                    Row_alloc,row_limit_tris,&N_use);

        preprocess_P3_times_rearrange_order(fr,midsoln,limit_store,
                  row_limit_tris,N_alloc,N_row,N_use,rk_step,HR_times);

redo_HR:
        loop_num++;
        if(loop_num == HR_times) check_quadr = YES;

        if(loop_num == HR_times && rk_step == RK_STEP)
            comput_time = YES;

        /*******************************************/
        /*******************************************/
        /*******************************************/
        /// first do 3rd degree terms
        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                          midsoln,limit_store,rk_step,YES);
            }
        }

        /*******************************************/
        /*******************************************/
        //// update 3rd degree degree terms
        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                if(rk_step == RK_STEP)
                    st = row_limit_tris[i][j]->st;
                else
                    st = midsoln[row_limit_tris[i][j]->id].st[rk_step];
                nst = midsoln[row_limit_tris[i][j]->id].st[0];
                old_avg[0] = old_avg[1] = old_avg[2] = old_avg[3] = 0.0;
                new_avg[0] = new_avg[1] = new_avg[2] = new_avg[3] = 0.0;

                for(tmpi = 6; tmpi < MAX_N_COEF; tmpi++)
                {
                    old_avg[0] += dg_Dens(st)[tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    old_avg[1] += dg_Mom(st)[0][tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    old_avg[2] += dg_Mom(st)[1][tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    old_avg[3] += dg_Energy(st)[tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    new_avg[0] += dg_Dens(nst)[tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    new_avg[1] += dg_Mom(nst)[0][tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    new_avg[2] += dg_Mom(nst)[1][tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    new_avg[3] += dg_Energy(nst)[tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                }
                for(tmpi = 0; tmpi < N_EQN; tmpi++)
                {
                    old_avg[tmpi] /= row_limit_tris[i][j]->Lmass_matrix[0][0];
                    new_avg[tmpi] /= row_limit_tris[i][j]->Lmass_matrix[0][0];
                }
                dg_Dens(st)[0] +=   (old_avg[0] - new_avg[0]);
                dg_Mom(st)[0][0] += (old_avg[1] - new_avg[1]);
                dg_Mom(st)[1][0] += (old_avg[2] - new_avg[2]);
                dg_Energy(st)[0] += (old_avg[3] - new_avg[3]);
                for(tmpi = 6; tmpi < MAX_N_COEF; tmpi++)
                {
                    dg_Dens(st)[tmpi] = dg_Dens(nst)[tmpi];
                    dg_Mom(st)[0][tmpi] = dg_Mom(nst)[0][tmpi];
                    dg_Mom(st)[1][tmpi] = dg_Mom(nst)[1][tmpi];
                    dg_Energy(st)[tmpi] = dg_Energy(nst)[tmpi];
                }
                update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
            }
        }
        //// END: update 3rd degree degree terms
        /*******************************************/
        /*******************************************/

#if defined(__MPI__)
        if(debugging("db_Mach"))
            update_db_Mach_buffer(fr,midsoln,rk_step,limit_store);
#endif // if defined(__MPI__)

        if(debugging("Sod") || debugging("Lax"))
        {
                // update_buffer_x_peri(fr,midsoln,0,limit_store);
                // update_buffer_x_peri(fr,midsoln,rk_step,limit_store);
            update_buffer_x_ref(fr,midsoln,rk_step,limit_store);
        }
        else if(debugging("shock_vort"))
            update_buffer_x_ref(fr,midsoln,rk_step,limit_store);
        else if(debugging("v_evo") || debugging("Burgers"))
            update_buffer(fr,midsoln,rk_step,limit_store);

        /*******************************************/
        /*******************************************/
        //// update 2nd degree degree terms

        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                      midsoln,limit_store,rk_step,NO);
            }
        }
        //// update 2nd degree degree terms
        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                if(rk_step == RK_STEP)
                    st = row_limit_tris[i][j]->st;
                else
                    st = midsoln[row_limit_tris[i][j]->id].st[rk_step];
                nst = midsoln[row_limit_tris[i][j]->id].st[0];
                old_avg[0] = old_avg[1] = old_avg[2] = old_avg[3] = 0.0;
                new_avg[0] = new_avg[1] = new_avg[2] = new_avg[3] = 0.0;

                for(tmpi = 3; tmpi < 6; tmpi++)
                {
                    old_avg[0] += dg_Dens(st)[tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    old_avg[1] += dg_Mom(st)[0][tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    old_avg[2] += dg_Mom(st)[1][tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    old_avg[3] += dg_Energy(st)[tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    new_avg[0] += dg_Dens(nst)[tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    new_avg[1] += dg_Mom(nst)[0][tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    new_avg[2] += dg_Mom(nst)[1][tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    new_avg[3] += dg_Energy(nst)[tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                }
                for(tmpi = 0; tmpi < N_EQN; tmpi++)
                {
                    old_avg[tmpi] /= row_limit_tris[i][j]->Lmass_matrix[0][0];
                    new_avg[tmpi] /= row_limit_tris[i][j]->Lmass_matrix[0][0];
                }
                dg_Dens(st)[0] +=   (old_avg[0] - new_avg[0]);
                dg_Mom(st)[0][0] += (old_avg[1] - new_avg[1]);
                dg_Mom(st)[1][0] += (old_avg[2] - new_avg[2]);
                dg_Energy(st)[0] += (old_avg[3] - new_avg[3]);
                for(tmpi = 3; tmpi < 6; tmpi++)
                {
                    dg_Dens(st)[tmpi] = dg_Dens(nst)[tmpi];
                    dg_Mom(st)[0][tmpi] = dg_Mom(nst)[0][tmpi];
                    dg_Mom(st)[1][tmpi] = dg_Mom(nst)[1][tmpi];
                    dg_Energy(st)[tmpi] = dg_Energy(nst)[tmpi];
                }
                update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
            }
        }
        ////END: update 2nd degree terms
        /*******************************************/
        /*******************************************/

#if defined(__MPI__)
        if(debugging("db_Mach"))
            update_db_Mach_buffer(fr,midsoln,rk_step,limit_store);
#endif // if defined(__MPI__)

        if(debugging("Sod") || debugging("Lax"))
        {
                // update_buffer_x_peri(fr,midsoln,0,limit_store);
                // update_buffer_x_peri(fr,midsoln,rk_step,limit_store);
            update_buffer_x_ref(fr,midsoln,rk_step,limit_store);
        }
        else if(debugging("shock_vort"))
            update_buffer_x_ref(fr,midsoln,rk_step,limit_store);
        else if(debugging("v_evo") || debugging("Burgers"))
            update_buffer(fr,midsoln,rk_step,limit_store);

        /*******************************************/
        /*******************************************/
        //// update 1st and 0th degree terms
        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES,YES,YES);
            }
        }
        //// END: update 1st and 0th degree terms
        /*******************************************/
        /*******************************************/

        if(loop_num == HR_times)
        {
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(row_limit_tris[i][j]->redo_limiting == YES)
                         fix_unphysical_st(row_limit_tris[i][j],midsoln,0,fr);
                }
            }
        }

        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,comput_time);
                update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
            }
        }

        if(loop_num < HR_times)
        {
#if defined(__MPI__)
            if(debugging("db_Mach"))
                update_db_Mach_buffer(fr,midsoln,rk_step,limit_store);
#endif // if defined(__MPI__)

            if(debugging("Sod") || debugging("Lax"))
            {
                // update_buffer_x_peri(fr,midsoln,0,limit_store);
                // update_buffer_x_peri(fr,midsoln,rk_step,limit_store);
                update_buffer_x_ref(fr,midsoln,rk_step,limit_store);
            }
            else if(debugging("shock_vort"))
                update_buffer_x_ref(fr,midsoln,rk_step,limit_store);
            else if(debugging("v_evo") || debugging("Burgers"))
                update_buffer(fr,midsoln,rk_step,limit_store);

            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(Boundary_tri(row_limit_tris[i][j]) ||
                       tri_on_phy_bdry(row_limit_tris[i][j]))
                    {
                        if(debugging("twod_riemann"))
                            // attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                            twod_riemann_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                        if(debugging("db_Mach"))
                            db_Mach_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                        if(debugging("shock_vort"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                        if(debugging("Sod"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],limit_store);
                    }
                }
            }
            goto redo_HR;
        }

        for(i = 0; i < N_row; i++)
            free(row_limit_tris[i]);

        ////////////////////// TMP
        debug_flag = NO;
}

//// Use partial neighboring cells to recompute the 3rd degree terms by HR.
////
//// For every candidate triangle in row_limit_tris the 3rd degree subcell
//// limiter is run (the plain _PNC variant when debugging("old_way") is set,
//// the _grouping variant otherwise).  In a second pass the coefficients
//// 6 .. MAX_N_COEF-1 of density, both momentum components and energy are
//// copied from midsoln[id].st[0] into the working state (tri->st when
//// rk_step == RK_STEP, else midsoln[id].st[rk_step]), coefficient 0 is
//// shifted by the Lmass_matrix[0][.]-weighted difference of the replaced
//// terms (presumably to keep the cell average -- TODO confirm), and
//// update_tri_CVs() is called.  Finally the test-problem buffers are
//// refreshed through update_buffer_of_test_problems().
////
//// The 2nd degree limiter calls and the 3..5 coefficient range are currently
//// disabled (kept below as commented-out code).
////
//// row_limit_tris holds N_row rows; every row has N_alloc entries except the
//// last, which has N_use.  HR_times is accepted for interface compatibility
//// but is not used here.
LOCAL void subcell_update_high_degree_terms(
        Front    *fr,
        Mid_soln *midsoln,
        Limiting_store **limit_store,
        TRI      ***row_limit_tris,
        int      N_alloc,
        int      N_row,
        int      N_use,
        int      rk_step,
        int      HR_times)
{
        TRI       *tri, *nbtri[3];
        int       i, j, k, side, N;
        /* looked up once instead of once per triangle; the flag is not
           expected to change while the sweep is running */
        int       old_way = debugging("old_way");
        float     old_avg[4], new_avg[4];
        Locstate  st, nst;

        /// first limit the 3rd degree terms
        for(i = 0; i < N_row; i++)
        {
            N = (i == N_row-1) ? N_use : N_alloc;
            for(j = 0; j < N; j++)
            {
                tri = row_limit_tris[i][j];
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(tri,side);
                if(old_way)
                { 
                    /// original author's note: this seems to work better????
                    Subcell_limiting_3rd_degreeP3_PNC(tri,nbtri,
                      midsoln,limit_store,rk_step,YES);
                    // Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                    //   midsoln,limit_store,rk_step,YES);
                }
                else
                {
                    Subcell_limiting_3rd_degreeP3_PNC_grouping(tri,nbtri,
                               midsoln,limit_store,rk_step,YES);

                    // Subcell_limiting_2nd_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                    //            midsoln,limit_store,rk_step,NO);
                }
            }
        }

        //// adopt the limited 3rd degree terms
        for(i = 0; i < N_row; i++)
        {
            N = (i == N_row-1) ? N_use : N_alloc;
            for(j = 0; j < N; j++)
            {
                tri = row_limit_tris[i][j];
                if(rk_step == RK_STEP)
                    st = tri->st;
                else
                    st = midsoln[tri->id].st[rk_step];
                nst = midsoln[tri->id].st[0];
                old_avg[0] = old_avg[1] = old_avg[2] = old_avg[3] = 0.0;
                new_avg[0] = new_avg[1] = new_avg[2] = new_avg[3] = 0.0;

                // start at 3 instead of 6 to include the 2nd degree terms
                for(k = 6; k < MAX_N_COEF; k++)
                {
                    old_avg[0] += dg_Dens(st)[k]*tri->Lmass_matrix[0][k];
                    old_avg[1] += dg_Mom(st)[0][k]*tri->Lmass_matrix[0][k];
                    old_avg[2] += dg_Mom(st)[1][k]*tri->Lmass_matrix[0][k];
                    old_avg[3] += dg_Energy(st)[k]*tri->Lmass_matrix[0][k];
                    new_avg[0] += dg_Dens(nst)[k]*tri->Lmass_matrix[0][k];
                    new_avg[1] += dg_Mom(nst)[0][k]*tri->Lmass_matrix[0][k];
                    new_avg[2] += dg_Mom(nst)[1][k]*tri->Lmass_matrix[0][k];
                    new_avg[3] += dg_Energy(nst)[k]*tri->Lmass_matrix[0][k];
                }
                for(k = 0; k < N_EQN; k++)
                {
                    old_avg[k] /= tri->Lmass_matrix[0][0];
                    new_avg[k] /= tri->Lmass_matrix[0][0];
                }
                /* compensate coefficient 0 for the change in the replaced terms */
                dg_Dens(st)[0] +=   (old_avg[0] - new_avg[0]);
                dg_Mom(st)[0][0] += (old_avg[1] - new_avg[1]);
                dg_Mom(st)[1][0] += (old_avg[2] - new_avg[2]);
                dg_Energy(st)[0] += (old_avg[3] - new_avg[3]);
                // start at 3 instead of 6 to include the 2nd degree terms
                for(k = 6; k < MAX_N_COEF; k++)
                {
                    dg_Dens(st)[k] = dg_Dens(nst)[k];
                    dg_Mom(st)[0][k] = dg_Mom(nst)[0][k];
                    dg_Mom(st)[1][k] = dg_Mom(nst)[1][k];
                    dg_Energy(st)[k] = dg_Energy(nst)[k];
                }
                update_tri_CVs(tri,midsoln,limit_store,rk_step);
            }
        }

        update_buffer_of_test_problems(fr,midsoln,rk_step,limit_store,row_limit_tris,N_alloc,N_row,N_use);
}

EXPORT void Subcell_limiting_soln_with_buffer_tris_multiple_times_rearrange_order(
        Front    *fr,
        Mid_soln *midsoln,
        Limiting_store **limit_store,
        int      rk_step,
        int      HR_times,
        int      check_quadrature,
        int      total_tri)
{
        TRI       *tri, *crsp_tri, *nbtri[3];
        SURFACE   **surf;
        int       dim = 2, i, j, side, tmpi, cv_indx, indx;
        double     *cent;
        size_t    sizest = fr->sizest;
        Locstate  st2;
        float     max_dt;
        TRI       **limit_tris, **row_limit_tris[1500];
        int       N_alloc = 800, N_row, N_use =0, N, Row_alloc = 1500, local_iter_N = 1;
        int       loop_num = 0, detect_extr = NO, comput_time = NO, check_quadr = NO;
        static Locstate Roe_st[3] = {NULL, NULL, NULL},sta;  //Roe mean value
        static float  **L[3], **R[3];
        float     conu[3][4][MAX_N_COEF];  // [side][# eqn][coef]
        float     outcome[4][MAX_N_COEF], old_avg[4], new_avg[4];
        char      s[256];
        int       debug = NO, bigHR_round, update_layer[3] = {NO, YES, YES};
        Locstate  st, nst;

        if(Roe_st[0] == NULL)
        {
             for(i = 0; i < 3; i++)
             {
                 matrix(&L[i], 4, 4, sizeof(float));
                 matrix(&R[i], 4, 4, sizeof(float));
                 g_alloc_state(&Roe_st[i], fr->sizest);
             }
             g_alloc_state(&sta, fr->sizest);
        }
        if(mass_1st_row == NULL)
        {
            vector(&alltri_mass_1st_rows,total_tri,sizeof(Tri_mass_1st_rows));
            vector(&alltri_mass_1st_rows_grp,total_tri,sizeof(Tri_mass_1st_rows));
            for(i = 0; i < total_tri; i++)
            {
                alltri_mass_1st_rows[i].mass_1st_rows[0] = NULL;
                alltri_mass_1st_rows_grp[i].mass_1st_rows[0] = NULL;
            }

            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));
            // for(i = 0; i < 30; i++)
            //     matrix(&mass_1st_rows[i], 1, MAX_N_COEF,sizeof(double));
            vector(&alltri_HR_sten_2,total_tri,sizeof(Tri_HR_sten));
            vector(&alltri_HR_sten_2_grp,total_tri,sizeof(Tri_HR_sten));
            for(i = 0; i < total_tri; i++)
            {
                alltri_HR_sten_2[i].HR_sten_set_3rd = alltri_HR_sten_2[i].HR_sten_set_2nd =
                alltri_HR_sten_2[i].HR_sten_set_1st = NO;
                alltri_HR_sten_2_grp[i].HR_sten_set_3rd = alltri_HR_sten_2_grp[i].HR_sten_set_2nd =
                alltri_HR_sten_2_grp[i].HR_sten_set_1st = NO;
            }
        }

        // printf("Before identify_limiting_candidates()\n");

        /// locate for HR //TMP
        N_row = identify_limiting_candidates(fr,midsoln,limit_store,rk_step,N_alloc,
                                    Row_alloc,row_limit_tris,&N_use);

        for(bigHR_round = 0;  bigHR_round < 0; bigHR_round++)
        {
            preprocess_P3_times_rearrange_order(fr,midsoln,limit_store,
                  row_limit_tris,N_alloc,N_row,N_use,rk_step,HR_times);
        }

        /// Now update 3rd and 2nd degree terms
        for(bigHR_round = 0;  bigHR_round < 0; bigHR_round++)
        {
            subcell_update_high_degree_terms(fr,midsoln,limit_store,
                  row_limit_tris,N_alloc,N_row,N_use,rk_step,HR_times);
        }
        //// END: update 3rd and 2nd degree terms

redo_HR:
        loop_num++;
        if(loop_num == HR_times) check_quadr = YES;

        // printf("Before do HR on big tris to preprocess()\n");
	/*******************************************/
	// do HR on big tris to preprocess
	/*******************************************/
        for(bigHR_round = 0;  bigHR_round < 1; bigHR_round++)
        {
            //// do partial HR for pre_process
            /***
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    for(side = 0; side < 3; side++)
                        nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                    limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,YES);
                    limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,YES);
                }
            }
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(rk_step == RK_STEP)
                        st = row_limit_tris[i][j]->st;
                    else
                        st = midsoln[row_limit_tris[i][j]->id].st[rk_step];
                    nst = midsoln[row_limit_tris[i][j]->id].st[0];
                    old_avg[0] = old_avg[1] = old_avg[2] = old_avg[3] = 0.0;
                    new_avg[0] = new_avg[1] = new_avg[2] = new_avg[3] = 0.0;
                    for(tmpi = 3; tmpi < MAX_N_COEF; tmpi++)
                    // for(tmpi = 6; tmpi < MAX_N_COEF; tmpi++)
                    {
                        old_avg[0] += dg_Dens(st)[tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                        old_avg[1] += dg_Mom(st)[0][tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                        old_avg[2] += dg_Mom(st)[1][tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                        old_avg[3] += dg_Energy(st)[tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                        new_avg[0] += dg_Dens(nst)[tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                        new_avg[1] += dg_Mom(nst)[0][tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                        new_avg[2] += dg_Mom(nst)[1][tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                        new_avg[3] += dg_Energy(nst)[tmpi]*row_limit_tris[i][j]->Lmass_matrix[0][tmpi];
                    }
                    for(tmpi = 0; tmpi < N_EQN; tmpi++)
                    {
                        old_avg[tmpi] /= row_limit_tris[i][j]->Lmass_matrix[0][0];
                        new_avg[tmpi] /= row_limit_tris[i][j]->Lmass_matrix[0][0];
                    }
                    dg_Dens(st)[0] +=   (old_avg[0] - new_avg[0]);
                    dg_Mom(st)[0][0] += (old_avg[1] - new_avg[1]);
                    dg_Mom(st)[1][0] += (old_avg[2] - new_avg[2]);
                    dg_Energy(st)[0] += (old_avg[3] - new_avg[3]);
                    for(tmpi = 3; tmpi < MAX_N_COEF; tmpi++)
                    // for(tmpi = 6; tmpi < MAX_N_COEF; tmpi++)
                    {
                        dg_Dens(st)[tmpi] = dg_Dens(nst)[tmpi];
                        dg_Mom(st)[0][tmpi] = dg_Mom(nst)[0][tmpi];
                        dg_Mom(st)[1][tmpi] = dg_Mom(nst)[1][tmpi];
                        dg_Energy(st)[tmpi] = dg_Energy(nst)[tmpi];
                    }
                    update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                }
            }
            ****/
            /////// END: do partial HR for pre_process

            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    for(side = 0; side < 3; side++)
                        nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                    if(debugging("char_re"))
                    {
                            // side = find_char_dir(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                            // comput_Roe_ver2(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,Roe_st,L,R);
                            // convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,L[side]);
                    }
                    /****
                    Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                            midsoln,limit_store,rk_step,YES);
                    Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                            midsoln,limit_store,rk_step,YES);
                    // Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                    //    midsoln,limit_store,rk_step,NO,YES,check_quadrature);
                    Subcell_limiting_1st_degreeP3_PNC_2nd_degree_tech(row_limit_tris[i][j],nbtri,
                                midsoln,limit_store,rk_step,NO,YES,NO);
                    ****/

                    pre_process_limiting_P3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);

                    // limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,YES);
                    // limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,NO);
	            // limiting_1st_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,NO);    
                    // Subcell_limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                    // Subcell_limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                    if(debugging("char_re"))
                    {
                        // convert reconstructed char variables to conserv. variables.
                        // convert_con_char(midsoln[row_limit_tris[i][j]->id].st[0], R[side],
                        //                  midsoln[row_limit_tris[i][j]->id].st[0]);
                        // save_reconstruct_st(row_limit_tris[i][j],midsoln,rk_step, conu[side]);
                        // convert candidate char variables to conserv. variables.
                        // convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,R[side]);
                    }
                }
            }
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,NO);
                    // update_coef_layers(row_limit_tris[i][j],midsoln,rk_step,fr,update_layer);
                    update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                }
            }

            update_buffer_of_test_problems(fr,midsoln,rk_step,limit_store,row_limit_tris,N_alloc,N_row,N_use);
        }
	/*******************************************/
	// END: do HR on big tris to preprocess
	/*******************************************/

        /// printf("after doing limiting on big tris only  in limiter\n");

	/*******************************************/
	/*******************************************/
        /// Now update 3rd and 2nd degree terms
        for(bigHR_round = 0;  bigHR_round < 0; bigHR_round++)
        {
            subcell_update_high_degree_terms(fr,midsoln,limit_store,
                  row_limit_tris,N_alloc,N_row,N_use,rk_step,HR_times);
        }
        //// END: update 3rd and 2nd degree terms
	/*******************************************/
	/*******************************************/

        /***
        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                if(row_limit_tris[i][j]->id == 10 ||
                   row_limit_tris[i][j]->id == 14)
                {
                    printf("check tri[%d] state before subcell limiting\n", row_limit_tris[i][j]->id);
                    if(rk_step == RK_STEP)
                       g_verbose_print_state(row_limit_tris[i][j]->st);
                    else
                       g_verbose_print_state(midsoln[row_limit_tris[i][j]->id].st[rk_step]);
                }
            }
        }
        ****/ 

        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                // TMP
                // printf(" Subcell_limiting_soln_with_buffer_tris_multiple_times, on tri[%d],[%d][%d]\n",
                //      row_limit_tris[i][j]->id, i, j);
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                if(debugging("char_re"))
                {
		    // side = find_char_dir(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                    comput_Roe_ver2(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,Roe_st,L,R);

                    /***
                    side = fr->step %3;
                    convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,L[side]);

                    Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                       midsoln,limit_store,rk_step,YES);
                    Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                       midsoln,limit_store,rk_step,NO);
                    Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                       midsoln,limit_store,rk_step,NO,NO,NO);

                        // convert reconstructed char variables to conserv. variables.
                    convert_con_char(midsoln[row_limit_tris[i][j]->id].st[0], R[side],
                                     midsoln[row_limit_tris[i][j]->id].st[0]);
                    convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,R[side]);
                    ***/ 
                    for(side = 0; side < 3; side++)
                    {
                        convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,L[side]);

                        Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);
                        Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);
                        Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,NO,YES,NO);

                        // convert reconstructed char variables to conserv. variables.
                        convert_con_char(midsoln[row_limit_tris[i][j]->id].st[0], R[side],
                                     midsoln[row_limit_tris[i][j]->id].st[0]);

                        save_reconstruct_st(row_limit_tris[i][j],midsoln,rk_step, conu[side]);
                        convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,R[side]);
                    }

                    // use largest jump direction
                    weno_combine_P3(row_limit_tris[i][j],conu,outcome);
                    save_weno_st_to_midsoln(row_limit_tris[i][j],midsoln,rk_step,outcome);
                    if(N_EQN == 4 && YES == unphysical_st_at_quadrature(row_limit_tris[i][j], 
                                                  midsoln[row_limit_tris[i][j]->id].st[0]))
                        row_limit_tris[i][j]->redo_limiting = YES;
                    else
                        row_limit_tris[i][j]->redo_limiting = NO;
                }
                else
                {
                    if(debugging("old_way"))
                    {
                        if(local_iter_N != 1)
                        {
                            ///// save states before HR
                            if(rk_step == RK_STEP)
                                assign(sta, row_limit_tris[i][j]->st, fr->sizest);
                            else
                                assign(sta, midsoln[row_limit_tris[i][j]->id].st[rk_step], fr->sizest);
                            ///// end:::::save states before HR
                            for(tmpi = 0; tmpi < local_iter_N; tmpi++)
                            {
                                Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                    midsoln,limit_store,rk_step,YES);

                                Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                    midsoln,limit_store,rk_step,YES);

                                Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                   midsoln,limit_store,rk_step,NO,YES, (tmpi == local_iter_N-1?check_quadrature:NO));

                                update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,NO);
                                update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                            }
                            ///// restore original (before HR) states
                            if(rk_step == RK_STEP)
                                assign(row_limit_tris[i][j]->st, sta, fr->sizest);
                            else
                                assign(midsoln[row_limit_tris[i][j]->id].st[rk_step], sta, fr->sizest);
                            update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                            ///// end:::: restore original (before HR) states
                        }
                        else
                        {
                            ////// TMP
                            /***
                            if(row_limit_tris[i][j]->id == 10 ||
                               row_limit_tris[i][j]->id == 14)
                            {
                                printf("check tri[%d] state before subcell limiting\n", row_limit_tris[i][j]->id);
                                if(rk_step == RK_STEP)
                                    g_verbose_print_state(row_limit_tris[i][j]->st);
                                else
                                    g_verbose_print_state(midsoln[row_limit_tris[i][j]->id].st[rk_step]);
                            }
                            ***/

                            Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                midsoln,limit_store,rk_step,YES);
                            Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                midsoln,limit_store,rk_step,YES);
                            Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                midsoln,limit_store,rk_step,NO,YES,check_quadrature);
                            // Subcell_limiting_1st_degreeP3_PNC_2nd_degree_tech(row_limit_tris[i][j],nbtri,
                            //     midsoln,limit_store,rk_step,NO,YES,check_quadrature);
                        }
                        // Subcell_limiting_1st_degreeP3_PNC_2nd_degree_tech(row_limit_tris[i][j],nbtri,
                        //     midsoln,limit_store,rk_step,NO,YES,check_quadrature);
                        // Subcell_limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                        // Subcell_limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                    }
                    else
                    {
                        // Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                        //     midsoln,limit_store,rk_step,YES);

                        /***
                        Subcell_limiting_3rd_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);
      
                        Subcell_limiting_2nd_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);

                        Subcell_limiting_1st_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,NO,YES,YES);
                        ***/

                        ///// save states before HR
                        if(rk_step == RK_STEP)
                            assign(sta, row_limit_tris[i][j]->st, fr->sizest);
                        else
                            assign(sta, midsoln[row_limit_tris[i][j]->id].st[rk_step], fr->sizest);
                        ///// end:::::save states before HR
                        for(tmpi = 0; tmpi < local_iter_N; tmpi++)
                        {
                            Subcell_limiting_3rd_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);

                            Subcell_limiting_2nd_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);

                            Subcell_limiting_1st_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,NO,YES,(tmpi==local_iter_N-1? YES:NO));

                            update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,NO);
                            update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                        }
                        ///// restore original (before HR) states
                        if(rk_step == RK_STEP)
                            assign(row_limit_tris[i][j]->st, sta, fr->sizest);
                        else
                            assign(midsoln[row_limit_tris[i][j]->id].st[rk_step], sta, fr->sizest);
                        update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                        ///// end:::: restore original (before HR) states
                    }
                }
            }
        }

        if(loop_num == HR_times && rk_step == RK_STEP)
            comput_time = YES;

        /// printf("before fix_unphysical_st() in limiter\n");

        if(loop_num == HR_times)
        {
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(row_limit_tris[i][j]->redo_limiting == YES)
                         fix_unphysical_st(row_limit_tris[i][j],midsoln,0,fr);
                }
            }
        }

        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,comput_time);
                update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
            }
        }

        if(loop_num < HR_times)
        {
            update_buffer_of_test_problems(fr,midsoln,rk_step,limit_store,row_limit_tris,N_alloc,N_row,N_use);
            goto redo_HR;
        }

        for(i = 0; i < N_row; i++)
            free(row_limit_tris[i]);

        ////////////////////// TMP
        debug_flag = NO;
}

EXPORT void Subcell_limiting_soln_with_buffer_tris_multiple_times_local_HR_MHD(
        Front    *fr,
        Mid_soln *midsoln,
        Limiting_store **limit_store,
        int      rk_step,
        int      HR_times,
        int      check_quadrature,
        int      total_tri)
{
        TRI       *tri, *crsp_tri, *nbtri[3], *sten_tri[30], *sten_sets[20][3];
        SURFACE   **surf;
        int       dim = 2, i, j, side, tmpi, cv_indx, indx, sten_tri_num;
        double     *cent, max_vz = -1.0e10, max_Bz = -1.0e10;
        size_t    sizest = fr->sizest;
        Locstate  st2;
        float     max_dt;
        TRI       **limit_tris, **row_limit_tris[1500];
        int       N_alloc = 800, N_row, N_use =0, N, Row_alloc = 1500, local_iter_N = 1;
        int       loop_num = 0, detect_extr = NO, comput_time = NO, check_quadr = NO;
        static Locstate Roe_st[3] = {NULL, NULL, NULL},sta;  //Roe mean value
        static float  **L[3], **R[3];
        float     conu[3][4][MAX_N_COEF];  // [side][# eqn][coef]
        float     outcome[4][MAX_N_COEF], old_avg[4], new_avg[4];
        char      s[256];
        int       debug = NO, bigHR_round, update_layer[3] = {NO, YES, YES}, N_sten;
        Locstate  st, nst;

        if(Roe_st[0] == NULL)
        {
             for(i = 0; i < 3; i++)
             {
                 matrix(&L[i], 4, 4, sizeof(float));
                 matrix(&R[i], 4, 4, sizeof(float));
                 g_alloc_state(&Roe_st[i], fr->sizest);
             }
             g_alloc_state(&sta, fr->sizest);
        }

        if(mass_1st_row == NULL)
        {
            vector(&alltri_mass_1st_rows,total_tri,sizeof(Tri_mass_1st_rows));
            vector(&alltri_mass_1st_rows_grp,total_tri,sizeof(Tri_mass_1st_rows));
            for(i = 0; i < total_tri; i++)
            {
                alltri_mass_1st_rows[i].mass_1st_rows[0] = NULL;
                alltri_mass_1st_rows[i].mass_1st_rows_ppcell[0] = NULL;
                alltri_mass_1st_rows_grp[i].mass_1st_rows[0] = NULL;
            }

            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));
            // for(i = 0; i < 30; i++)
            //     matrix(&mass_1st_rows[i], 1, MAX_N_COEF,sizeof(double));
            vector(&alltri_HR_sten_2,total_tri,sizeof(Tri_HR_sten));
            vector(&alltri_HR_sten_2_grp,total_tri,sizeof(Tri_HR_sten));
            for(i = 0; i < total_tri; i++)
            {
                alltri_HR_sten_2[i].HR_sten_set_3rd = alltri_HR_sten_2[i].HR_sten_set_2nd =
                alltri_HR_sten_2[i].HR_sten_set_1st = alltri_HR_sten_2[i].HR_sten_set_1st_pt = NO;
                alltri_HR_sten_2_grp[i].HR_sten_set_3rd = alltri_HR_sten_2_grp[i].HR_sten_set_2nd =
                alltri_HR_sten_2_grp[i].HR_sten_set_1st = alltri_HR_sten_2_grp[i].HR_sten_set_1st_pt = NO;
                alltri_HR_sten_2[i].WENO_cent_set = alltri_HR_sten_2[i].WENO_side_set[0] =
                alltri_HR_sten_2[i].WENO_side_set[1] = alltri_HR_sten_2[i].WENO_side_set[2] =
                alltri_HR_sten_2[i].WENO_rev_set[0] = alltri_HR_sten_2[i].WENO_rev_set[1] =
                alltri_HR_sten_2[i].WENO_rev_set[2] = NO;
            }
        }

        //// Do HR for all tris.
        int  do_HR_for_all = YES;
        if(do_HR_for_all != YES)
        {
            N_row = identify_limiting_candidates(fr,midsoln,limit_store,rk_step,N_alloc,
                                        Row_alloc,row_limit_tris,&N_use);
            // printf("After identify_limiting_candidates() N_row=%d\n",N_row);fflush(stdout); 
            // printf("L4654 N=%d N_row=%d N_use=%d N_alloc=%d\n",N,N_row,N_use,N_alloc);fflush(stdout);
        }
        else
        {
            ////// Start: collect all tris.
            vector(&limit_tris, N_alloc, sizeof(TRI*));
            row_limit_tris[0] = limit_tris;
            N_row = 1;

            for(surf = fr->mesh->surfaces; surf && *surf; surf++)
            {
                for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
                {
                    cent = fg_centroid(tri);
                    if(tri->BC_type == SUBDOMAIN)
                        continue;

                    if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                    {
                        if(debugging("g_sine"))
                            attach_g_sine_buffer_states_ver2(fr,midsoln,rk_step,tri,limit_store);
                        else if(debugging("Burgers"))
                        {
                            if(!debugging("diag_tri"))
                                attach_Burgers_buffer_states_ver2(fr,midsoln,rk_step,tri,limit_store);
                        }
                    }
                    limit_tris[N_use] = tri;
                    N_use++;
                    if(N_use == N_alloc)
                    {
                        if(N_row +1 >= Row_alloc)
                        {
                            printf("ERROR: Subcell_limiting_soln_with_buffer_tris_multiple_times_local_HR_MHD,"
                                    " exceed alloc. limit\n");
                            clean_up(ERROR);
                        }
                        vector(&limit_tris, N_alloc, sizeof(TRI*));
                        row_limit_tris[N_row] = limit_tris;
                        N_row++;
                        N_use = 0;
                    }
                }
            }
            ////// END: collect all tris.
        }
        ////// END: build row_limit_tris (limiting candidates only, or all tris).

redo_HR:
        loop_num++;
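        // HR_times = 1 (HR_times > 1 is not supported yet: the loop_num < HR_times
        // check further down prints an ERROR and calls clean_up(ERROR)).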
        if(loop_num == HR_times) check_quadr = YES;

        for(i = 0; i < N_row; i++)
        {
            // printf("L4709 N=%d N_row=%d N_use=%d N_alloc=%d\n",N,N_row,N_use,N_alloc);fflush(stdout);
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                // TMP
                // printf(" Subcell_limiting_soln_with_buffer_tris_multiple_times, on tri[%d],[%d][%d]\n",
                //          row_limit_tris[i][j]->id, i, j);
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);

                            if(10 == MAX_N_COEF)
                            {
                                // set_HR_sten(nbtri, row_limit_tris[i][j],sten_tri,&sten_tri_num);
                                // limiting_3rd_degreeP3(row_limit_tris[i][j],sten_tri,sten_tri_num, midsoln,rk_step,YES);
                                // limiting_2nd_degreeP3(row_limit_tris[i][j],sten_tri,sten_tri_num, midsoln,rk_step,YES);
                                // limiting_1st_degreeP3(row_limit_tris[i][j],sten_tri,sten_tri_num, midsoln,rk_step,NO);
                                Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                        midsoln,limit_store,rk_step,YES);
                                Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                        midsoln,limit_store,rk_step,YES);
                                Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                        midsoln,limit_store,rk_step,NO,YES,NO);
                            }
                            else if(6 == MAX_N_COEF)
                            {
                                // printf("before set_HR_sten()\n");fflush(stdout);
                                int flag_sten_type = 1;
                                if(flag_sten_type == 1)
                                {
                                    //// sten 1
                                    set_HR_sten(nbtri, row_limit_tris[i][j],sten_tri,&sten_tri_num);
                                }
                                else
                                {
                                    //// sten 2
                                    // all_neighboring_cells(row_limit_tris[i][j],sten_tri,&sten_tri_num); // OLD
                                    all_neighboring_cells_ver2(row_limit_tris[i][j],sten_tri,&sten_tri_num); // Huijing 0514
                                    
                                    // printf("before limiting_2nd_degree()\n");fflush(stdout);
                                    // print_tri_crds(row_limit_tris[i][j]);
                                    // printf("print stencil tris for tri %d\n", row_limit_tris[i][j]->id);
                               
                                    // for(side = 0; side < sten_tri_num; side++)
                                    //     print_tri_crds(sten_tri[side]);
                                    // clean_up(0);

                                    // for(int kk=0; kk<sten_tri_num; kk++)
                                    //     printf("%d: tri[%d]\n",kk,sten_tri[kk]->id);
                                }

                                // limiting_2nd_degree(row_limit_tris[i][j], sten_tri, sten_tri_num, midsoln,rk_step);
                                // limiting_1st_degree(row_limit_tris[i][j], sten_tri, sten_tri_num, midsoln,rk_step);
                                Subcell_limiting_2nd_degreeP2(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                                Subcell_limiting_1st_degreeP2(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                                // printf("before limiting_1st_degree()\n");fflush(stdout);
                                // Subcell_limiting_1st_degreeP2_multi_pt_limit(row_limit_tris[i][j],nbtri,
                                //     midsoln,limit_store,rk_step);
                            }
                            else if(3 == MAX_N_COEF)
                            {
                                set_HR_sten(nbtri,row_limit_tris[i][j],sten_tri,&sten_tri_num); //sten_tri structure

                                N_sten = 0;
                                for(side = 0; side < sten_tri_num; side++)
                                {
                                    sten_sets[N_sten][0] = row_limit_tris[i][j];
                                    sten_sets[N_sten][1] = sten_tri[side];
                                    sten_sets[N_sten][2] = sten_tri[(side+1)%sten_tri_num];
                                    N_sten++;
                                }
                                // set_WENO_combined_sten() uses 7 stencils.
                                // set_WENO_combined_sten(row_limit_tris[i][j],sten_sets,&N_sten);

                                // Subcell_limiting_1st_degreeP1(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                                // limiting_1st_degreeP1(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                                // Subcell_limiting_1st_degreeP1_MHD_FV(row_limit_tris[i][j],nbtri,midsoln,rk_step,fr);
                                // limiting_1st_degreeP1_MHD_FV(row_limit_tris[i][j],nbtri,midsoln,rk_step,fr);//OLD
                                limiting_1st_degreeP1_MHD(row_limit_tris[i][j],sten_sets, 
                                                         N_sten, midsoln,rk_step,fr);//NEW Huijing
                                // clean_up(ERROR);
                            }
                            else
                            {
                                printf("ERROR: Subcell_limiting_soln_with_buffer_tris_multiple_times_local_HR_MHD(),"
                                       " implement max_n_coef = %d\n", MAX_N_COEF);
                                clean_up(ERROR);
                            }

            }
        }

        if(loop_num == HR_times && rk_step == RK_STEP)
            comput_time = YES;

        // should be used after updating all the variables
        if(loop_num == HR_times)
        {
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(row_limit_tris[i][j]->redo_limiting == YES)
                        fix_unphysical_st(row_limit_tris[i][j],midsoln,0,fr);
                }
            }
        }

        /// update st from st2
        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                if(row_limit_tris[i][j]->BC_type == SUBDOMAIN)
                    continue;

                update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,comput_time);
                // update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
            }
        }

        if(loop_num < HR_times)
        {
            // update_buffer_of_test_problems(fr,midsoln,rk_step,limit_store,row_limit_tris,N_alloc,N_row,N_use);
            printf("ERROR: Subcell_limiting_soln_with_buffer_tris_multiple_times_local_HR_MHD()\n");
            printf("Implement buffer update for MHD \n");
            clean_up(ERROR);
            goto redo_HR;
        }


        if(debugging("decay_alfven") && fr->step % 80 == 0 && rk_step == RK_STEP)
        {
            for(surf = fr->mesh->surfaces; surf && *surf; surf++)
            {
                for (tri = first_tri(*surf);
                     !at_end_of_tri_list(tri,*surf); tri = tri->next)
                {
                    if(tri->BC_type == SUBDOMAIN)
                        continue;
                    if(fabs( Mag(tri->st)[2]) >= max_Bz)
                        max_Bz = fabs( Mag(tri->st)[2]);

                    if(fabs( Mom(tri->st)[2]/Dens(tri->st)) >= max_vz)
                        max_vz = fabs(Mom(tri->st)[2]/Dens(tri->st));
                }
            }

            printf("At time = %g max_Vz = %g max_Bz = %g log10_max_Vz = %g\n", 
                   fr->time, max_vz, max_Bz, log10(max_vz));
        }

        for(i = 0; i < N_row; i++)
            free(row_limit_tris[i]);

        ////////////////////// TMP
        debug_flag = NO;
}


/*
*       all_neighboring_cells_ver2():
*
*       Gathers the cells surrounding tri into sten_tris[] and reports how
*       many were stored through *nn_num.
*
*       For each side (0, 1, 2) the triangles returned by
*       tris_between_edge_neighbrs_ver2() are appended first, followed by
*       the edge neighbor Tri_on_side(tri,(side+1)%3).  The original notes
*       label this ordering "counter-clockwise"; the earlier (clockwise)
*       variant used tris_between_edge_neighbrs() and Tri_on_side(tri,side).
*
*       tri        triangle whose surrounding cells are collected.
*       sten_tris  output array, filled from index 0.  No bound check is
*                  done here; the caller must supply enough room.
*       nn_num     output, number of entries written to sten_tris[].
*/
LOCAL void all_neighboring_cells_ver2(
         TRI       *tri,
         TRI       *sten_tris[],
         int       *nn_num)
{
         TRI       *between[3][60];   /* per-side scratch list filled by the helper */
         int       n_between[3];      /* per-side count reported by the helper      */
         int       edge, k, total = 0;

         for(edge = 0; edge < 3; edge++)
         {
             tris_between_edge_neighbrs_ver2(tri, NULL, edge, between[edge], &n_between[edge]);

             /* cells lying between two consecutive edge neighbors */
             k = 0;
             while(k < n_between[edge])
                 sten_tris[total++] = between[edge][k++];

             /* then the edge neighbor that closes this sector */
             sten_tris[total++] = Tri_on_side(tri,(edge+1)%3);
         }

         *nn_num = total;
}


/*
*       Subcell_limiting_soln_with_buffer_tris_multiple_times_local_HR():
*
*       Driver that runs the limiting ("HR") sweep over the triangles
*       selected by identify_limiting_candidates() and repeats the sweep
*       until loop_num reaches HR_times (the redo_HR label marks the top
*       of one pass).
*
*       One pass does the following:
*         1. For every candidate triangle, collect its three edge
*            neighbors and run one of three limiter variants, chosen by
*            debugging("char_re"), debugging("old_way"), or (default) the
*            "_grouping" limiters.
*         2. On the last pass only (loop_num == HR_times), call
*            fix_unphysical_st() for triangles flagged redo_limiting == YES.
*         3. Call update_coef() and update_tri_CVs() on every candidate.
*            comput_time is YES only on the last pass with
*            rk_step == RK_STEP.
*         4. If passes remain, call update_buffer_of_test_problems() and
*            jump back to redo_HR.
*
*       Parameters:
*         fr               front; fr->sizest is used as the state size.
*         midsoln          per-triangle storage indexed by tri->id.
*                          st[rk_step] is used as the triangle state when
*                          rk_step != RK_STEP; st[0] is read in the
*                          "char_re" branch after reconstruction.
*         limit_store      passed through unchanged to the helpers.
*         rk_step          stage index; when equal to RK_STEP the state is
*                          taken from tri->st instead of midsoln.
*         HR_times         number of passes requested.
*         check_quadrature only referenced in the "old_way" branch with
*                          local_iter_N != 1; local_iter_N is fixed at 1
*                          in this function, so it is currently unused.
*         total_tri        length of the file-level per-triangle tables
*                          allocated on the first call.
*
*       Side effects: allocates function-static and file-level work
*       storage on the first call (never released here), releases each
*       row of row_limit_tris with free() before returning, and resets the
*       file-level debug_flag to NO.
*/
EXPORT void Subcell_limiting_soln_with_buffer_tris_multiple_times_local_HR(
        Front    *fr,
        Mid_soln *midsoln,
        Limiting_store **limit_store,
        int      rk_step,
        int      HR_times,
        int      check_quadrature,
        int      total_tri)
{
        TRI       *tri, *crsp_tri, *nbtri[3];
        SURFACE   **surf;
        int       dim = 2, i, j, side, tmpi, cv_indx, indx;
        double     *cent;
        size_t    sizest = fr->sizest;
        Locstate  st2;
        float     max_dt;
        // row_limit_tris[] holds up to Row_alloc rows of candidate triangles;
        // every row has N_alloc entries except the last, which has N_use.
        TRI       **limit_tris, **row_limit_tris[1500];
        int       N_alloc = 800, N_row, N_use =0, N, Row_alloc = 1500, local_iter_N = 1;
        int       loop_num = 0, detect_extr = NO, comput_time = NO, check_quadr = NO;
        static Locstate Roe_st[3] = {NULL, NULL, NULL},sta;  //Roe mean value
        static float  **L[3], **R[3];
        float     conu[3][4][MAX_N_COEF];  // [side][# eqn][coef]
        float     outcome[4][MAX_N_COEF], old_avg[4], new_avg[4];
        char      s[256];
        int       debug = NO, bigHR_round, update_layer[3] = {NO, YES, YES};
        Locstate  st, nst;

        // One-time allocation of the function-static work storage
        // (L/R matrices, Roe states, and the save/restore scratch state sta).
        if(Roe_st[0] == NULL)
        {
             for(i = 0; i < 3; i++)
             {
                 matrix(&L[i], 4, 4, sizeof(float));
                 matrix(&R[i], 4, 4, sizeof(float));
                 g_alloc_state(&Roe_st[i], fr->sizest);
             }
             g_alloc_state(&sta, fr->sizest);
        }
        // One-time allocation of the file-level per-triangle tables, sized by
        // total_tri, with their "already set" markers cleared.
        if(mass_1st_row == NULL)
        {
            vector(&alltri_mass_1st_rows,total_tri,sizeof(Tri_mass_1st_rows));
            vector(&alltri_mass_1st_rows_grp,total_tri,sizeof(Tri_mass_1st_rows));
            for(i = 0; i < total_tri; i++)
            {
                alltri_mass_1st_rows[i].mass_1st_rows[0] = NULL;
                alltri_mass_1st_rows[i].mass_1st_rows_ppcell[0] = NULL;
                alltri_mass_1st_rows_grp[i].mass_1st_rows[0] = NULL;
            }

            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));
            // for(i = 0; i < 30; i++)
            //     matrix(&mass_1st_rows[i], 1, MAX_N_COEF,sizeof(double));
            vector(&alltri_HR_sten_2,total_tri,sizeof(Tri_HR_sten));
            vector(&alltri_HR_sten_2_grp,total_tri,sizeof(Tri_HR_sten));
            for(i = 0; i < total_tri; i++)
            {
                alltri_HR_sten_2[i].HR_sten_set_3rd = alltri_HR_sten_2[i].HR_sten_set_2nd =
                alltri_HR_sten_2[i].HR_sten_set_1st = alltri_HR_sten_2[i].HR_sten_set_1st_pt = NO;
                alltri_HR_sten_2_grp[i].HR_sten_set_3rd = alltri_HR_sten_2_grp[i].HR_sten_set_2nd =
                alltri_HR_sten_2_grp[i].HR_sten_set_1st = alltri_HR_sten_2_grp[i].HR_sten_set_1st_pt = NO;
            }
        }

        // printf("Before identify_limiting_candidates()\n");

        // Fills row_limit_tris[] and N_use; the return value is the number of
        // rows in use.
        N_row = identify_limiting_candidates(fr,midsoln,limit_store,rk_step,N_alloc,
                                    Row_alloc,row_limit_tris,&N_use);

        // NOTE: the loop bound is 0, so this body never executes (disabled step).
        for(bigHR_round = 0;  bigHR_round < 0; bigHR_round++)
        {
            preprocess_P3_times_rearrange_order(fr,midsoln,limit_store,
                  row_limit_tris,N_alloc,N_row,N_use,rk_step,HR_times);
        }

        /// Now update 3rd and 2nd degree terms
        // NOTE: the loop bound is 0, so this body never executes (disabled step).
        for(bigHR_round = 0;  bigHR_round < 0; bigHR_round++)
        {
            subcell_update_high_degree_terms(fr,midsoln,limit_store,
                  row_limit_tris,N_alloc,N_row,N_use,rk_step,HR_times);
        }
        //// END: update 3rd and 2nd degree degree terms

        // Top of one limiting pass; re-entered by the goto near the end while
        // loop_num < HR_times.
redo_HR:
        loop_num++;
        // check_quadr is only consumed by the "old_way" branch below.
        if(loop_num == HR_times) check_quadr = YES;

        // printf("Before do HR on big tris to preprocess()\n");
	/*******************************************/
	// do HR on big tris to preprocess
	/*******************************************/
        // NOTE: the loop bound is 0, so this whole preprocessing section never
        // executes (disabled step).
        for(bigHR_round = 0;  bigHR_round < 0; bigHR_round++)
        {
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    for(side = 0; side < 3; side++)
                        nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                    if(debugging("char_re"))
                    {
                            // side = find_char_dir(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                            // comput_Roe_ver2(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,Roe_st,L,R);
                            // convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,L[side]);
                    }

                    pre_process_limiting_P3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);

                    // limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,YES);
                    // limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,NO);
	            // limiting_1st_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,NO);    
                    // Subcell_limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                    // Subcell_limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                    if(debugging("char_re"))
                    {
                        // convert reconstructed char variables to conserv. variables.
                        // convert_con_char(midsoln[row_limit_tris[i][j]->id].st[0], R[side],
                        //                  midsoln[row_limit_tris[i][j]->id].st[0]);
                        // save_reconstruct_st(row_limit_tris[i][j],midsoln,rk_step, conu[side]);
                        // convert candidate char variables to conserv. variables.
                        // convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,R[side]);
                    }
                }
            }
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,NO);
                    // update_coef_layers(row_limit_tris[i][j],midsoln,rk_step,fr,update_layer);
                    update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                }
            }

            update_buffer_of_test_problems(fr,midsoln,rk_step,limit_store,row_limit_tris,N_alloc,N_row,N_use);
        }
	/*******************************************/
	// END: do HR on big tris to preprocess
	/*******************************************/

        /// printf("after doing limiting on big tris only  in limiter\n");

	/*******************************************/
	/*******************************************/
        /// Now update 3rd and 2nd degree terms
        // NOTE: the loop bound is 0, so this body never executes (disabled step).
        for(bigHR_round = 0;  bigHR_round < 0; bigHR_round++)
        {
            subcell_update_high_degree_terms(fr,midsoln,limit_store,
                  row_limit_tris,N_alloc,N_row,N_use,rk_step,HR_times);
        }
        //// END: update 3rd and 2nd degree degree terms
	/*******************************************/
	/*******************************************/

        // Main limiting sweep over every candidate triangle.
        for(i = 0; i < N_row; i++)
        {
            // The last row is only partially filled (N_use entries).
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                // TMP
                // printf(" Subcell_limiting_soln_with_buffer_tris_multiple_times, on tri[%d],[%d][%d]\n",
                //      row_limit_tris[i][j]->id, i, j);
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                // Variant 1 ("char_re"): limit once per side using that side's
                // L/R matrices from comput_Roe_ver2(), then combine the three
                // results with weno_combine_P3().
                if(debugging("char_re"))
                {
		    // side = find_char_dir(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                    comput_Roe_ver2(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,Roe_st,L,R);

                    /***
                    side = fr->step %3;
                    convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,L[side]);

                    Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                       midsoln,limit_store,rk_step,YES);
                    Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                       midsoln,limit_store,rk_step,NO);
                    Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                       midsoln,limit_store,rk_step,NO,NO,NO);

                        // convert reconstructed char variables to conserv. variables.
                    convert_con_char(midsoln[row_limit_tris[i][j]->id].st[0], R[side],
                                     midsoln[row_limit_tris[i][j]->id].st[0]);
                    convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,R[side]);
                    ***/ 
                    for(side = 0; side < 3; side++)
                    {
                        // Transform the candidates with L[side] before limiting
                        // (presumably conservative -> characteristic variables,
                        // per the helper names -- confirm).
                        convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,L[side]);

                        Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);
                        Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);
                        Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,NO,YES,NO);

                        // convert reconstructed char variables to conserv. variables.
                        convert_con_char(midsoln[row_limit_tris[i][j]->id].st[0], R[side],
                                     midsoln[row_limit_tris[i][j]->id].st[0]);

                        // Keep this side's result in conu[side], then transform
                        // the candidates again with R[side] for the next side.
                        save_reconstruct_st(row_limit_tris[i][j],midsoln,rk_step, conu[side]);
                        convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step,R[side]);
                    }

                    // use largest jump direction
                    weno_combine_P3(row_limit_tris[i][j],conu,outcome);
                    save_weno_st_to_midsoln(row_limit_tris[i][j],midsoln,rk_step,outcome);
                    // Flag the triangle for fix_unphysical_st() on the last pass.
                    // The check is only made when N_EQN == 4.
                    if(N_EQN == 4 && YES == unphysical_st_at_quadrature(row_limit_tris[i][j], 
                                                  midsoln[row_limit_tris[i][j]->id].st[0]))
                        row_limit_tris[i][j]->redo_limiting = YES;
                    else
                        row_limit_tris[i][j]->redo_limiting = NO;
                }
                else
                {
                    // Variant 2 ("old_way"): limiters chosen by MAX_N_COEF.
                    if(debugging("old_way"))
                    {
                        // NOTE: local_iter_N is fixed at 1 in this function, so
                        // this iterated sub-branch is currently not taken.
                        if(local_iter_N != 1)
                        {
                            ///// save states before HR
                            if(rk_step == RK_STEP)
                                assign(sta, row_limit_tris[i][j]->st, fr->sizest);
                            else
                                assign(sta, midsoln[row_limit_tris[i][j]->id].st[rk_step], fr->sizest);
                            ///// end:::::save states before HR
                            for(tmpi = 0; tmpi < local_iter_N; tmpi++)
                            {
                                Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                    midsoln,limit_store,rk_step,YES);

                                // Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                //     midsoln,limit_store,rk_step,YES);
                                Subcell_limiting_2nd_degreeP3_PNC_multi_pt_limit(row_limit_tris[i][j],nbtri,
                                       midsoln,limit_store,rk_step,YES);

                                Subcell_limiting_1st_degreeP3_PNC_multi_pt_limit(row_limit_tris[i][j],nbtri,
                                       midsoln,limit_store,rk_step,NO,YES,(tmpi == local_iter_N-1?check_quadrature:NO));
                                /***
                                if(tmpi != local_iter_N-1)
                                    Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                       midsoln,limit_store,rk_step,NO,YES, (tmpi == local_iter_N-1?check_quadrature:NO));
                                else
                                    Subcell_limiting_1st_degreeP3_PNC_pt_limit(row_limit_tris[i][j],nbtri,
                                       midsoln,limit_store,rk_step,NO,YES, (tmpi == local_iter_N-1?check_quadrature:NO));
                                ***/

                                update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,NO);
                                update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                            }
                            ///// restore original (before HR) states
                            if(rk_step == RK_STEP)
                                assign(row_limit_tris[i][j]->st, sta, fr->sizest);
                            else
                                assign(midsoln[row_limit_tris[i][j]->id].st[rk_step], sta, fr->sizest);
                            update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                            ///// end:::: restore original (before HR) states
                        }
                        else
                        {
                            // Single-shot limiting; the limiter set depends on
                            // MAX_N_COEF (10, 6 or 3), anything else aborts.
                            if(10 == MAX_N_COEF)
                            {
                                ////// TMP
                                /***
                                if(row_limit_tris[i][j]->id == 10 ||
                                   row_limit_tris[i][j]->id == 14)
                                {
                                    printf("check tri[%d] state before subcell limiting\n", row_limit_tris[i][j]->id);
                                    if(rk_step == RK_STEP)
                                        g_verbose_print_state(row_limit_tris[i][j]->st);
                                    else
                                            g_verbose_print_state(midsoln[row_limit_tris[i][j]->id].st[rk_step]);
                                }
                                ***/

                                limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,3,midsoln,rk_step,YES);
                                /// Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                ///     midsoln,limit_store,rk_step,YES);
    
                                // limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,YES);
                                limiting_2nd_degreeP3_multi_pt(row_limit_tris[i][j],nbtri,midsoln,rk_step,YES);
                                // Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                //     midsoln,limit_store,rk_step,YES);
                                // Subcell_limiting_2nd_degreeP3_PNC_multi_pt_limit(row_limit_tris[i][j],nbtri,
                                //     midsoln,limit_store,rk_step,YES);

                                // Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                //     midsoln,limit_store,rk_step,NO,YES,check_quadrature);
                                // Subcell_limiting_1st_degreeP3_PNC_2nd_degree_tech(row_limit_tris[i][j],nbtri,
                                //     midsoln,limit_store,rk_step,NO,YES,check_quadrature);
                                // Subcell_limiting_1st_degreeP3_PNC_pt_limit(row_limit_tris[i][j],nbtri,
                                //            midsoln,limit_store,rk_step,NO,YES, check_quadrature);
                                // check_quadr is YES only on the last pass.
                                Subcell_limiting_1st_degreeP3_PNC_multi_pt_limit(row_limit_tris[i][j],nbtri,
                                    midsoln,limit_store,rk_step,NO,YES,check_quadr);
                            }
                            else if(6 == MAX_N_COEF)
                            {
                                limiting_2nd_degree(row_limit_tris[i][j],nbtri,3,midsoln,rk_step);
                                // Subcell_limiting_2nd_degreeP2(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                                // Subcell_limiting_1st_degreeP2(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                                Subcell_limiting_1st_degreeP2_multi_pt_limit(row_limit_tris[i][j],nbtri,
                                    midsoln,limit_store,rk_step);
                            }
                            else if(3 == MAX_N_COEF)
                            {
                                limiting_1st_degreeP1(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                            }
                            else
                            {
                                printf("ERROR: Subcell_limiting_soln_with_buffer_tris_multiple_times_local_HR(), implement max_n_coef = %d\n", MAX_N_COEF);
                                clean_up(ERROR);
                            }
                        }
                        // Subcell_limiting_1st_degreeP3_PNC_2nd_degree_tech(row_limit_tris[i][j],nbtri,
                        //     midsoln,limit_store,rk_step,NO,YES,check_quadrature);
                        // Subcell_limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                        // Subcell_limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                    } /// END: if(debugging("old_way"))
                    else
                    {
                        // Variant 3 (default): the "_grouping" limiters, run
                        // local_iter_N times on a saved copy of the state.
                        // Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                        //     midsoln,limit_store,rk_step,YES);

                        /***
                        Subcell_limiting_3rd_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);
      
                        Subcell_limiting_2nd_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);

                        Subcell_limiting_1st_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,NO,YES,YES);
                        ***/

                        ///// save states before HR
                        if(rk_step == RK_STEP)
                            assign(sta, row_limit_tris[i][j]->st, fr->sizest);
                        else
                            assign(sta, midsoln[row_limit_tris[i][j]->id].st[rk_step], fr->sizest);
                        ///// end:::::save states before HR
                        for(tmpi = 0; tmpi < local_iter_N; tmpi++)
                        {
                            Subcell_limiting_3rd_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);

                            Subcell_limiting_2nd_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,YES);

                            // The last argument is YES only on the final local
                            // iteration (always, while local_iter_N == 1).
                            Subcell_limiting_1st_degreeP3_PNC_grouping(row_limit_tris[i][j],nbtri,
                               midsoln,limit_store,rk_step,NO,YES,(tmpi==local_iter_N-1? YES:NO));

                            update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,NO);
                            update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                        }
                        // Put the pre-limiting state back (presumably so that
                        // triangles handled later in this sweep still see
                        // unlimited neighbor data -- confirm); the limited
                        // result is applied by the update_coef() sweep below.
                        ///// restore original (before HR) states
                        if(rk_step == RK_STEP)
                            assign(row_limit_tris[i][j]->st, sta, fr->sizest);
                        else
                            assign(midsoln[row_limit_tris[i][j]->id].st[rk_step], sta, fr->sizest);
                        update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                        ///// end:::: restore original (before HR) states
                    }
                }
            }
        }

        // comput_time is passed to update_coef() below; it is YES only on the
        // last pass and only when rk_step == RK_STEP.
        if(loop_num == HR_times && rk_step == RK_STEP)
            comput_time = YES;

        /// printf("before fix_unphysical_st() in limiter\n");

        // Last pass only: repair the triangles that a limiter flagged with
        // redo_limiting == YES.
        if(loop_num == HR_times)
        {
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(row_limit_tris[i][j]->redo_limiting == YES)
                         fix_unphysical_st(row_limit_tris[i][j],midsoln,0,fr);
                }
            }
        }

        // Apply the results of this pass to every candidate triangle.
        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,comput_time);
                update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
            }
        }

        // More passes requested: refresh the buffer data and start over.
        if(loop_num < HR_times)
        {
            update_buffer_of_test_problems(fr,midsoln,rk_step,limit_store,row_limit_tris,N_alloc,N_row,N_use);
            goto redo_HR;
        }

        // Release the candidate rows filled by identify_limiting_candidates().
        // NOTE(review): this assumes the rows are malloc-compatible
        // allocations -- confirm against identify_limiting_candidates().
        for(i = 0; i < N_row; i++)
            free(row_limit_tris[i]);

        ////////////////////// TMP
        debug_flag = NO;
}


//New Huijing
LOCAL void limiting_1st_degreeP1_MHD(
         TRI       *tri,
         TRI       *sten_tris[][3],
         int       N_sten,
         Mid_soln  *midsoln,
         int       rk_iter,
         Front     *fr)
{
         Locstate st, nbst[3], st2, nbst2[3], tmpst[2];
         float    uave[8], nbuave[20][2][8]; /// nbuave[#stencil][2 cells][#eqn]
         float    Rave[8], nbRave[20][2][8];
         float    Lave[8], nbLave[20][2][8];
         int      i, j, dim = 2, indx, k, is_bad_sten[8] = {NO, NO, NO, NO, NO,NO,NO,NO};
         int      cv_indx, num_CVs;
         int      side, tmp_side;
         double    *cent, *nbcent[20];
         float    rside[3], rside2[3], least_soln1[3];
         float    coef[20][2];
         float    u0, u1, u2, avg1, avg2, w[19], arrya[19], arryb[19];
         double   **Lmass_matrix = tri->Lmass_matrix;
         float    eps = 0.7; // 0.5, 0.1: over-smooth???, 0.7 for all cent
         // float    c_num[5], diam;
         // float    A[3][2][2];
         double   *c_num, diam, ***A, sqrt_area;
         float    A_edge[3][2], mid[3][2], sv_coef[20];
         int      debug = NO, nn_num;
         TRI      *tm_tris[20]; // *sten_tris[10][3]; /// sten_tris[stencil #][tri indx]

         ///// from one-sided (6)
         /****
         for(side = 0; side < 3; side++)
         {
             sten_tris[N_sten][0] = tri; 
             sten_tris[N_sten][1] = Tri_on_side(tri,side); 

             for(tmp_side = 0; tmp_side < 3; tmp_side++)
             {
                 if(tri == Tri_on_side(sten_tris[N_sten][1],tmp_side))
                     break;
             }
             sten_tris[N_sten][2] = Tri_on_side(sten_tris[N_sten][1],(tmp_side+1)%3);
             N_sten++;

             /// next one on the same side
             sten_tris[N_sten][0] = tri;
             sten_tris[N_sten][1] = Tri_on_side(tri,side);
             sten_tris[N_sten][2] = Tri_on_side(sten_tris[N_sten-1][1],(tmp_side+2)%3);
             N_sten++;
         }

         ///// from reverse-sided (3)
         for(side = 0; side < 3; side++)
         {
             get_reverse_sten_P1(tri, side, tm_tris, &nn_num);
             sten_tris[N_sten][0] = tm_tris[0];
             sten_tris[N_sten][1] = tm_tris[1];
             sten_tris[N_sten][2] = tm_tris[2];
             N_sten++;
         }
         ***/

         if(NULL == midsoln)
             st = tri->st;
         else
         {
             if(rk_iter == RK_STEP)
                 st = tri->st;
             else
                 st = midsoln[tri->id].st[rk_iter];
         }

         st2 = midsoln[tri->id].st[0];
         diam = fg_diam(tri);
         cent = fg_centroid(tri);
         sqrt_area = sqrt(fg_area(tri));

         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         uave[0] = Dens(st);
         for(i = 0; i < 3; i++)
             uave[i+1] = Mom(st)[i];
         uave[4] = Energy(st);
         for(i = 0; i < 3; i++)
             uave[i+5] = Mag(st)[i];
 
         ///// get average of states from stencil cells
         for(i = 0; i < N_sten; i++)
         {
             if(NULL == midsoln)
             {
                 tmpst[0] = sten_tris[i][1]->st;
                 tmpst[1] = sten_tris[i][2]->st;
             }
             else
             {
                 if(rk_iter == RK_STEP)
                 {
                     tmpst[0] = sten_tris[i][1]->st;
                     tmpst[1] = sten_tris[i][2]->st;
                 }
                 else
                 {
                     tmpst[0] = midsoln[sten_tris[i][1]->id].st[rk_iter];
                     tmpst[1] = midsoln[sten_tris[i][2]->id].st[rk_iter];
                 }
             }

             for(j = 0; j < 2; j++)
             {
                 nbuave[i][j][0] = Dens(tmpst[j]);
                 for(k = 0; k < 3; k++)
                     nbuave[i][j][k+1] = Mom(tmpst[j])[k];
                 nbuave[i][j][4] = Energy(tmpst[j]);
                 for(k = 0; k < 3; k++)
                     nbuave[i][j][k+5] = Mag(tmpst[j])[k];
             }
         }

         NEW_extrema_detec_on_WENO_sten(uave, nbuave, N_sten, is_bad_sten);

         if(alltri_HR_sten_2[tri->id].HR_sten_set_1st == NO)
         {
             tri_array(&(A),N_sten,2,2,sizeof(double));
             vector(&(c_num),N_sten,sizeof(double));
             for(i = 0; i < N_sten; i++)
             {
                 A[i][0][0] = (fg_centroid(sten_tris[i][1])[0]-cent[0])/sqrt_area;
                 A[i][0][1] = (fg_centroid(sten_tris[i][1])[1]-cent[1])/sqrt_area;
                 A[i][1][0] = (fg_centroid(sten_tris[i][2])[0]-cent[0])/sqrt_area;
                 A[i][1][1] = (fg_centroid(sten_tris[i][2])[1]-cent[1])/sqrt_area;
                 c_num[i] = cond_num2(A,i); // c_num[i] =1.0;
             }
             alltri_HR_sten_2[tri->id].HR_sten_set_1st = YES;
             alltri_HR_sten_2[tri->id].c_num = c_num;
             alltri_HR_sten_2[tri->id].A = A;
         }
         else
         {
             A = alltri_HR_sten_2[tri->id].A;
             c_num = alltri_HR_sten_2[tri->id].c_num;
         }

         for(k = 0; k < N_EQN; k++)
         {
             // if(k == 5 || k == 6) continue; // mag[0] and mag[1] do not need limiting.

             // linear part of polynomial
             for(i = 0; i < N_sten; i++)
             {
                 rside[0] = nbuave[i][0][k] - uave[k];
                 rside[1] = nbuave[i][1][k] - uave[k];
                 // comp_coef(A[i],rside,coef[i]);
                 comp_coef2(A,rside,coef[i],i);
             }
             for(i = 0; i < N_sten; i++)
             {
                 arrya[i] = coef[i][0];
                 arryb[i] = coef[i][1];
             }

             // if(debugging("weno_w"))
             {
                 // WENO_mod_1(arrya, arryb, c_num, num_CVs, w);
                 // WENO_mod_1(arrya, arryb, NULL, num_CVs, w);
                 WENO_mod_1_sqr_weight(arrya, arryb, c_num, N_sten, w);
                 u1 = u2 = 0.0;
                 for(i = 0; i < N_sten; i++)
                 {
                     u1 += w[i]*coef[i][0];
                     u2 += w[i]*coef[i][1];
                 } 

                 if(is_bad_sten[k] == YES)
                 {
                   u1 = u2 = 0.0;
                 }
             } 
             u0 = uave[k];
             //printf("eqn[%d] u0=%g u1=%g u2=%g\n",k,u0,u1,u2);
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Mom(st2)[2][0] = u0;
                 dg_Mom(st2)[2][1] = u1;
                 dg_Mom(st2)[2][2] = u2;
             break;
             case 4:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             case 5:
                 dg_B(st2)[0][0] = u0;
                 dg_B(st2)[0][1] = u1;
                 dg_B(st2)[0][2] = u2;
             break;
             case 6:
                 dg_B(st2)[1][0] = u0;
                 dg_B(st2)[1][1] = u1;
                 dg_B(st2)[1][2] = u2;
             break;
             case 7:
                 dg_B(st2)[2][0] = u0;
                 dg_B(st2)[2][1] = u1;
                 dg_B(st2)[2][2] = u2;
             break;
             default:
                 printf("ERROR: limiting_1st_degreeP1_MHD(), implement case %d\n", k);
                 clean_up(ERROR);
             break;
             }
         }

         Dens(st2) = Dens(st);
         Energy(st2) = Energy(st);
         for(i = 0; i < 3; i++)
             Mom(st2)[i] = Mom(st)[i];
         for(i = 0; i < 3; i++)
             Mag(st2)[i] = Mag(st)[i];

         ///// Re-enforce divergence-free property
         /*** OLD working implementation before 01-28-2015
         if(dg_B(st2)[0][1] >= 0.0)
             dg_B(st2)[0][1] = min(fabs(dg_B(st2)[0][1]), fabs(dg_B(st2)[1][2]));
         else 
             dg_B(st2)[0][1] = -min(fabs(dg_B(st2)[0][1]), fabs(dg_B(st2)[1][2]));
         dg_B(st2)[1][2] = -dg_B(st2)[0][1];
         ***/
         /*New Re-enforce divergence-free property: 01-28-2015*/
         if(fabs(dg_B(st2)[0][1]) <= fabs(dg_B(st2)[1][2]))
         {
             dg_B(st2)[1][2] = -dg_B(st2)[0][1];
         }
         else
         {
             dg_B(st2)[0][1] = -dg_B(st2)[1][2];
         }

         if((N_EQN == 4 || N_EQN == 8) && YES == unphysical_st_at_quadrature(tri, st2))
         {
             tri->redo_limiting = YES;
             // fix_unphysical_st(tri,midsoln,rk_iter,fr);
             // printf("Tri(%d) limiting in limiting_1st_degreeP1_MHD(), detect unphysical_state at rk stage %d\n",
             //       tri->id, rk_iter);
         }
         else
             tri->redo_limiting = NO;

         // if(tri->id == 1044)
         //     printf("Tri(%d) limiting in limiting_1st_degreeP1_MHD(), fix unphysical_state = %d\n",
         //           tri->id, tri->redo_limiting);
}


/*
*               NEW_extrema_detec_on_WENO_sten():
*
*       Per-equation local-extremum test of a cell average against the
*       averages stored for its WENO stencils.
*
*       Input  uave    - cell averages of the triangle, one per equation
*              nbuave  - nbuave[stencil][cell][eqn]: averages of the two
*                        neighbor cells of each of the N stencils
*              N       - number of stencils in nbuave
*
*       Output is_bad  - is_bad[eq] is YES when uave[eq] is not strictly
*                        inside the range spanned by the stencil averages
*                        (ties count as extrema), NO otherwise.  With N == 0
*                        every equation is flagged YES.
*
*       The first N_EQN entries of uave and is_bad are accessed.
*/
LOCAL void NEW_extrema_detec_on_WENO_sten(
         float     *uave,
         float     nbuave[][2][8],
         int       N,
         int       *is_bad)
{
         int      eq, s, c;
         float    val;

         for(eq = 0; eq < N_EQN; eq++)
         {
             /* running bounds start from the cell's own average */
             const float own = uave[eq];
             float       hi = own, lo = own;

             for(s = 0; s < N; s++)
             {
                 for(c = 0; c < 2; c++)
                 {
                     val = nbuave[s][c][eq];
                     if(val > hi)
                         hi = val;
                     if(val < lo)
                         lo = val;
                 }
             }

             /* own never exceeds hi nor drops below lo, so these tests */
             /* fire exactly when no neighbor lies above (or below) it. */
             is_bad[eq] = (own >= hi || own <= lo) ? YES : NO;
         }
}


/*
*               limiting_1st_degreeP1_MHD_FV():
*
*       Rebuilds the linear (P1) coefficients of the state on tri from cell
*       averages, using nine three-cell stencils: two one-sided stencils per
*       edge (tri, the edge neighbor, and one of that neighbor's other two
*       neighbors) and one stencil per edge returned by get_reverse_sten_P1().
*       For every equation except k = 5,6 (Mag[0], Mag[1]) each stencil yields
*       a candidate slope pair via comp_coef2(); the candidates are combined
*       with the weights w[] returned by WENO_mod_1_sqr_weight().
*
*       Input  tri      - triangle being limited
*              nbtri    - edge neighbors of tri (not used by this routine)
*              midsoln  - per-triangle Runge-Kutta stage storage; the result
*                         is written into midsoln[tri->id].st[0]
*              rk_iter  - stage to read averages from; RK_STEP selects the
*                         states stored on the triangles themselves
*              fr       - front (not used by this routine)
*
*       Output midsoln[tri->id].st[0] receives the limited dg_ coefficients
*       and a copy of the cell averages; dg_B/Mag components 0 and 1 are
*       copied from the source state unchanged.  tri->redo_limiting is set to
*       YES when unphysical_st_at_quadrature() rejects the result (only
*       tested for N_EQN == 4 or 8), NO otherwise.
*
*       NOTE(review): midsoln is dereferenced unconditionally for the output
*       state, so the NULL == midsoln tests below only protect the reads.
*       NOTE(review): A/c_num are cached per tri->id behind HR_sten_set_1st,
*       a flag other limiters in this file also use with different stencil
*       counts - confirm they are never mixed on the same triangle.
*/
LOCAL void limiting_1st_degreeP1_MHD_FV(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         int       rk_iter,
         Front     *fr)
{
         Locstate st, st2, tmpst[2];
         float    uave[8], nbuave[20][2][8]; /// nbuave[#stencil][2 cells][#eqn]
         int      i, j, k;
         int      side, tmp_side;
         double   *cent;
         float    rside[3];
         float    coef[20][2];
         float    u0, u1, u2, w[19], arrya[19], arryb[19];
         double   *c_num, ***A;
         int      N_sten = 0, nn_num;
         TRI      *tm_tris[20], *sten_tris[10][3]; /// sten_tris[stencil #][tri indx]

         ///// from one-sided: two stencils per edge of tri
         for(side = 0; side < 3; side++)
         {
             TRI *edge_nb = Tri_on_side(tri,side);

             /// find the side of the neighbor that faces tri
             for(tmp_side = 0; tmp_side < 3; tmp_side++)
             {
                 if(tri == Tri_on_side(edge_nb,tmp_side))
                     break;
             }

             sten_tris[N_sten][0] = tri;
             sten_tris[N_sten][1] = edge_nb;
             sten_tris[N_sten][2] = Tri_on_side(edge_nb,(tmp_side+1)%3);
             N_sten++;

             /// next one on the same side
             sten_tris[N_sten][0] = tri;
             sten_tris[N_sten][1] = edge_nb;
             sten_tris[N_sten][2] = Tri_on_side(edge_nb,(tmp_side+2)%3);
             N_sten++;
         }

         ///// from reverse-sided: one stencil per edge
         for(side = 0; side < 3; side++)
         {
             get_reverse_sten_P1(tri, side, tm_tris, &nn_num);
             sten_tris[N_sten][0] = tm_tris[0];
             sten_tris[N_sten][1] = tm_tris[1];
             sten_tris[N_sten][2] = tm_tris[2];
             N_sten++;
         }

         /// source state: the triangle's own state unless an intermediate
         /// RK stage is requested
         if(NULL == midsoln || rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];

         st2 = midsoln[tri->id].st[0];
         cent = fg_centroid(tri);

         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         /// entries 5 and 6 (Mag[0], Mag[1]) are never limited here, so they
         /// are left unset in uave and nbuave
         uave[0] = Dens(st);
         for(i = 0; i < 3; i++)
             uave[i+1] = Mom(st)[i];
         uave[4] = Energy(st);
         uave[7] = Mag(st)[2];

         ///// get average of states from stencil cells
         for(i = 0; i < N_sten; i++)
         {
             if(NULL == midsoln || rk_iter == RK_STEP)
             {
                 tmpst[0] = sten_tris[i][1]->st;
                 tmpst[1] = sten_tris[i][2]->st;
             }
             else
             {
                 tmpst[0] = midsoln[sten_tris[i][1]->id].st[rk_iter];
                 tmpst[1] = midsoln[sten_tris[i][2]->id].st[rk_iter];
             }

             for(j = 0; j < 2; j++)
             {
                 nbuave[i][j][0] = Dens(tmpst[j]);
                 for(k = 0; k < 3; k++)
                     nbuave[i][j][k+1] = Mom(tmpst[j])[k];
                 nbuave[i][j][4] = Energy(tmpst[j]);
                 nbuave[i][j][7] = Mag(tmpst[j])[2];
             }
         }

         /// geometry matrices: rows are the centroid offsets of the two
         /// stencil cells from tri; built once and cached on the triangle
         if(alltri_HR_sten_2[tri->id].HR_sten_set_1st == NO)
         {
             tri_array(&(A),N_sten,2,2,sizeof(double));
             vector(&(c_num),N_sten,sizeof(double));
             for(i = 0; i < N_sten; i++)
             {
                 A[i][0][0] = (fg_centroid(sten_tris[i][1])[0]-cent[0]);
                 A[i][0][1] = (fg_centroid(sten_tris[i][1])[1]-cent[1]);
                 A[i][1][0] = (fg_centroid(sten_tris[i][2])[0]-cent[0]);
                 A[i][1][1] = (fg_centroid(sten_tris[i][2])[1]-cent[1]);
                 c_num[i] = cond_num2(A,i);
             }
             alltri_HR_sten_2[tri->id].HR_sten_set_1st = YES;
             alltri_HR_sten_2[tri->id].c_num = c_num;
             alltri_HR_sten_2[tri->id].A = A;
         }
         else
         {
             A = alltri_HR_sten_2[tri->id].A;
             c_num = alltri_HR_sten_2[tri->id].c_num;
         }

         for(k = 0; k < N_EQN; k++)
         {
             if(k == 5 || k == 6) continue; // mag[0] and mag[1] do not need limiting.

             // linear part of polynomial: one candidate slope pair per stencil
             for(i = 0; i < N_sten; i++)
             {
                 rside[0] = nbuave[i][0][k] - uave[k];
                 rside[1] = nbuave[i][1][k] - uave[k];
                 comp_coef2(A,rside,coef[i],i);
             }
             for(i = 0; i < N_sten; i++)
             {
                 arrya[i] = coef[i][0];
                 arryb[i] = coef[i][1];
             }

             // weighted combination of the candidates
             WENO_mod_1_sqr_weight(arrya, arryb, c_num, N_sten, w);
             u1 = u2 = 0.0;
             for(i = 0; i < N_sten; i++)
             {
                 u1 += w[i]*coef[i][0];
                 u2 += w[i]*coef[i][1];
             }

             u0 = uave[k];
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Mom(st2)[2][0] = u0;
                 dg_Mom(st2)[2][1] = u1;
                 dg_Mom(st2)[2][2] = u2;
             break;
             case 4:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             case 7:
                 dg_B(st2)[2][0] = u0;
                 dg_B(st2)[2][1] = u1;
                 dg_B(st2)[2][2] = u2;
             break;
             default:
                 // message previously named the Subcell_ variant by mistake
                 printf("ERROR: limiting_1st_degreeP1_MHD_FV(), implement case %d\n", k);
                 clean_up(ERROR);
             break;
             }
         }

         /// in-plane magnetic field is carried over untouched
         for(i = 0; i < 2; i++)
         {
             for(k = 0; k < MAX_N_COEF; k++)
                 dg_B(st2)[i][k] = dg_B(st)[i][k];
             Mag(st2)[i] = Mag(st)[i];
         }

         Dens(st2) = Dens(st);
         Energy(st2) = Energy(st);
         for(i = 0; i < 3; i++)
             Mom(st2)[i] = Mom(st)[i];
         Mag(st2)[2] = Mag(st)[2];

         if((N_EQN == 4 || N_EQN == 8) && YES == unphysical_st_at_quadrature(tri, st2))
             tri->redo_limiting = YES;
         else
             tri->redo_limiting = NO;
}

/*
*               Subcell_limiting_1st_degreeP1_MHD_FV():
*
*       Rebuilds the linear (P1) coefficients of the state on tri from
*       control-volume averages.  The control volumes come from
*       get_tri_near_neighbr_CV_stencil(); each consecutive pair
*       (i, (i+1) mod num_CVs) forms one candidate stencil whose slope pair
*       is obtained from comp_coef2().  Equations k = 5,6 (Mag[0], Mag[1])
*       are not limited.
*
*       The candidates are combined only when debugging("weno_w") is on.
*       Otherwise the slopes are set to zero, i.e. the limited polynomial
*       falls back to the cell average (previously the slopes were stored
*       uninitialized in that case).  Slopes are also zeroed for equations
*       flagged by NEW_extrema_detec().
*
*       Input  tri      - triangle being limited
*              nbtri    - edge neighbors of tri, forwarded to the stencil
*                         search
*              midsoln  - per-triangle Runge-Kutta stage storage; the result
*                         is written into midsoln[tri->id].st[0]
*              rk_iter  - stage to read states from; RK_STEP selects the
*                         states stored on the triangles themselves
*              fr       - front (not used by this routine)
*
*       Output midsoln[tri->id].st[0] receives the limited dg_ coefficients
*       and a copy of the cell averages; dg_B/Mag components 0 and 1 are
*       copied unchanged.  tri->redo_limiting reports whether
*       unphysical_st_at_quadrature() rejected the result.
*
*       Local buffers hold at most 19 control volumes.
*       NOTE(review): midsoln is dereferenced unconditionally for the output
*       state, so a NULL midsoln is not actually supported.
*/
LOCAL void Subcell_limiting_1st_degreeP1_MHD_FV(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         int       rk_iter,
         Front     *fr)
{
         Locstate st, st2, tmpst;
         float    uave[8], nbuave[20][8]; /// nbuave[#cv][#eqn]
         int      i, k, is_bad_sten[8] = {NO, NO, NO, NO, NO,NO,NO,NO};
         int      num_CVs;
         int      nbcv_indx[19], nbcv_side[19];
         double   *cent, *nbcent[20];
         float    rside[3];
         float    coef[20][2];
         float    u0, u1, u2, w[19], arrya[19], arryb[19];
         double   *c_num, ***A;
         TRI      *tris[20];

         if(NULL == midsoln || rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];

         st2 = midsoln[tri->id].st[0];
         cent = fg_centroid(tri);

         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         num_CVs = get_tri_near_neighbr_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);

         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         uave[0] = Dens(st);
         for(i = 0; i < 3; i++)
             uave[i+1] = Mom(st)[i];
         uave[4] = Energy(st);
         /// entries 5,6 are not limited but are still handed to the extrema
         /// detector below, so give them defined values
         uave[5] = Mag(st)[0];
         uave[6] = Mag(st)[1];
         uave[7] = Mag(st)[2];

         for(i = 0; i < num_CVs; i++)
         {
             if(NULL == midsoln || rk_iter == RK_STEP)
                 tmpst = tris[i]->st;
             else
                 tmpst = midsoln[tris[i]->id].st[rk_iter];
             avg_st_on_cv_ver3_MHD(tris[i], nbcv_indx[i], tmpst, nbuave[i]);
         }

         NEW_extrema_detec(uave,nbuave,num_CVs,is_bad_sten);

         /// geometry matrices: rows are the offsets of two consecutive CV
         /// centers from the triangle centroid; built once and cached
         if(alltri_HR_sten_2[tri->id].HR_sten_set_1st == NO)
         {
             tri_array(&(A),num_CVs,2,2,sizeof(double));
             vector(&(c_num),num_CVs,sizeof(double));
             for(i = 0; i < num_CVs; i++)
             {
                 A[i][0][0] = (nbcent[i][0]-cent[0]);
                 A[i][0][1] = (nbcent[i][1]-cent[1]);
                 A[i][1][0] = (nbcent[(i+1)%num_CVs][0]-cent[0]);
                 A[i][1][1] = (nbcent[(i+1)%num_CVs][1]-cent[1]);
                 c_num[i] = cond_num2(A,i);
             }
             alltri_HR_sten_2[tri->id].HR_sten_set_1st = YES;
             alltri_HR_sten_2[tri->id].c_num = c_num;
             alltri_HR_sten_2[tri->id].A = A;
         }
         else
         {
             A = alltri_HR_sten_2[tri->id].A;
             c_num = alltri_HR_sten_2[tri->id].c_num;
         }

         for(k = 0; k < N_EQN; k++)
         {
             if(k == 5 || k == 6) continue; // mag[0] and mag[1] do not need limiting.

             // linear part of polynomial
             for(i = 0; i < num_CVs; i++)
             {
                 rside[0] = nbuave[i][k] - uave[k];
                 rside[1] = nbuave[(i+1)%num_CVs][k] - uave[k];
                 comp_coef2(A,rside,coef[i],i);
             }
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coef[i][0];
                 arryb[i] = coef[i][1];
             }

             // default to zero slopes so that nothing indeterminate is
             // stored when the WENO combination is switched off
             u1 = u2 = 0.0;
             if(debugging("weno_w"))
             {
                 WENO_mod_1_sqr_weight(arrya, arryb, c_num, num_CVs, w);
                 for(i = 0; i < num_CVs; i++)
                 {
                     u1 += w[i]*coef[i][0];
                     u2 += w[i]*coef[i][1];
                 }
                 if(is_bad_sten[k] == YES)
                 {
                     u1 = u2 = 0.0;
                 }
             }

             u0 = uave[k];
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Mom(st2)[2][0] = u0;
                 dg_Mom(st2)[2][1] = u1;
                 dg_Mom(st2)[2][2] = u2;
             break;
             case 4:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             case 7:
                 dg_B(st2)[2][0] = u0;
                 dg_B(st2)[2][1] = u1;
                 dg_B(st2)[2][2] = u2;
             break;
             default:
                 printf("ERROR: Subcell_limiting_1st_degreeP1_MHD_FV(), implement case %d\n", k);
                 clean_up(ERROR);
             break;
             }
         }

         /// in-plane magnetic field is carried over untouched
         for(i = 0; i < 2; i++)
         {
             for(k = 0; k < MAX_N_COEF; k++)
                 dg_B(st2)[i][k] = dg_B(st)[i][k];
             Mag(st2)[i] = Mag(st)[i];
         }

         Dens(st2) = Dens(st);
         Energy(st2) = Energy(st);
         for(i = 0; i < 3; i++)
             Mom(st2)[i] = Mom(st)[i];
         Mag(st2)[2] = Mag(st)[2];

         if((N_EQN == 4 || N_EQN == 8) && YES == unphysical_st_at_quadrature(tri, st2))
             tri->redo_limiting = YES;
         else
             tri->redo_limiting = NO;
}


/*
*               limiting_P1_MHD():
*
*       Limits, in place, the linear (P1) coefficients of the state on tri
*       using the cell averages of the caller-supplied stencil sten_tri[].
*       Each consecutive pair (i, (i+1) mod sten_num) of stencil triangles
*       gives one candidate slope pair through comp_coef2().  Equations
*       k = 5,6 (Mag[0], Mag[1]) are not limited.
*
*       The candidates are combined according to the debugging() switches:
*         "weno_w"      - sum weighted by WENO_mod_1(); slopes are zeroed for
*                         equations flagged by NEW_extrema_detec()
*         "MC_limiting" - successive minmod() over all candidates (needs at
*                         least two stencil triangles)
*       If neither applies the slopes are set to zero, so the polynomial
*       reduces to the cell average (previously they were stored
*       uninitialized).
*
*       Input  tri       - triangle being limited
*              sten_tri  - stencil triangles, sten_num entries
*              sten_num  - stencil size; at most 30, larger values are
*                          reported and passed to clean_up(ERROR)
*              midsoln   - per-triangle Runge-Kutta stage storage
*              rk_iter   - stage to limit; RK_STEP selects the states stored
*                          on the triangles themselves
*              fr        - front, forwarded to fix_unphysical_st()
*
*       Output the dg_ coefficients of tri->st (rk_iter == RK_STEP) or of
*       midsoln[tri->id].st[rk_iter] are overwritten.  tri->redo_limiting is
*       set, and fix_unphysical_st() is invoked, when
*       unphysical_st_at_quadrature() rejects the result.
*
*       Work arrays A, c_num and cent_con_u are allocated on the first call
*       and kept in function-static storage.
*       NOTE(review): with midsoln == NULL and rk_iter != RK_STEP the output
*       state lookup dereferences NULL - confirm callers never do this.
*/
EXPORT void limiting_P1_MHD(
         TRI       *tri,
         TRI       *sten_tri[],
         int        sten_num,
         Mid_soln  *midsoln,
         int       rk_iter,
         Front     *fr)
{
         enum { P1_MHD_STEN_CAP = 30 }; /// capacity of the per-stencil buffers

         Locstate st, st2, tmpst;
         float    uave[8], nbuave[P1_MHD_STEN_CAP][8]; /// nbuave[#cv][#eqn]
         int      i, j, k, is_bad_sten[P1_MHD_STEN_CAP];
         double   *cent, *nbcent[P1_MHD_STEN_CAP];
         float    rside[3];
         float    coef[P1_MHD_STEN_CAP][2];
         float    u0, u1, u2;
         float    w[P1_MHD_STEN_CAP], arrya[P1_MHD_STEN_CAP], arryb[P1_MHD_STEN_CAP];
         static double   *c_num = NULL, ***A = NULL, **cent_con_u = NULL;
         int      side;
         TRI      *edge_tri[3];

         /// every buffer below is sized for P1_MHD_STEN_CAP stencil cells
         if(sten_num > P1_MHD_STEN_CAP)
         {
             printf("ERROR: limiting_P1_MHD(), sten_num = %d exceeds capacity %d\n",
                    sten_num, (int)P1_MHD_STEN_CAP);
             clean_up(ERROR);
             return;
         }

         if(A == NULL)
         {
             tri_array(&(A),P1_MHD_STEN_CAP,2,2,sizeof(double));
             vector(&(c_num),P1_MHD_STEN_CAP,sizeof(double));
             matrix(&cent_con_u, 8, MAX_N_COEF,sizeof(double));
         }

         /// polynomial from the edge-neighbor stencil; rows 5 and 7 are
         /// swapped afterwards.  The result is currently consumed only by a
         /// disabled central-stencil variant of the MC limiter.
         for(side = 0; side < 3; side++)
             edge_tri[side] = Tri_on_side(tri,side);
         init_tri_comput_P1_polynomials_from_avg_MHD(tri,edge_tri,3, midsoln, rk_iter, cent_con_u);
         for(i = 0; i < MAX_N_COEF; i++)
         {
             u0 = cent_con_u[5][i];
             cent_con_u[5][i] = cent_con_u[7][i];
             cent_con_u[7][i] = u0;
         }

         if(NULL == midsoln || rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];

         if(rk_iter == RK_STEP)
             st2 = tri->st;
         else
             st2 = midsoln[tri->id].st[rk_iter];

         cent = fg_centroid(tri);

         for(i = 0; i < sten_num; i++)
             nbcent[i] = fg_centroid(sten_tri[i]);

         uave[0] = Dens(st);
         for(i = 0; i < 3; i++)
             uave[i+1] = Mom(st)[i];
         uave[4] = Energy(st);
         /// entries 5,6 are not limited but are still handed to the extrema
         /// detector below, so give them defined values
         uave[5] = Mag(st)[0];
         uave[6] = Mag(st)[1];
         uave[7] = Mag(st)[2];

         for(i = 0; i < sten_num; i++)
         {
             if(NULL == midsoln || rk_iter == RK_STEP)
                 tmpst = sten_tri[i]->st;
             else
                 tmpst = midsoln[sten_tri[i]->id].st[rk_iter];

             nbuave[i][0] = Dens(tmpst);
             for(j = 0; j < 3; j++)
                 nbuave[i][j+1] = Mom(tmpst)[j];
             nbuave[i][4] = Energy(tmpst);
             nbuave[i][5] = Mag(tmpst)[0];
             nbuave[i][6] = Mag(tmpst)[1];
             nbuave[i][7] = Mag(tmpst)[2];

             /// rows: offsets of two consecutive stencil centroids from tri
             A[i][0][0] = (nbcent[i][0]-cent[0]);
             A[i][0][1] = (nbcent[i][1]-cent[1]);
             A[i][1][0] = (nbcent[(i+1)%sten_num][0]-cent[0]);
             A[i][1][1] = (nbcent[(i+1)%sten_num][1]-cent[1]);
             c_num[i] = cond_num2(A,i);
         }

         NEW_extrema_detec(uave,nbuave,sten_num,is_bad_sten);

         for(k = 0; k < N_EQN; k++)
         {
             if(k == 5 || k == 6) continue; // mag[0] and mag[1] do not need limiting.

             // linear part of polynomial
             for(i = 0; i < sten_num; i++)
             {
                 rside[0] = nbuave[i][k] - uave[k];
                 rside[1] = nbuave[(i+1)%sten_num][k] - uave[k];
                 comp_coef2(A,rside,coef[i],i);
             }
             for(i = 0; i < sten_num; i++)
             {
                 arrya[i] = coef[i][0];
                 arryb[i] = coef[i][1];
             }

             // default to zero slopes so that nothing indeterminate is
             // stored when no limiter is selected
             u1 = u2 = 0.0;
             if(debugging("weno_w"))
             {
                 WENO_mod_1(arrya, arryb, c_num, sten_num, w);
                 for(i = 0; i < sten_num; i++)
                 {
                     u1 += w[i]*coef[i][0];
                     u2 += w[i]*coef[i][1];
                 }
                 if(is_bad_sten[k] == YES)
                 {
                     u1 = u2 = 0.0;
                 }
             }
             else if(debugging("MC_limiting") && sten_num >= 2)
             {
                 /// minmod over all candidates; the central stencil is
                 /// deliberately not used
                 u1 = minmod(arrya[0], arrya[1]);
                 u2 = minmod(arryb[0], arryb[1]);
                 for(i = 2; i < sten_num; i++)
                 {
                     u1 = minmod(u1, arrya[i]);
                     u2 = minmod(u2, arryb[i]);
                 }
             }

             u0 = uave[k];
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Mom(st2)[2][0] = u0;
                 dg_Mom(st2)[2][1] = u1;
                 dg_Mom(st2)[2][2] = u2;
             break;
             case 4:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             case 7:
                 dg_B(st2)[2][0] = u0;
                 dg_B(st2)[2][1] = u1;
                 dg_B(st2)[2][2] = u2;
             break;
             default:
                 printf("ERROR: limiting_P1_MHD(), implement case %d\n", k);
                 clean_up(ERROR);
             break;
             }
         }

         /// in-plane magnetic field is carried over untouched
         for(i = 0; i < 2; i++)
         {
             for(k = 0; k < MAX_N_COEF; k++)
                 dg_B(st2)[i][k] = dg_B(st)[i][k];
             Mag(st2)[i] = Mag(st)[i];
         }

         Dens(st2) = Dens(st);
         Energy(st2) = Energy(st);
         for(i = 0; i < 3; i++)
             Mom(st2)[i] = Mom(st)[i];
         Mag(st2)[2] = Mag(st)[2];

         if((N_EQN == 4 || N_EQN == 8) && YES == unphysical_st_at_quadrature(tri, st2))
         {
             tri->redo_limiting = YES;
             fix_unphysical_st(tri,midsoln,rk_iter,fr);
         }
         else
             tri->redo_limiting = NO;
}

/*
*               limiting_1st_degreeP1():
*
*       Rebuilds the linear (P1) coefficients of a four-equation state
*       (density, two momentum components, energy) on tri from the cell
*       averages of its three edge neighbors.  Each pair of neighbors
*       (i, (i+1) mod 3) gives one candidate slope pair through comp_coef2().
*
*       The candidates are combined with the weights from WENO_mod_1() only
*       when debugging("weno_w") is on, and are zeroed for equations flagged
*       by extrema_detec().  Otherwise the slopes are set to zero, i.e. the
*       polynomial falls back to the cell average (previously the slopes
*       were stored uninitialized in that case).
*
*       Input  tri      - triangle being limited
*              nbtri    - the three edge neighbors of tri
*              midsoln  - per-triangle Runge-Kutta stage storage; the result
*                         is written into midsoln[tri->id].st[0]
*              rk_iter  - stage to read averages from; RK_STEP selects the
*                         states stored on the triangles themselves
*
*       Output midsoln[tri->id].st[0] receives the limited dg_ coefficients
*       and a copy of the cell averages.  tri->redo_limiting is YES when
*       N_EQN == 4 and unphysical_st_at_quadrature() rejects the result.
*
*       Only the first four equations are handled; the equation loop is
*       capped accordingly because every per-equation buffer has four
*       entries.
*       NOTE(review): A/c_num are cached per tri->id behind HR_sten_set_1st,
*       a flag other limiters in this file also use with different stencil
*       counts - confirm they are never mixed on the same triangle.
*/
LOCAL void limiting_1st_degreeP1(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         int       rk_iter)
{
         Locstate st, nbst[3], st2;
         float    uave[4], nbuave[3][4];
         int      i, k, n_eqn, is_bad_sten[4] = {NO, NO, NO, NO};
         int      num_CVs;
         int      nbcv_indx[9], nbcv_side[9];
         double   *cent, *nbcent[3];
         float    rside[3];
         float    coef[3][2];
         float    u0, u1, u2, w[9], arrya[9], arryb[9];
         double   *c_num, ***A;
         TRI      *tris[9];

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < 3; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
                 nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }

         st2 = midsoln[tri->id].st[0];

         /// NOTE(review): the control-volume stencil is not used by this
         /// routine; the call is retained only in case it has side effects.
         /// Its centers are no longer copied into nbcent[], which has room
         /// for just three entries and was overwritten below anyway.
         num_CVs = get_tri_near_neighbr_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);

         uave[0] = Dens(st);
         uave[1] = Mom(st)[0];
         uave[2] = Mom(st)[1];
         uave[3] = Energy(st);

         for(i = 0; i < 3; i++)
         {
             nbuave[i][0] = Dens(nbst[i]);
             nbuave[i][1] = Mom(nbst[i])[0];
             nbuave[i][2] = Mom(nbst[i])[1];
             nbuave[i][3] = Energy(nbst[i]);
         }

         extrema_detec(uave,nbuave,is_bad_sten);

         cent = fg_centroid(tri);
         for(i = 0; i < 3; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         /// geometry matrices: rows are the centroid offsets of two
         /// neighbors from tri; built once and cached on the triangle
         if(alltri_HR_sten_2[tri->id].HR_sten_set_1st == NO)
         {
             tri_array(&(A),3,2,2,sizeof(double));
             vector(&(c_num),3,sizeof(double));
             for(i = 0; i < 3; i++)
             {
                 A[i][0][0] = (nbcent[i][0]-cent[0]);
                 A[i][0][1] = (nbcent[i][1]-cent[1]);
                 A[i][1][0] = (nbcent[(i+1)%3][0]-cent[0]);
                 A[i][1][1] = (nbcent[(i+1)%3][1]-cent[1]);
                 c_num[i] = cond_num2(A,i);
             }
             alltri_HR_sten_2[tri->id].HR_sten_set_1st = YES;
             alltri_HR_sten_2[tri->id].c_num = c_num;
             alltri_HR_sten_2[tri->id].A = A;
         }
         else
         {
             A = alltri_HR_sten_2[tri->id].A;
             c_num = alltri_HR_sten_2[tri->id].c_num;
         }

         /// uave, nbuave and is_bad_sten hold four equations only
         n_eqn = (N_EQN < 4) ? N_EQN : 4;

         for(k = 0; k < n_eqn; k++)
         {
             // linear part of polynomial
             for(i = 0; i < 3; i++)
             {
                 rside[0] = nbuave[i][k] - uave[k];
                 rside[1] = nbuave[(i+1)%3][k] - uave[k];
                 comp_coef2(A,rside,coef[i],i);
             }
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coef[i][0];
                 arryb[i] = coef[i][1];
             }

             // default to zero slopes so that nothing indeterminate is
             // stored when the WENO combination is switched off
             u1 = u2 = 0.0;
             if(debugging("weno_w"))
             {
                 WENO_mod_1(arrya, arryb, c_num, 3, w);
                 u1 = w[0]*coef[0][0] + w[1]*coef[1][0] + w[2]*coef[2][0];
                 u2 = w[0]*coef[0][1] + w[1]*coef[1][1] + w[2]*coef[2][1];

                 if(is_bad_sten[k] == YES)
                 {
                     u1 = u2 = 0.0;
                 }
             }

             u0 = uave[k];
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             }
         }
         Dens(st2) = Dens(st);
         Mom(st2)[0] = Mom(st)[0];
         Mom(st2)[1] = Mom(st)[1];
         Energy(st2) = Energy(st);

         if(N_EQN == 4 && YES == unphysical_st_at_quadrature(tri, st2))
             tri->redo_limiting = YES;
         else
             tri->redo_limiting = NO;
}



/*
*               update_buffer_of_test_problems():
*
*       Refreshes buffer data for whichever test problem is selected through
*       debugging() keys, then attaches buffer states to every listed
*       triangle that satisfies Boundary_tri() or tri_on_phy_bdry().
*
*       row_limit_tris holds N_row rows of triangles.  Every row is visited
*       over N_alloc entries except the last one, of which only the first
*       N_use entries are visited.
*
*       Note that the "Sod" key attaches states with the shock_vort routine.
*/
LOCAL void update_buffer_of_test_problems(
        Front          *fr,
        Mid_soln       *midsoln,
        int            rk_step,
        Limiting_store **limit_store,
        TRI            ***row_limit_tris,
        int            N_alloc,
        int            N_row,
        int            N_use)
{
        int      row, col, row_len;
        TRI      *t;

#if defined(__MPI__)
        if(debugging("db_Mach"))
            update_db_Mach_buffer(fr,midsoln,rk_step,limit_store);
#endif // if defined(__MPI__)

        /// Sod, Lax and shock_vort all use the same buffer update
        if(debugging("Sod") || debugging("Lax") || debugging("shock_vort"))
            update_buffer_x_ref(fr,midsoln,rk_step,limit_store);
        else if(debugging("v_evo") || debugging("Burgers"))
            update_buffer(fr,midsoln,rk_step,limit_store);

        for(row = 0; row < N_row; row++)
        {
            /// only the last row may be partially filled
            row_len = (row == N_row-1) ? N_use : N_alloc;

            for(col = 0; col < row_len; col++)
            {
                t = row_limit_tris[row][col];
                if(!(Boundary_tri(t) || tri_on_phy_bdry(t)))
                    continue;

                if(debugging("twod_riemann"))
                    twod_riemann_attach_buffer_states(fr,midsoln,rk_step,t,limit_store);
                if(debugging("db_Mach"))
                    db_Mach_attach_buffer_states(fr,midsoln,rk_step,t,limit_store);
                if(debugging("shock_vort"))
                    shock_vort_attach_buffer_states(fr,midsoln,rk_step,t,limit_store);
                if(debugging("Sod"))
                    shock_vort_attach_buffer_states(fr,midsoln,rk_step,t,limit_store);
            }
        }
}

/*
 *      preprocess_P3_times_rearrange_order():
 *
 *      Pre-limiting pass over every triangle listed in row_limit_tris.
 *
 *      row_limit_tris is a table of N_row rows; each row holds N_alloc
 *      triangles except the last row, which holds N_use.
 *
 *      Stage 0: on first use, allocates the file-scope scratch rows
 *               mass_1st_row and mass_1st_rows[].
 *      Stage 1: calls limiting_3rd_degreeP3() and then
 *               limiting_2nd_degreeP3() on each triangle together with its
 *               three side neighbours.
 *               NOTE(review): stage 2 reads the limited coefficients from
 *               midsoln[id].st[0], so the limiters presumably leave their
 *               result there -- confirm against their definitions.
 *      Stage 2: for each triangle, with st the state being limited
 *               (tri->st on the final RK stage, midsoln[id].st[rk_step]
 *               otherwise) and nst = midsoln[id].st[0]:
 *                 - adds to coefficient 0 of every conserved variable
 *                       sum_{m>=3} (st_m - nst_m)*M[0][m] / M[0][0],
 *                   M = tri->Lmass_matrix, so that
 *                   c_0*M[0][0] + sum_{m>=3} c_m*M[0][m] is unchanged by the
 *                   replacement below;
 *                 - overwrites coefficients 3..MAX_N_COEF-1 of st with
 *                   those of nst.
 *      Stage 3: update_buffer_of_test_problems().
 *
 *      HR_times is accepted for interface compatibility and is not used.
 */
LOCAL void preprocess_P3_times_rearrange_order(
        Front    *fr,
        Mid_soln *midsoln,
        Limiting_store **limit_store,
        TRI      ***row_limit_tris,
        int      N_alloc,
        int      N_row,
        int      N_use,
        int      rk_step,
        int      HR_times)
{
        TRI       *tri, *nbtri[3];
        Locstate  st, nst;
        float     old_avg[4], new_avg[4];
        int       i, j, m, side, n_tris;
        int       n_scratch = (int)(sizeof(mass_1st_rows)/sizeof(mass_1st_rows[0]));

        /* Stage 0: lazily allocate the shared mass-matrix scratch rows */
        if(mass_1st_row == NULL)
        {
            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));
            for(i = 0; i < n_scratch; i++)
                matrix(&mass_1st_rows[i], 1, MAX_N_COEF,sizeof(double));
        }

        /* Stage 1: limit the 3rd and 2nd degree terms on all tris */
        for(i = 0; i < N_row; i++)
        {
            n_tris = (i == N_row-1) ? N_use : N_alloc;
            for(j = 0; j < n_tris; j++)
            {
                tri = row_limit_tris[i][j];
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(tri,side);
                limiting_3rd_degreeP3(tri,nbtri,3,midsoln,rk_step,YES);
                limiting_2nd_degreeP3(tri,nbtri,3,midsoln,rk_step,YES);
            }
        }

        /* Stage 2: move the limited coefficients (index >= 3) into st */
        for(i = 0; i < N_row; i++)
        {
            n_tris = (i == N_row-1) ? N_use : N_alloc;
            for(j = 0; j < n_tris; j++)
            {
                tri = row_limit_tris[i][j];
                st  = (rk_step == RK_STEP) ? tri->st
                                           : midsoln[tri->id].st[rk_step];
                nst = midsoln[tri->id].st[0];

                for(m = 0; m < 4; m++)
                    old_avg[m] = new_avg[m] = 0.0;

                /* contribution of the coefficients about to be replaced */
                /* (an earlier variant started at 6 instead of 3) */
                for(m = 3; m < MAX_N_COEF; m++)
                {
                    old_avg[0] += dg_Dens(st)[m]*tri->Lmass_matrix[0][m];
                    old_avg[1] += dg_Mom(st)[0][m]*tri->Lmass_matrix[0][m];
                    old_avg[2] += dg_Mom(st)[1][m]*tri->Lmass_matrix[0][m];
                    old_avg[3] += dg_Energy(st)[m]*tri->Lmass_matrix[0][m];
                    new_avg[0] += dg_Dens(nst)[m]*tri->Lmass_matrix[0][m];
                    new_avg[1] += dg_Mom(nst)[0][m]*tri->Lmass_matrix[0][m];
                    new_avg[2] += dg_Mom(nst)[1][m]*tri->Lmass_matrix[0][m];
                    new_avg[3] += dg_Energy(nst)[m]*tri->Lmass_matrix[0][m];
                }
                /*
                 * NOTE(review): four components are accumulated and applied
                 * below, but only the first N_EQN are normalised; the two
                 * agree only when N_EQN == 4.
                 */
                for(m = 0; m < N_EQN; m++)
                {
                    old_avg[m] /= tri->Lmass_matrix[0][0];
                    new_avg[m] /= tri->Lmass_matrix[0][0];
                }

                /* compensate coefficient 0 for the change in higher terms */
                dg_Dens(st)[0]   += (old_avg[0] - new_avg[0]);
                dg_Mom(st)[0][0] += (old_avg[1] - new_avg[1]);
                dg_Mom(st)[1][0] += (old_avg[2] - new_avg[2]);
                dg_Energy(st)[0] += (old_avg[3] - new_avg[3]);

                for(m = 3; m < MAX_N_COEF; m++)
                {
                    dg_Dens(st)[m]   = dg_Dens(nst)[m];
                    dg_Mom(st)[0][m] = dg_Mom(nst)[0][m];
                    dg_Mom(st)[1][m] = dg_Mom(nst)[1][m];
                    dg_Energy(st)[m] = dg_Energy(nst)[m];
                }
            }
        }

        /* Stage 3 */
        update_buffer_of_test_problems(fr,midsoln,rk_step,limit_store,row_limit_tris,N_alloc,N_row,N_use);

        /*
         * Disabled follow-up stages (were commented out in the original and
         * are kept here only as a record of the intended order):
         *   a) limiting_2nd_degreeP3() on all tris, followed by the same
         *      compensation/copy as stage 2 but restricted to
         *      coefficients 3..5;
         *   b) update_buffer_of_test_problems();
         *   c) limiting_1st_degreeP3() on all tris, then copy of
         *      coefficients 0..2 from midsoln[id].st[0] into st;
         *   d) update_tri_CVs(tri,midsoln,limit_store,rk_step) on all tris;
         *   e) update_buffer_of_test_problems() and debug_flag = NO.
         */
}

/*
 *      Subcell_limiting_2nd_degreeP3():
 *
 *      For each of the four control volumes (CVs) of tri, builds the CV
 *      stencil with get_tri_CV_stencil() and, on the final Runge-Kutta
 *      stage (rk_iter == RK_STEP), gathers the x/y derivative averages of
 *      the CV and of every stencil CV, each augmented by the matching
 *      CV_R_degree3_term_averageP3() term.
 *
 *      Side effects visible in this function:
 *        - mass_1st_row[0][] (file-scope scratch) is overwritten with rows
 *          of CVmass_matrix;
 *        - mass_1st_rows[i] is filled by comp_CV_mass_matrix_1st_row() for
 *          each stencil CV;
 *        - CV_R_degree3_term_averageP3_store() is called with
 *          store[0][tri->id].
 *
 *      NOTE(review): the routine looks unfinished.  The gathered averages
 *      are never combined into reconstructed coefficients, and nothing is
 *      done for intermediate RK stages beyond building the stencil.
 *      mass_1st_row must already be allocated
 *      (preprocess_P3_times_rearrange_order() does so on first use).
 *
 *      midsoln is accepted for interface compatibility and is not used.
 */
LOCAL void Subcell_limiting_2nd_degreeP3(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter)
{
         int      cv_indx, num_CVs, i, k, j;
         int      nbcv_indx[9], nbcv_side[9];
         TRI      *tris[9];
         float    uxave[4], nbuxave[30][4], uyave[4], nbuyave[30][4];
         float    Rxave[4], nbRxave[30][4], Ryave[4], nbRyave[30][4];
         int      dim = 2;
         double   *cent, *nbcent[30];

         for(cv_indx = 0; cv_indx < 4; cv_indx++)
         {
             cent = tri->CVcent[cv_indx];
             num_CVs = get_tri_CV_stencil(tri,nbtri,cv_indx,nbcv_indx,nbcv_side,tris,&num_CVs);
             for(i = 0; i < num_CVs; i++)
                 nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

             /* scratch row <- mass-matrix row of this CV */
             for(i = 0; i < MAX_N_COEF; i++)
                 mass_1st_row[0][i] = tri->CVmass_matrix[cv_indx][i];

             /* intermediate RK stages: nothing implemented yet */
             if(rk_iter != RK_STEP)
                 continue;

             /* derivative averages on the CV itself (1: x, 2: y) */
             CV_u_average_indx(tri,cv_indx,1,uxave);
             CV_u_average_indx(tri,cv_indx,2,uyave);

             CV_R_degree3_term_averageP3(tri,cv_indx,mass_1st_row,0,Rxave);
             CV_R_degree3_term_averageP3(tri,cv_indx,mass_1st_row,1,Ryave);
             for(k = 0; k < N_EQN; k++)
             {
                 uxave[k] += Rxave[k];
                 uyave[k] += Ryave[k];
             }
             /* Rxave/Ryave are re-used as output buffers here */
             CV_R_degree3_term_averageP3_store(tri,cv_indx,store[0][tri->id],mass_1st_row,0,Rxave);
             CV_R_degree3_term_averageP3_store(tri,cv_indx,store[0][tri->id],mass_1st_row,1,Ryave);

             /* same quantities on every CV of the stencil */
             for(i = 0; i < num_CVs; i++)
             {
                 for(j = 0; j < MAX_N_COEF; j++)
                     mass_1st_row[0][j] = tris[i]->CVmass_matrix[nbcv_indx[i]][j];

                 CV_u_average_indx(tris[i],nbcv_indx[i],1,nbuxave[i]);
                 CV_u_average_indx(tris[i],nbcv_indx[i],2,nbuyave[i]);
                 CV_R_degree3_term_averageP3(tris[i],nbcv_indx[i],mass_1st_row,0,nbRxave[i]);
                 CV_R_degree3_term_averageP3(tris[i],nbcv_indx[i],mass_1st_row,1,nbRyave[i]);
                 for(k = 0; k < N_EQN; k++)
                 {
                     nbuxave[i][k] += nbRxave[i][k];
                     nbuyave[i][k] += nbRyave[i][k];
                 }
                 /* mass-matrix first row of the neighbour CV taken about cent */
                 comp_CV_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,nbcv_indx[i],cent,mass_1st_rows[i]);
             }
         }
}

/*
 *      Subcell_limiting_2nd_degreeP3_PNC_multi_pt_limit():
 *
 *      Reconstructs the 2nd degree coefficients (indices 3, 4 and 5) of
 *      every conserved variable on tri from partial neighbouring cells and
 *      writes them into midsoln[tri->id].st[0].
 *
 *      Outline:
 *        1. Stencil CVs come from get_tri_near_neighbr_CV_stencil().
 *        2. For each stencil CV five sample points are built: a vertex,
 *           the midpoint of edge nbcv_side[i], a second edge midpoint, the
 *           CV centroid, and the point returned by comput_tri_cent() for
 *           points 0, 1 and 3.  Only the first N_PTS (= 4) are used; their
 *           mean is newcent[i].
 *        3. 2x2 matrices Ax[i], Ay[i] are built from the offsets of
 *           newcent[i] and newcent[i+1] (cyclic) to the centroid of tri,
 *           together with cond_num2() values.  They are computed once per
 *           triangle and cached in alltri_HR_sten_2[tri->id], which keeps
 *           ownership of the allocations.
 *        4. Lxave/Lyave: u_average_indx() of the current state plus its
 *           R_degree3 average, minus the R_degree3 average of st[0].
 *        5. nbLxave/nbLyave: mean over the sample points of the neighbour
 *           CV derivative value (CV_du_indx on the final RK stage,
 *           CV_du_indx_from_store(store[rk_iter]) otherwise) minus
 *           R_degree3_term_pt_P3() of st[0] at that point.
 *        6. Per equation, each stencil pair yields candidate coefficients
 *           through comp_coef2(); WENO_mod_on_2nd() supplies the weights.
 *           The mixed coefficient is a minmod blend of the estimates from
 *           the x and the y system.
 *
 *      detect_extr == YES additionally runs NEW_extrema_detec() and zeroes
 *      the coefficients of equations it flags.
 *
 *      Changes relative to the previous version: is_bad_stenx/is_bad_steny
 *      are only read when detect_extr == YES (they are uninitialized
 *      otherwise); unused locals and the unused neighbour state lookups
 *      were removed.
 *
 *      Other stencil builders that were tried here (disabled in the
 *      original): get_tri_edge_vert_CV_stencil,
 *      get_tri_near_neighbr_CV_stencil_ver2, get_tri_center_CV_stencil,
 *      get_tri_near_neighbr_overlap_CV_stencil,
 *      get_tri_liu_neighbr_overlap_CV_stencil,
 *      get_tri_CV_stencil_unstable_SV_P2_partition_no_regrouping.
 */
LOCAL void Subcell_limiting_2nd_degreeP3_PNC_multi_pt_limit(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter,
         int       detect_extr)
{
         Locstate st, st2;
         int      num_CVs, i, k, j, ipt;
         int      side, vtx, opp;
         int      nbcv_indx[20], nbcv_side[20];
         TRI      *tris[20];
         float    uxave[4], uyave[4];
         double   nbuxaveipt[30][10][4], nbuyaveipt[30][10][4]; /* [cv id][#pts][eqn] */
         float    Rxave[4], Ryave[4];
         float    nbRxaveipt[30][10][4], nbRyaveipt[30][10][4]; /* [cv id][#pts][eqn] */
         float    Lxave[4], nbLxave[30][8], Lyave[4], nbLyave[30][8];
         int      dim = 2;
         double   *cent, *nbcent[30], rside[3], midpt[3][3], newcent[30][MAXD];
         double   ***Ax, ***Ay, *c_num_x, *c_num_y;
         float    coefx[30][2], coefy[30][2];
         float    u3, u4, u5, u4_0, u4_1, arrya[90], arryb[90], w[90];
         float    diam;
         int      is_bad_stenx[30], is_bad_steny[30];
         int      N_PTS = 4;
         double   *pcrds[4], qcrds_cv[30][8][3]; /* [cv id][#pts][crds] */

         /* state being limited; st2 receives the reconstructed coefficients */
         st = (rk_iter == RK_STEP) ? tri->st : midsoln[tri->id].st[rk_iter];
         st2 = midsoln[tri->id].st[0];
         diam = fg_diam(tri);
         cent = fg_centroid(tri);

         num_CVs = get_tri_near_neighbr_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         /* find crds of the points where function values are evaluated */
         for(i = 0; i < num_CVs; i++)
         {
             /*
              * NOTE(review): mass_1st_row is not read again in this
              * function; the copy is kept so the shared scratch row holds
              * the same contents as before.
              */
             for(j = 0; j < MAX_N_COEF; j++)
                 mass_1st_row[0][j] = tris[i]->CVmass_matrix[nbcv_indx[i]][j];

             for(j = 0; j < 3; j++)
                 pcrds[j] = Coords( Point_of_tri(tris[i])[j] );
             for(j = 0; j < 3; j++)
             {
                 for(k = 0; k < dim; k++)
                     midpt[j][k] = (pcrds[(j+1)%3][k] + pcrds[j][k])*0.5;
             }

             side = nbcv_side[i];
             if(side == nbcv_indx[i])
             {
                 vtx = side;
                 opp = (side+2)%3;
             }
             else
             {
                 vtx = (side+1)%3;
                 opp = (side+1)%3;
             }

             for(j = 0; j < dim; j++)
             {
                 qcrds_cv[i][0][j] = pcrds[vtx][j];   /* 1: vertex */
                 qcrds_cv[i][1][j] = midpt[side][j];  /* 2: midpoint of adjacent edge */
                 qcrds_cv[i][2][j] = midpt[opp][j];   /* 3: midpoint on the opposite side */
                 qcrds_cv[i][3][j] = nbcent[i][j];    /* 4: CV centroid */
             }
             /* 5: center of the partial cell (points 1, 2, 4); unused while N_PTS == 4 */
             comput_tri_cent(2, qcrds_cv[i][0], qcrds_cv[i][1], qcrds_cv[i][3], qcrds_cv[i][4]);

             /* mean of the sample points actually used */
             newcent[i][0] = newcent[i][1] = 0.0;
             for(ipt = 0; ipt < N_PTS; ipt++)
             {
                 newcent[i][0] += qcrds_cv[i][ipt][0];
                 newcent[i][1] += qcrds_cv[i][ipt][1];
             }
             newcent[i][0] /= N_PTS;
             newcent[i][1] /= N_PTS;
         }

         /* geometry-only least-squares data: build once per triangle, then reuse */
         if(alltri_HR_sten_2[tri->id].HR_sten_set_2nd == NO)
         {
             tri_array(&Ax,num_CVs,2,2,sizeof(double));
             tri_array(&Ay,num_CVs,2,2,sizeof(double));
             vector(&c_num_x,num_CVs,sizeof(double));
             vector(&c_num_y,num_CVs,sizeof(double));
             for(i = 0; i < num_CVs; i++)
             {
                 /* an earlier variant used nbcent[] instead of newcent[] */
                 Ax[i][0][0] = 2.0*(newcent[i][0]-cent[0]);
                 Ax[i][0][1] =     (newcent[i][1]-cent[1]);
                 Ax[i][1][0] = 2.0*(newcent[(i+1)%num_CVs][0]-cent[0]);
                 Ax[i][1][1] =     (newcent[(i+1)%num_CVs][1]-cent[1]);
                 c_num_x[i] = cond_num2(Ax,i);
                 Ay[i][0][0] =     (newcent[i][0]-cent[0]);
                 Ay[i][0][1] = 2.0*(newcent[i][1]-cent[1]);
                 Ay[i][1][0] =     (newcent[(i+1)%num_CVs][0]-cent[0]);
                 Ay[i][1][1] = 2.0*(newcent[(i+1)%num_CVs][1]-cent[1]);
                 c_num_y[i] = cond_num2(Ay,i);
             }
             alltri_HR_sten_2[tri->id].HR_sten_set_2nd = YES;
             alltri_HR_sten_2[tri->id].Ax = Ax;
             alltri_HR_sten_2[tri->id].Ay = Ay;
             alltri_HR_sten_2[tri->id].c_num_x = c_num_x;
             alltri_HR_sten_2[tri->id].c_num_y = c_num_y;
         }
         else
         {
             Ax = alltri_HR_sten_2[tri->id].Ax;
             Ay = alltri_HR_sten_2[tri->id].Ay;
             c_num_x = alltri_HR_sten_2[tri->id].c_num_x;
             c_num_y = alltri_HR_sten_2[tri->id].c_num_y;
         }

         /* averages on tri: current state plus its degree-3 contribution ... */
         u_average_indx(tri,st,1,uxave);
         u_average_indx(tri,st,2,uyave);
         R_degree3_term_averageP3(tri,st,tri->Lmass_matrix,0,1.0,Rxave);
         R_degree3_term_averageP3(tri,st,tri->Lmass_matrix,1,1.0,Ryave);
         for(k = 0; k < N_EQN; k++)
         {
             uxave[k] += Rxave[k];
             uyave[k] += Ryave[k];
         }
         /* ... from which the degree-3 contribution of st2 is removed below */
         R_degree3_term_averageP3(tri,st2,tri->Lmass_matrix,0,1.0,Rxave);
         R_degree3_term_averageP3(tri,st2,tri->Lmass_matrix,1,1.0,Ryave);

         /* neighbour CV derivative values at the sample points */
         for(i = 0; i < num_CVs; i++)
         {
             for(ipt = 0; ipt < N_PTS; ipt++)
             {
                 if(rk_iter == RK_STEP)
                 {
                     CV_du_indx(tris[i], nbcv_indx[i], 1, nbcent[i], qcrds_cv[i][ipt], nbuxaveipt[i][ipt]);
                     CV_du_indx(tris[i], nbcv_indx[i], 2, nbcent[i], qcrds_cv[i][ipt], nbuyaveipt[i][ipt]);
                 }
                 else
                 {
                     CV_du_indx_from_store(tris[i], nbcv_indx[i], 1, store[rk_iter], nbcent[i], qcrds_cv[i][ipt], nbuxaveipt[i][ipt]);
                     CV_du_indx_from_store(tris[i], nbcv_indx[i], 2, store[rk_iter], nbcent[i], qcrds_cv[i][ipt], nbuyaveipt[i][ipt]);
                 }
             }
         }

         /* degree-3 contribution of st2 at the same points */
         for(i = 0; i < num_CVs; i++)
         {
             for(ipt = 0; ipt < N_PTS; ipt++)
             {
                 R_degree3_term_pt_P3(st2,0, cent, qcrds_cv[i][ipt], nbRxaveipt[i][ipt]);
                 R_degree3_term_pt_P3(st2,1, cent, qcrds_cv[i][ipt], nbRyaveipt[i][ipt]);
             }
         }

         for(k = 0; k < N_EQN; k++)
         {
             Lxave[k] = uxave[k]-Rxave[k];
             Lyave[k] = uyave[k]-Ryave[k];
             for(i = 0; i < num_CVs; i++)
             {
                 /* mean over the sample points of (value - degree-3 part) */
                 nbLxave[i][k] = nbLyave[i][k] = 0.0;
                 for(ipt = 0; ipt < N_PTS; ipt++)
                 {
                     nbLxave[i][k] += (nbuxaveipt[i][ipt][k]-nbRxaveipt[i][ipt][k]);
                     nbLyave[i][k] += (nbuyaveipt[i][ipt][k]-nbRyaveipt[i][ipt][k]);
                 }
                 nbLxave[i][k] /= N_PTS;
                 nbLyave[i][k] /= N_PTS;
             }
         }

         if(detect_extr == YES)
         {
             NEW_extrema_detec(Lxave,nbLxave,num_CVs,is_bad_stenx);
             NEW_extrema_detec(Lyave,nbLyave,num_CVs,is_bad_steny);
         }

         for(k = 0; k < N_EQN; k++)
         {
             /*
              * One 2x2 solve per pair of adjacent stencil CVs (i, i+1).
              * (A variant pairing i with i+2 was tried and is disabled.)
              */
             for(i = 0; i < num_CVs; i++)
             {
                 rside[0] = nbLxave[i][k] - Lxave[k];
                 rside[1] = nbLxave[(i+1)%num_CVs][k] - Lxave[k];
                 comp_coef2(Ax,rside,coefx[i],i);

                 rside[0] = nbLyave[i][k] - Lyave[k];
                 rside[1] = nbLyave[(i+1)%num_CVs][k] - Lyave[k];
                 comp_coef2(Ay,rside,coefy[i],i);
             }

             /* x system: weighted candidates for u3 and the mixed term */
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefx[i][0];
                 arryb[i] = coefx[i][1];
             }
             WENO_mod_on_2nd(arrya,arryb,c_num_x,num_CVs,diam,w);
             u3 = u4_0 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u3   += w[i]*coefx[i][0];
                 u4_0 += w[i]*coefx[i][1];
             }
             /* detect_extr is tested first: is_bad_stenx is only set when it is YES */
             if(detect_extr == YES && is_bad_stenx[k] == YES)
             {
                 u3 = 0.0; u4_0 = 0.0;
             }

             /* y system: weighted candidates for the mixed term and u5 */
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefy[i][0];
                 arryb[i] = coefy[i][1];
             }
             WENO_mod_on_2nd(arrya,arryb,c_num_y,num_CVs,diam,w);
             u4_1 = u5 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u4_1 += w[i]*coefy[i][0];
                 u5   += w[i]*coefy[i][1];
             }
             if(detect_extr == YES && is_bad_steny[k] == YES)
             {
                 u4_1 = 0.0; u5 = 0.0;
             }

             /* blend the two mixed-term estimates; 0.05 (also tried: 0.01) */
             u4 = minmod((1+0.05)*minmod(u4_0,u4_1), 0.5*(u4_0 + u4_1));

             /*
              * Store into st2.  (A disabled variant forced a coefficient to
              * zero when the matching coefficient of st was below 1.0E-13
              * in magnitude.)
              */
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[3] = u3;
                 dg_Dens(st2)[4] = u4;
                 dg_Dens(st2)[5] = u5;
             break;
             case 1:
                 dg_Mom(st2)[0][3] = u3;
                 dg_Mom(st2)[0][4] = u4;
                 dg_Mom(st2)[0][5] = u5;
             break;
             case 2:
                 dg_Mom(st2)[1][3] = u3;
                 dg_Mom(st2)[1][4] = u4;
                 dg_Mom(st2)[1][5] = u5;
             break;
             case 3:
                 dg_Energy(st2)[3] = u3;
                 dg_Energy(st2)[4] = u4;
                 dg_Energy(st2)[5] = u5;
             break;
             default:
             break;
             }
         }
}



/*
*               Subcell_limiting_2nd_degreeP3_PNC():
*
*       Recomputes coefficients [3], [4] and [5] of every conserved variable
*       (density, 3 momentum components, energy, 3 B components) of the state
*       midsoln[tri->id].st[0], using a stencil of neighboring control
*       volumes (CVs) -- "reconstruct by partial neighboring cells".
*
*       Steps, as implemented below:
*         1. Pick the input state st (tri->st, or the rk_iter-th mid state)
*            and the output state st2 = midsoln[tri->id].st[0].
*         2. Build the CV stencil with
*            get_tri_neighbr_CV_stencil_from_set_HR_sten().
*         3. On the first visit of this triangle, build one 2x2 system per
*            pair of cyclically adjacent stencil CVs (Ax for the x-average,
*            Ay for the y-average) plus its condition number, and cache them
*            in alltri_HR_sten_2[tri->id].  Later visits reuse the cache.
*         4. Form the averages Lxave/Lyave on tri and nbLxave/nbLyave on
*            each stencil CV: index-1/index-2 averages of st (scaled by
*            1/sqrt(area)), plus the degree-3 term of st, minus the degree-3
*            term of st2.
*         5. For each equation solve all 2x2 systems, blend the candidate
*            coefficients with the weights from WENO_mod_on_2nd(), optionally
*            zero them where NEW_extrema_detec() flagged the equation, and
*            store the result in st2.
*         6. Re-couple the B coefficients (see "re-enforce divergence-free").
*
*       Parameters:
*         tri          triangle being limited.
*         nbtri        its three edge neighbors; only forwarded to the
*                      stencil builder.
*         midsoln      per-triangle intermediate states, indexed by tri->id.
*         store        not referenced by this function.
*         rk_iter      Runge-Kutta stage; RK_STEP selects tri->st as input.
*         detect_extr  YES to run extrema detection and zero flagged
*                      coefficients.
*
*       NOTE(review): midsoln is tested against NULL when choosing st but is
*       dereferenced unconditionally for st2 (and for the neighbor states
*       when rk_iter != RK_STEP), so a NULL midsoln is not actually
*       supported -- confirm with the callers.
*
*       NOTE(review): the cached Ax/Ay/c_num_* and mass_1st_rows are sized
*       and filled for the stencil seen on the first call; this presumably
*       relies on the stencil of a triangle not changing between calls.
*/
LOCAL void Subcell_limiting_2nd_degreeP3_PNC(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter,
         int       detect_extr)
{
         Locstate st, st2, tmpst;
         int      num_CVs, i, k;
         int      nbcv_indx[20], nbcv_side[20];
         TRI      *tris[20];
         float    uxave[8], nbuxave[30][8], uyave[8], nbuyave[30][8];
         float    Rxave[8], nbRxave[30][8], Ryave[8], nbRyave[30][8];
         float    Lxave[8], nbLxave[30][8], Lyave[8], nbLyave[30][8];
         int      dim = 2;
         double   *cent, *nbcent[30], rside[3], area, sqrt_area, tmp_sqrt_area;
         double   ***Ax, ***Ay, *c_num_x, *c_num_y;
         float    coefx[30][2], coefy[30][2];
         float    u3, u4, u5, u4_0, u4_1, arrya[90], arryb[90], w[90];
         float    diam;
         int      is_bad_stenx[30], is_bad_steny[30];

         /* Input state: current solution or the rk_iter-th mid state. */
         if(NULL == midsoln || rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];

         /* Output state: limited coefficients are written here. */
         st2 = midsoln[tri->id].st[0];
         diam = fg_diam(tri);
         cent = fg_centroid(tri);
         area = fg_area(tri);
         sqrt_area = sqrt(fg_area(tri));

         /* Stencil of neighboring CVs: tris[i] owns CV number nbcv_indx[i]. */
         num_CVs = get_tri_neighbr_CV_stencil_from_set_HR_sten(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         if(alltri_HR_sten_2[tri->id].HR_sten_set_2nd == NO)
         {
             /* First visit: build and cache the 2x2 systems. */
             tri_array(&Ax,num_CVs,2,2,sizeof(double));
             tri_array(&Ay,num_CVs,2,2,sizeof(double));
             vector(&c_num_x,num_CVs,sizeof(double));
             vector(&c_num_y,num_CVs,sizeof(double));
             for(i = 0; i < num_CVs; i++)
             {
                 /* Row 0 uses stencil CV i, row 1 its cyclic successor. */
                 Ax[i][0][0] = 2.0*(nbcent[i][0]-cent[0])/area;
                 Ax[i][0][1] =     (nbcent[i][1]-cent[1])/area;
                 Ax[i][1][0] = 2.0*(nbcent[(i+1)%num_CVs][0]-cent[0])/area;
                 Ax[i][1][1] =     (nbcent[(i+1)%num_CVs][1]-cent[1])/area;
                 c_num_x[i] = cond_num2(Ax,i);
                 Ay[i][0][0] =     (nbcent[i][0]-cent[0])/area;
                 Ay[i][0][1] = 2.0*(nbcent[i][1]-cent[1])/area;
                 Ay[i][1][0] =     (nbcent[(i+1)%num_CVs][0]-cent[0])/area;
                 Ay[i][1][1] = 2.0*(nbcent[(i+1)%num_CVs][1]-cent[1])/area;
                 c_num_y[i] = cond_num2(Ay,i);
             }
             alltri_HR_sten_2[tri->id].HR_sten_set_2nd = YES;
             alltri_HR_sten_2[tri->id].Ax = Ax;
             alltri_HR_sten_2[tri->id].Ay = Ay;
             alltri_HR_sten_2[tri->id].c_num_x = c_num_x;
             alltri_HR_sten_2[tri->id].c_num_y = c_num_y;
         }
         else
         {
             Ax = alltri_HR_sten_2[tri->id].Ax;
             Ay = alltri_HR_sten_2[tri->id].Ay;
             c_num_x = alltri_HR_sten_2[tri->id].c_num_x;
             c_num_y = alltri_HR_sten_2[tri->id].c_num_y;
         }

         /* Averages on tri itself, taken from the input state st. */
         u_average_indx(tri,st,1,uxave);
         u_average_indx(tri,st,2,uyave);

         for(k = 0; k < N_EQN; k++)
             uxave[k] /= sqrt_area;
         for(k = 0; k < N_EQN; k++)
             uyave[k] /= sqrt_area;

         R_degree3_term_averageP3(tri,st,tri->Bmass_matrix,0,sqrt_area,Rxave);
         R_degree3_term_averageP3(tri,st,tri->Bmass_matrix,1,sqrt_area,Ryave);
         for(k = 0; k < N_EQN; k++)
         {
             uxave[k] += Rxave[k];
             uyave[k] += Ryave[k];
         }
         /* From here on Rxave/Ryave hold the degree-3 term of st2, not st. */
         R_degree3_term_averageP3(tri,st2,tri->Bmass_matrix,0,sqrt_area,Rxave);
         R_degree3_term_averageP3(tri,st2,tri->Bmass_matrix,1,sqrt_area,Ryave);

         /* First visit: allocate and fill the cached mass-matrix first rows. */
         if(alltri_mass_1st_rows[tri->id].mass_1st_rows[0] == NULL)
         {
             for(i = 0; i < 20; i++)
                 matrix(&(alltri_mass_1st_rows[tri->id].mass_1st_rows[i]), 1, MAX_N_COEF,sizeof(double));
             for(i = 0; i < num_CVs; i++)
                 comp_CV_Mag_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,nbcv_indx[i],cent,sqrt_area,
                          alltri_mass_1st_rows[tri->id].mass_1st_rows[i]);
         }

         /* Averages on every stencil CV. */
         for(i = 0; i < num_CVs; i++)
         {
             mass_1st_rows[i] = alltri_mass_1st_rows[tri->id].mass_1st_rows[i];

             tmp_sqrt_area = sqrt(fg_area(tris[i]));
             if(rk_iter == RK_STEP)
                 tmpst = tris[i]->st;
             else
                 tmpst = midsoln[tris[i]->id].st[rk_iter];
             CV_u_average_indx_MHD(tris[i],nbcv_indx[i],1,tmpst,nbuxave[i]);
             for(k = 0; k < N_EQN; k++)
                 nbuxave[i][k] /= tmp_sqrt_area;

             CV_u_average_indx_MHD(tris[i],nbcv_indx[i],2,tmpst,nbuyave[i]);
             for(k = 0; k < N_EQN; k++)
                 nbuyave[i][k] /= tmp_sqrt_area;

             /* Degree-3 term of tri's own st2, evaluated with the cached
              * first row for CV i and tri's sqrt_area. */
             R_degree3_term_averageP3(tris[i],st2,mass_1st_rows[i],0,sqrt_area,nbRxave[i]);
             R_degree3_term_averageP3(tris[i],st2,mass_1st_rows[i],1,sqrt_area,nbRyave[i]);
         }

         /* Subtract the st2 degree-3 contribution on tri and on each CV. */
         for(k = 0; k < N_EQN; k++)
             Lxave[k] = uxave[k]-Rxave[k];
         for(k = 0; k < N_EQN; k++)
             Lyave[k] = uyave[k]-Ryave[k];

         for(i = 0; i < num_CVs; i++)
         {
             for(k = 0; k < N_EQN; k++)
                 nbLxave[i][k] = nbuxave[i][k]-nbRxave[i][k];
             for(k = 0; k < N_EQN; k++)
                 nbLyave[i][k] = nbuyave[i][k]-nbRyave[i][k];
         }

         /* is_bad_stenx/is_bad_steny are filled ONLY when detect_extr == YES. */
         if(detect_extr == YES)
         {
             NEW_extrema_detec(Lxave,nbLxave,num_CVs,is_bad_stenx);
             NEW_extrema_detec(Lyave,nbLyave,num_CVs,is_bad_steny);
         }

         for(k = 0; k < N_EQN; k++)
         {
             /* One candidate coefficient pair per stencil pair (i, i+1). */
             for(i = 0; i < num_CVs; i++)
             {
                 rside[0] = nbLxave[i][k] - Lxave[k];
                 rside[1] = nbLxave[(i+1)%num_CVs][k] - Lxave[k];
                 comp_coef2(Ax,rside,coefx[i],i);

                 rside[0] = nbLyave[i][k] - Lyave[k];
                 rside[1] = nbLyave[(i+1)%num_CVs][k] - Lyave[k];
                 comp_coef2(Ay,rside,coefy[i],i);
             }

             /* Blend the x-system candidates -> u3 and first estimate of u4. */
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefx[i][0];
                 arryb[i] = coefx[i][1];
             }
             WENO_mod_on_2nd(arrya,arryb,c_num_x,num_CVs,diam,w);
             u3 = u4_0 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u3   += w[i]*coefx[i][0];
                 u4_0 += w[i]*coefx[i][1];
             }
             /* detect_extr is tested first so the flag array is never read
              * when extrema detection did not run (it is uninitialized then). */
             if(detect_extr == YES && is_bad_stenx[k] == YES)
             {
                 u3 = 0.0; u4_0 = 0.0;
             }

             /* Blend the y-system candidates -> second estimate of u4 and u5. */
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefy[i][0];
                 arryb[i] = coefy[i][1];
             }
             WENO_mod_on_2nd(arrya,arryb,c_num_y,num_CVs,diam,w);
             u4_1 = u5 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u4_1 += w[i]*coefy[i][0];
                 u5   += w[i]*coefy[i][1];
             }
             if(detect_extr == YES && is_bad_steny[k] == YES)
             {
                 u4_1 = 0.0; u5 = 0.0;
             }

             /* Combine the two u4 estimates; 0.05 is the relaxation factor
              * (0.01 was the other value noted by the original author). */
             u4 = minmod((1+0.05)*minmod(u4_0,u4_1), 0.5*(u4_0 + u4_1));

             switch(k)
             {
             case 0:
                 dg_Dens(st2)[3] = u3;
                 dg_Dens(st2)[4] = u4;
                 dg_Dens(st2)[5] = u5;
             break;
             case 1:
                 dg_Mom(st2)[0][3] = u3;
                 dg_Mom(st2)[0][4] = u4;
                 dg_Mom(st2)[0][5] = u5;
             break;
             case 2:
                 dg_Mom(st2)[1][3] = u3;
                 dg_Mom(st2)[1][4] = u4;
                 dg_Mom(st2)[1][5] = u5;
             break;
             case 3:
                 dg_Mom(st2)[2][3] = u3;
                 dg_Mom(st2)[2][4] = u4;
                 dg_Mom(st2)[2][5] = u5;
             break;
             case 4:
                 dg_Energy(st2)[3] = u3;
                 dg_Energy(st2)[4] = u4;
                 dg_Energy(st2)[5] = u5;
             break;
             case 5:
                 dg_B(st2)[0][3] = u3;
                 dg_B(st2)[0][4] = u4;
                 dg_B(st2)[0][5] = u5;
             break;
             case 6:
                 dg_B(st2)[1][3] = u3;
                 dg_B(st2)[1][4] = u4;
                 dg_B(st2)[1][5] = u5;
             break;
             case 7:
                 dg_B(st2)[2][3] = u3;
                 dg_B(st2)[2][4] = u4;
                 dg_B(st2)[2][5] = u5;
             break;
             default:
                 /* no other equation index is stored */
             break;
             }
         }

         // re-enforce divergence-free
         // Clip |B[0][3]| by 0.5*|B[1][4]| keeping its sign, then slave
         // B[1][4] to it so that B[1][4] == -2*B[0][3].
         if(dg_B(st2)[0][3] >= 0.0)
             dg_B(st2)[0][3] = min(fabs(dg_B(st2)[0][3]), 0.5*fabs(dg_B(st2)[1][4]));
         else
             dg_B(st2)[0][3] = -min(fabs(dg_B(st2)[0][3]), 0.5*fabs(dg_B(st2)[1][4]));

         dg_B(st2)[1][4] = -2.0*dg_B(st2)[0][3];

         // Same for the other pair: clip |B[0][4]| by 2*|B[1][5]|, then set
         // B[1][5] == -0.5*B[0][4].
         if(dg_B(st2)[0][4] >= 0.0)
             dg_B(st2)[0][4] = min(fabs(dg_B(st2)[0][4]), 2.0*fabs(dg_B(st2)[1][5]));
         else
             dg_B(st2)[0][4] = -min(fabs(dg_B(st2)[0][4]), 2.0*fabs(dg_B(st2)[1][5]));

         dg_B(st2)[1][5] = -0.5*dg_B(st2)[0][4];
}

// Reconstructed coeffs. are stored in limit_store for CV polynomials
// Reconstruct by partial neighboring cells
LOCAL void Subcell_limiting_2nd_degreeP3_PNC_grouping(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter,
         int       detect_extr)
{
         Locstate st, nbst[30], st2, nbst2[30];
         int      cv_indx, num_CVs, i, k, j;
         int      on_SV_side, on_SV_side2, nbcv_indx[20], nbcv_side[20];
         TRI      *tris[20];
         float    uxave[4], nbuxave[30][4], uyave[4], nbuyave[30][4],
                  tmpuxave[4], tmpuyave[4], tmpnbuxave[30][4], tmpnbuyave[30][4];
         float    Rxave[4], nbRxave[30][4], Ryave[4], nbRyave[30][4];
         float    Lxave[4], nbLxave[30][8], Lyave[4], nbLyave[30][8],
                   tmpnbLxave[30][4], tmpnbLyave[30][4];
         int      dim = 2, indx, l;
         double    *cent, *nbcent[30], rside[3], rside2[3];
         // float    Ax[30][2][2], Ay[30][2][2], c_num_x[30], c_num_y[30];
         double   ***Ax, ***Ay, *c_num_x, *c_num_y;
         float    coefx[30][2], coefy[30][2];
         float    u3, u4, u5, u4_0, u4_1, avg1, avg2, wx[90], wy[90], arrya[90], arryb[90], w[90];
         double **Lmass_matrix = tri->Lmass_matrix;
         float    eps = 1.5, diam;
         // float    sqr_diam, cub_diam; // 0.5, 0.8 for all_cent
         int      idir, is_bad_stenx[30], is_bad_steny[30];
         int      debug = NO;
         float    area = fg_area(tri);
         float     Axb[30][2][2], Ayb[30][2][2], coefxb[30][2], coefyb[30][2];
         float    gnbcent[20][3], areatmp;
         float    gnbuxave[30][4], gnbuyave[30][4];
         float    gRxave[4], gnbRxave[30][4], gRyave[4], gnbRyave[30][4];

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < 3; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
                 nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }
         st2 = midsoln[tri->id].st[0];
         // diam = fg_diam(tri);
         diam = 1.0;
         cent = fg_centroid(tri);

         // num_CVs = get_tri_edge_vert_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_near_neighbr_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_near_neighbr_CV_stencil_ver2(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_center_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         /////// currently used stencil for regrouping 
         num_CVs = get_tri_near_neighbr_overlap_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_liu_neighbr_overlap_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);

         // num_CVs = get_tri_CV_stencil_unstable_SV_P2_partition(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         u_average_indx(tri,st,1,uxave);
         u_average_indx(tri,st,2,uyave);
         R_degree3_term_averageP3(tri,st,tri->Lmass_matrix,0,1.0,Rxave);
         R_degree3_term_averageP3(tri,st,tri->Lmass_matrix,1,1.0,Ryave);
         for(k = 0; k < N_EQN; k++)
         {
             uxave[k] += Rxave[k];
             uyave[k] += Ryave[k];
         }
         R_degree3_term_averageP3(tri,st2,tri->Lmass_matrix,0,1.0,Rxave);
         R_degree3_term_averageP3(tri,st2,tri->Lmass_matrix,1,1.0,Ryave);

         if(rk_iter == RK_STEP)
         {
             // sten_n partial neighbor Cvs
             for(i = 0; i < num_CVs; i++)
             {
                 for(j = 0; j < MAX_N_COEF; j++)
                     mass_1st_row[0][j] = tris[i]->CVmass_matrix[nbcv_indx[i]][j];

                 CV_u_average_indx(tris[i],nbcv_indx[i],1,nbuxave[i]);
                 CV_u_average_indx(tris[i],nbcv_indx[i],2,nbuyave[i]);
                 CV_R_degree3_term_averageP3(tris[i],nbcv_indx[i],mass_1st_row,0,nbRxave[i]);
                 CV_R_degree3_term_averageP3(tris[i],nbcv_indx[i],mass_1st_row,1,nbRyave[i]);
                 for(k = 0; k < N_EQN; k++)
                 {
                     nbuxave[i][k] += nbRxave[i][k];
                     nbuyave[i][k] += nbRyave[i][k];
                 }
             }
         }
         else
         {
             /// rewrite ?????? 
             // sten_n partial neighbor Cvs
             for(i = 0; i < num_CVs; i++)
             {  
                 for(j = 0; j < MAX_N_COEF; j++)
                     mass_1st_row[0][j] = tris[i]->CVmass_matrix[nbcv_indx[i]][j];

                 CV_u_average_indx_from_store(tris[i],nbcv_indx[i],1,store[rk_iter],nbuxave[i]);
                 CV_u_average_indx_from_store(tris[i],nbcv_indx[i],2,store[rk_iter],nbuyave[i]);
                 CV_R_degree3_term_averageP3_store(tris[i],nbcv_indx[i],
                                   store[rk_iter][tris[i]->id],mass_1st_row,0,nbRxave[i]);
                 CV_R_degree3_term_averageP3_store(tris[i],nbcv_indx[i],
                                   store[rk_iter][tris[i]->id],mass_1st_row,1,nbRyave[i]);
                 for(k = 0; k < N_EQN; k++)
                 {
                     nbuxave[i][k] += nbRxave[i][k];
                     nbuyave[i][k] += nbRyave[i][k];
                 }
             }  
         }

         if(alltri_mass_1st_rows_grp[tri->id].mass_1st_rows[0] == NULL)
         {
             for(i = 0; i < 30; i++)
                 matrix(&(alltri_mass_1st_rows_grp[tri->id].mass_1st_rows[i]), 1, MAX_N_COEF,sizeof(double));
             for(i = 0; i < num_CVs; i++)
                 comp_CV_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,nbcv_indx[i],cent,
                          alltri_mass_1st_rows_grp[tri->id].mass_1st_rows[i]);
         }

         for(i = 0; i < num_CVs; i++)
         {
             // now precompute mass_1st_rows[i], see above
             // comp_CV_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,nbcv_indx[i],cent,mass_1st_rows[i]); 
             // now use saved data
             mass_1st_rows[i] = alltri_mass_1st_rows_grp[tri->id].mass_1st_rows[i];

             R_degree3_term_averageP3(tris[i],st2,mass_1st_rows[i],0,1.0,nbRxave[i]);
             R_degree3_term_averageP3(tris[i],st2,mass_1st_rows[i],1,1.0,nbRyave[i]);
         }

         for(i = 0; i < gnum_CVs; i++)
         {
              if(grouping_n[i] == 1)
              {
                  if(alltri_HR_sten_2_grp[tri->id].HR_sten_set_2nd == NO)
                  {
                      for(j = 0; j < dim; j++)
                          gnbcent[i][j] = nbcent[grouping_indx[i][0]][j];
                  }
                  for(k = 0; k < N_EQN; k++)
                  {
                      gnbuxave[i][k] = nbuxave[grouping_indx[i][0]][k];
                      gnbuyave[i][k] = nbuyave[grouping_indx[i][0]][k];
                      gnbRxave[i][k] = nbRxave[grouping_indx[i][0]][k];
                      gnbRyave[i][k] = nbRyave[grouping_indx[i][0]][k];
                  }
              }
              else if(grouping_n[i] == 2)
              {
                  area = tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0];
                  if(alltri_HR_sten_2_grp[tri->id].HR_sten_set_2nd == NO)
                  {
                      for(j = 0; j < dim; j++)
                      {
                          gnbcent[i][j] = (nbcent[grouping_indx[i][0]][j]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                       nbcent[grouping_indx[i][1]][j]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0]
                                      )/area;
                      }
                  }
                  for(k = 0; k < N_EQN; k++)
                  {
                      gnbuxave[i][k] = (nbuxave[grouping_indx[i][0]][k]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                        nbuxave[grouping_indx[i][1]][k]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0]
                                        )/area;
                      gnbuyave[i][k] = (nbuyave[grouping_indx[i][0]][k]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                        nbuyave[grouping_indx[i][1]][k]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0]
                                        )/area;
                      gnbRxave[i][k] = (nbRxave[grouping_indx[i][0]][k]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                        nbRxave[grouping_indx[i][1]][k]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0]
                                        )/area;
                      gnbRyave[i][k] = (nbRyave[grouping_indx[i][0]][k]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                        nbRyave[grouping_indx[i][1]][k]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0]
                                        )/area;
                  }
              }
              else if(grouping_n[i] == 3)
              {
                  area = tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0] +
                         tris[grouping_indx[i][2]]->CVmass_matrix[nbcv_indx[grouping_indx[i][2]]][0];
                  if(alltri_HR_sten_2_grp[tri->id].HR_sten_set_2nd == NO)
                  {
                      for(j = 0; j < dim; j++)
                      {
                          gnbcent[i][j] = (nbcent[grouping_indx[i][0]][j]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                       nbcent[grouping_indx[i][1]][j]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0] +
                                       nbcent[grouping_indx[i][2]][j]*
                                         tris[grouping_indx[i][2]]->CVmass_matrix[nbcv_indx[grouping_indx[i][2]]][0]
                                      )/area;
                      }
                  }
                  for(k = 0; k < N_EQN; k++)
                  {
                      gnbuxave[i][k] = (nbuxave[grouping_indx[i][0]][k]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                        nbuxave[grouping_indx[i][1]][k]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0] +
                                        nbuxave[grouping_indx[i][2]][k]*
                                         tris[grouping_indx[i][2]]->CVmass_matrix[nbcv_indx[grouping_indx[i][2]]][0]
                                        )/area;
                      gnbuyave[i][k] = (nbuyave[grouping_indx[i][0]][k]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                        nbuyave[grouping_indx[i][1]][k]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0] +
                                        nbuyave[grouping_indx[i][2]][k]*
                                         tris[grouping_indx[i][2]]->CVmass_matrix[nbcv_indx[grouping_indx[i][2]]][0]
                                        )/area;
                      gnbRxave[i][k] = (nbRxave[grouping_indx[i][0]][k]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                        nbRxave[grouping_indx[i][1]][k]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0] +
                                        nbRxave[grouping_indx[i][2]][k]*
                                         tris[grouping_indx[i][2]]->CVmass_matrix[nbcv_indx[grouping_indx[i][2]]][0]
                                        )/area;
                      gnbRyave[i][k] = (nbRyave[grouping_indx[i][0]][k]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                        nbRyave[grouping_indx[i][1]][k]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0] +
                                        nbRyave[grouping_indx[i][2]][k]*
                                         tris[grouping_indx[i][2]]->CVmass_matrix[nbcv_indx[grouping_indx[i][2]]][0]
                                        )/area;
                  }
              }
         }

         for(k = 0; k < N_EQN; k++)
         {
             Lxave[k] = uxave[k]-Rxave[k];
             Lyave[k] = uyave[k]-Ryave[k];
             for(i = 0; i < gnum_CVs; i++)
             {
                 nbLxave[i][k] = gnbuxave[i][k]-gnbRxave[i][k];
                 nbLyave[i][k] = gnbuyave[i][k]-gnbRyave[i][k];
             }
         }

         if(detect_extr == YES)
         {
             /*
             u_average_indx(tri,st,1,tmpuxave);
             u_average_indx(tri,st,2,tmpuyave);
             // R_degree3_term_averageP3(tri,st,tri->Lmass_matrix,0,Rxave);
             // R_degree3_term_averageP3(tri,st,tri->Lmass_matrix,1,Ryave);
             // for(k = 0; k < N_EQN; k++)
             // {
             //     tmpuxave[k] += Rxave[k];
             //     tmpuyave[k] += Ryave[k];
             // }

             for(i = 0; i < 3; i++)
             {
                 u_average_indx(nbtri[i],nbst[i],1,tmpnbuxave[i]);
                 u_average_indx(nbtri[i],nbst[i],2,tmpnbuyave[i]);
                 // R_degree3_term_averageP3(nbtri[i],nbst[i],nbtri[i]->Lmass_matrix,0,nbRxave[i]);
                 // R_degree3_term_averageP3(nbtri[i],nbst[i],nbtri[i]->Lmass_matrix,1,nbRyave[i]);
                 // for(k = 0; k < N_EQN; k++)
                 // {
                 //     tmpnbuxave[i][k] += nbRxave[i][k];
                 //     tmpnbuyave[i][k] += nbRyave[i][k];
                 // }
             }
             NEW_extrema_detec(tmpuxave,tmpnbuxave,3,is_bad_stenx);
             NEW_extrema_detec(tmpuyave,tmpnbuyave,3,is_bad_steny);
             */
             NEW_extrema_detec(Lxave,nbLxave,gnum_CVs,is_bad_stenx);
             NEW_extrema_detec(Lyave,nbLyave,gnum_CVs,is_bad_steny);
	     /*
             j = 0;
             for(i = 0; i < num_CVs; i++)
             {
                 if(i == 0 || i == 3 || i == 6)
                     continue;
                 for(k = 0; k < N_EQN; k++)
                 {
                     tmpnbLxave[j][k] = nbLxave[i][k];
                     tmpnbLyave[j][k] = nbLyave[i][k];
                 }
                 j++;
             }
             NEW_extrema_detec(Lxave,tmpnbLxave,num_CVs-3,is_bad_stenx);
             NEW_extrema_detec(Lyave,tmpnbLyave,num_CVs-3,is_bad_steny);
	     */
         }

         if(alltri_HR_sten_2_grp[tri->id].HR_sten_set_2nd == NO)
         {
             tri_array(&Ax,gnum_CVs,2,2,sizeof(double));
             tri_array(&Ay,gnum_CVs,2,2,sizeof(double));
             vector(&c_num_x,gnum_CVs,sizeof(double));
             vector(&c_num_y,gnum_CVs,sizeof(double));
             for(i = 0; i < gnum_CVs; i++)
             {
                 Ax[i][0][0] = 2.0*(gnbcent[i][0]-cent[0]);
                 Ax[i][0][1] =     (gnbcent[i][1]-cent[1]);
                 Ax[i][1][0] = 2.0*(gnbcent[(i+1)%gnum_CVs][0]-cent[0]);
                 Ax[i][1][1] =     (gnbcent[(i+1)%gnum_CVs][1]-cent[1]);
                 // c_num_x[i] = cond_num(Ax[i]); // c_num_x[i] = 1.0;
                 c_num_x[i] = cond_num2(Ax,i); // c_num_x[i] = 1.0;
                 Ay[i][0][0] =     (gnbcent[i][0]-cent[0]);
                 Ay[i][0][1] = 2.0*(gnbcent[i][1]-cent[1]);
                 Ay[i][1][0] =     (gnbcent[(i+1)%gnum_CVs][0]-cent[0]);
                 Ay[i][1][1] = 2.0*(gnbcent[(i+1)%gnum_CVs][1]-cent[1]);
                 // c_num_y[i] = cond_num(Ay[i]); // c_num_y[i] = 1.0;
                 c_num_y[i] = cond_num2(Ay,i); // c_num_y[i] = 1.0;
             }
             alltri_HR_sten_2_grp[tri->id].HR_sten_set_2nd = YES;
             alltri_HR_sten_2_grp[tri->id].Ax = Ax;
             alltri_HR_sten_2_grp[tri->id].Ay = Ay;
             alltri_HR_sten_2_grp[tri->id].c_num_x = c_num_x; alltri_HR_sten_2_grp[tri->id].c_num_y = c_num_y;
         }
         else
         {
             Ax = alltri_HR_sten_2_grp[tri->id].Ax;
             Ay = alltri_HR_sten_2_grp[tri->id].Ay;
             c_num_x = alltri_HR_sten_2_grp[tri->id].c_num_x; c_num_y = alltri_HR_sten_2_grp[tri->id].c_num_y;
         }

         for(k = 0; k < N_EQN; k++)
         {
             for(i = 0; i < gnum_CVs; i++)
             {
                 rside[0] = nbLxave[i][k] - Lxave[k];
                 rside[1] = nbLxave[(i+1)%gnum_CVs][k] - Lxave[k];
                 // comp_coef(Ax[i],rside,coefx[i]);
                 comp_coef2(Ax,rside,coefx[i],i);

                 rside[0] = nbLyave[i][k] - Lyave[k];
                 rside[1] = nbLyave[(i+1)%gnum_CVs][k] - Lyave[k];
                 // comp_coef(Ay[i],rside,coefy[i]);
                 comp_coef2(Ay,rside,coefy[i],i);
             }
             for(i = 0; i < gnum_CVs; i++)
             {
                 arrya[i] = coefx[i][0];
                 arryb[i] = coefx[i][1];
             }

             WENO_mod_on_2nd(arrya,arryb,c_num_x,gnum_CVs,diam,w);
             u3 = u4_0 = 0.0;
             for(i = 0; i < gnum_CVs; i++)
             {
                 u3   += w[i]*coefx[i][0];
                 u4_0 += w[i]*coefx[i][1];
             }

             if(is_bad_stenx[k] == YES && detect_extr == YES)
             {
                 u3 = 0.0; u4_0 = 0.0;
             }

             for(i = 0; i < gnum_CVs; i++)
             {
                 arrya[i] = coefy[i][0];
                 arryb[i] = coefy[i][1];
             }

             WENO_mod_on_2nd(arrya,arryb,c_num_y,gnum_CVs,diam,w);
             u4_1 = u5 = 0.0;
             for(i = 0; i < gnum_CVs; i++)
             {
                 u4_1 += w[i]*coefy[i][0];
                 u5   += w[i]*coefy[i][1];
             }
             if(is_bad_steny[k] == YES && detect_extr == YES)
             {
                 u4_1 = 0.0; u5 = 0.0;
             }
             u4 = minmod((1+0.05)*minmod(u4_0,u4_1), 0.5*(u4_0 + u4_1)); // 0.05, 0.01
             // u4 = minmod2((1+0.05)*minmod2(u4_0,u4_1), 0.5*(u4_0 + u4_1)); // 0.05, 0.01
             switch(k)
             {
             case 0:
                 // if(fabs(dg_Dens(st)[3]) < 1.0E-13) dg_Dens(st2)[3] = 0.0;
                 // else
                     dg_Dens(st2)[3] = u3;
                 // if(fabs(dg_Dens(st)[4]) < 1.0E-13)
                 //     dg_Dens(st2)[4] = 0.0;
                 // else
                     dg_Dens(st2)[4] = u4;
                 // if(fabs(dg_Dens(st)[5]) < 1.0E-13)
                 //     dg_Dens(st2)[5] = 0.0;
                 // else
                     dg_Dens(st2)[5] = u5;
             break;
             case 1:
                 // if(fabs(dg_Mom(st)[0][3]) < 1.0E-13)
                 //     dg_Mom(st2)[0][3] = 0.0;
                 // else
                     dg_Mom(st2)[0][3] = u3;
                 // if(fabs(dg_Mom(st)[0][4]) < 1.0E-13)
                 //     dg_Mom(st2)[0][4] = 0.0;
                 // else
                     dg_Mom(st2)[0][4] = u4;
                 // if(fabs(dg_Mom(st)[0][5]) < 1.0E-13)
                 //     dg_Mom(st2)[0][5] = 0.0;
                 // else
                     dg_Mom(st2)[0][5] = u5;
             break;
             case 2:
                 // if(fabs(dg_Mom(st)[1][3]) < 1.0E-13)
                 //     dg_Mom(st2)[1][3] = 0.0;
                 // else
                     dg_Mom(st2)[1][3] = u3;
                 // if(fabs(dg_Mom(st)[1][4]) < 1.0E-13)
                 //     dg_Mom(st2)[1][4] = 0.0;
                 // else
                     dg_Mom(st2)[1][4] = u4;
                 // if(fabs(dg_Mom(st)[1][5]) < 1.0E-13)
                 //     dg_Mom(st2)[1][5] = 0.0;
                 // else
                     dg_Mom(st2)[1][5] = u5;
             break;
             case 3:
                 // if(fabs(dg_Energy(st)[3]) < 1.0E-13)
                 //     dg_Energy(st2)[3] = 0.0;
                 // else
                     dg_Energy(st2)[3] = u3;
                 // if(fabs(dg_Energy(st)[4]) < 1.0E-13)
                 //     dg_Energy(st2)[4] = 0.0;
                 // else
                     dg_Energy(st2)[4] = u4;
                 // if(fabs(dg_Energy(st)[5]) < 1.0E-13)
                 //     dg_Energy(st2)[5] = 0.0;
                 // else
                     dg_Energy(st2)[5] = u5;
             break;
             }
         }
}

/*
*               avg_st_on_tri_for_p2_poly():
*
*       Forms, for each conserved quantity, the sum over the first six DG
*       coefficients of st weighted by the first row of Lmass_matrix, and
*       divides it by Lmass_matrix[0][0].
*
*       tri           not referenced by this routine.
*       st            state holding the dg_Dens / dg_Mom / dg_Energy
*                     coefficient arrays.
*       Lmass_matrix  only row 0, entries [0..5], is read.
*       ave           output; ave[0] is the density result, ave[1..3] the
*                     two momentum components and the energy.  When
*                     N_EQN == 1 only ave[0] is produced.
*
*       NOTE(review): Lmass_matrix[0][0] is presumably the cell area
*       (the original local was named "area") -- confirm.
*/
LOCAL void avg_st_on_tri_for_p2_poly(
        TRI       *tri,
        Locstate  st,
        double **Lmass_matrix,
        float     *ave)
{
        double     *row0 = Lmass_matrix[0];
        float      area = row0[0];
        int        eq, n;

        for(eq = 0; eq < N_EQN; eq++)
            ave[eq] = 0.0;

        // density: the six coefficients of the quadratic (p2) expansion
        for(n = 0; n < 6; n++)
            ave[0] += dg_Dens(st)[n]*row0[n];
        ave[0] /= area;

        if(N_EQN != 1)
        {
            // momentum components and energy
            for(n = 0; n < 6; n++)
            {
                ave[1] += dg_Mom(st)[0][n]*row0[n];
                ave[2] += dg_Mom(st)[1][n]*row0[n];
                ave[3] += dg_Energy(st)[n]*row0[n];
            }
            for(eq = 1; eq < N_EQN; eq++)
                ave[eq] /= area;
        }
}

/*
*       Subcell_limiting_1st_degreeP3_PNC_2nd_degree_tech():
*
*       Original author's note: reconstructed coeffs. are stored in
*       limit_store for CV polynomials; reconstruct by partial
*       neighboring cells.
*
*       Rebuilds the constant and linear DG coefficients (indices 0..2) of
*       every conserved quantity on tri and writes them into
*       midsoln[tri->id].st[0].  Outline:
*
*         1. The input state is tri->st when rk_iter == RK_STEP, otherwise
*            midsoln[tri->id].st[rk_iter].
*         2. A stencil of num_CVs control volumes is obtained from
*            get_tri_near_neighbr_CV_stencil().
*         3. For each pair of consecutive stencil CVs (i, (i+1)%num_CVs) a
*            2x2 matrix of centroid offsets and its cond_num2() value are
*            built once per triangle and cached in
*            alltri_HR_sten_2[tri->id] (A, c_num, HR_sten_set_1st).
*         4. Lave / nbLave are the averages minus the remainder averages
*            (Rave / nbRave) on tri and on each stencil CV.
*         5. comp_coef2() yields one candidate pair of linear coefficients
*            per stencil pair; the candidates are combined with
*            WENO_mod_1() weights when debugging("weno_w") is on, or with
*            a minmod/average blend when debugging("cent_bias") is on.
*         6. The cell averages of the input state are copied into st2 and
*            coefficient 0 is reset to average - Rave, with Rave from
*            degree1_above_term_average_tmp().
*         7. tri->redo_limiting is set to YES only if check_quadr == YES,
*            N_EQN == 4 and unphysical_st_at_quadrature() reports YES.
*
*       tri          triangle being limited.
*       nbtri        its three edge neighbours (passed to the stencil
*                    builder).
*       midsoln      per-triangle intermediate states, indexed by tri->id.
*       store        forwarded to map_p2_part_of_p3_ploy() or
*                    avg_st_on_cv_from_store().
*       rk_iter      selects the input state, see (1).
*       comput_mat   YES: refill the file-level mass_1st_rows[] scratch
*                    rows for the stencil triangles.
*       detect_extr  YES: run NEW_extrema_detec() and zero the slopes of
*                    flagged equations (weno_w path only).
*       check_quadr  see (7).
*
*       NOTE(review): the local buffers hold at most 20 stencil entries
*       (nbcv_indx, nbcent, w, arrya, arryb); num_CVs is not checked
*       against that bound here -- confirm the stencil builder guarantees
*       it.
*/
LOCAL void Subcell_limiting_1st_degreeP3_PNC_2nd_degree_tech(
        TRI       *tri,
        TRI       *nbtri[3],
        Mid_soln  *midsoln,
        Limiting_store **store,
        int       rk_iter,
        int       comput_mat,
        int       detect_extr,
        int       check_quadr)
{
        Locstate st, st2;
        float    uave[4], nbuave[30][4];
        float    Rave[4], nbRave[30][4];
        float    Lave[4], nbLave[30][8];
        int      i, dim = 2, k, is_bad_sten[20], num_CVs;
        int      nbcv_indx[20], nbcv_side[20];
        double   *cent, *nbcent[20], rside[3];
        double   *c_num, ***A;
        float    coef[30][2];
        float    u0, u1, u2, avg1, avg2, w[20], arrya[20], arryb[20];
        double   eps = 0.05;
        TRI      *tris[30];

        if(rk_iter == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_iter];

        // st2 receives the limited coefficients
        st2 = midsoln[tri->id].st[0];
        uave[0] = Dens(st);
        uave[1] = Mom(st)[0];
        uave[2] = Mom(st)[1];
        uave[3] = Energy(st);

        R_degree2_above_term_averageP3(tri,st2,tri->Lmass_matrix,Rave);

        cent = fg_centroid(tri);

        num_CVs = get_tri_near_neighbr_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
        for(i = 0; i < num_CVs; i++)
            nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

        // Stencil matrices depend only on geometry: build once, then reuse.
        if(alltri_HR_sten_2[tri->id].HR_sten_set_1st == NO)
        {
            tri_array(&A,num_CVs,2,2,sizeof(double));
            vector(&c_num,num_CVs,sizeof(double));

            for(i = 0; i < num_CVs; i++)
            {
                A[i][0][0] = (nbcent[i][0]-cent[0]);
                A[i][0][1] = (nbcent[i][1]-cent[1]);
                A[i][1][0] = (nbcent[(i+1)%num_CVs][0]-cent[0]);
                A[i][1][1] = (nbcent[(i+1)%num_CVs][1]-cent[1]);
                c_num[i] = cond_num2(A,i);
            }
            alltri_HR_sten_2[tri->id].HR_sten_set_1st = YES;
            alltri_HR_sten_2[tri->id].c_num = c_num;
            alltri_HR_sten_2[tri->id].A = A;
        }
        else
        {
            A = alltri_HR_sten_2[tri->id].A;
            c_num = alltri_HR_sten_2[tri->id].c_num;
        }

        if(comput_mat == YES)
        {
            // NOTE(review): the original author questioned whether the 2nd
            // degree row is the right one here ("Is this correct?").  The
            // rows written to mass_1st_rows[] are not read again in this
            // routine; the averages below use alltri_mass_1st_rows instead.
            for(i = 0; i < num_CVs; i++)
                comp_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,fg_centroid(tri),mass_1st_rows[i]);
        }

        if(USE_2nd_degree_tech == YES)
        {
            // NOTE(review): allocated once, sized by the first caller's
            // num_CVs -- confirm later triangles never need more entries.
            if(NULL == cv_soln_2degree_tech1)
                vector(&cv_soln_2degree_tech1,num_CVs,sizeof(CV_Soln));
            map_p2_part_of_p3_ploy(tris,nbcv_indx,nbcv_side,num_CVs,store,rk_iter,cent);

            // averages of the mapped p2 polynomial on each stencil CV
            for(i = 0; i < num_CVs; i++)
                avg_st_on_cv_for_mapped_p2_poly(tris[i],i,alltri_mass_1st_rows[tri->id].mass_1st_rows[i],nbuave[i]);

            // averages of the quadratic terms using the updated coefficients
            for(i = 0; i < num_CVs; i++)
                R_quadr_term_averageP3(tris[i],st2,alltri_mass_1st_rows[tri->id].mass_1st_rows[i],nbRave[i]);
        }
        else
        {
            if(rk_iter == RK_STEP)
            {
                for(i = 0; i < num_CVs; i++)
                    avg_st_on_cv_ver2(tris[i], nbcv_indx[i], NULL, nbuave[i]);
            }
            else
            {
                for(i = 0; i < num_CVs; i++)
                    avg_st_on_cv_from_store(tris[i], nbcv_indx[i], NULL, store[rk_iter], nbuave[i]);
            }

            for(i = 0; i < num_CVs; i++)
                R_degree2_above_term_averageP3(tris[i],st2,alltri_mass_1st_rows[tri->id].mass_1st_rows[i],nbRave[i]);
        }

        // averages with the remainder contribution removed
        for(k = 0; k < N_EQN; k++)
        {
            Lave[k] = uave[k]-Rave[k];
            for(i = 0; i < num_CVs; i++)
                nbLave[i][k] = nbuave[i][k]-nbRave[i][k];
        }

        if(detect_extr == YES)
            NEW_extrema_detec(Lave,nbLave,num_CVs,is_bad_sten);

        for(k = 0; k < N_EQN; k++)
        {
            // Deterministic fallback (cell value, zero slopes) so that st2
            // never receives indeterminate values when neither the
            // "weno_w" nor the "cent_bias" switch is enabled.
            u0 = Lave[k];
            u1 = u2 = 0.0;

            // candidate linear coefficients, one pair per stencil pair
            for(i = 0; i < num_CVs; i++)
            {
                rside[0] = nbLave[i][k] - Lave[k];
                rside[1] = nbLave[(i+1)%num_CVs][k] - Lave[k];
                comp_coef2(A,rside,coef[i],i);
            }
            if(debugging("weno_w"))
            {
                for(i = 0; i < num_CVs; i++)
                {
                    arrya[i] = coef[i][0];
                    arryb[i] = coef[i][1];
                }
                WENO_mod_1(arrya, arryb, c_num, num_CVs, w);
                u1 = u2 = 0.0;
                for(i = 0; i < num_CVs; i++)
                {
                    u1 += w[i]*coef[i][0];
                    u2 += w[i]*coef[i][1];
                }
                // is_bad_sten[] is only filled when detect_extr == YES,
                // so test the switch before reading the flag.
                if(detect_extr == YES && is_bad_sten[k] == YES)
                {
                    u1 = u2 = 0.0;
                }
                u0 = Lave[k];
            }
            else if(debugging("cent_bias"))
            {
                avg1 = 0.0; u1 = coef[0][0];
                avg2 = 0.0; u2 = coef[0][1];
                for(i = 0; i < num_CVs; i++)
                {
                    avg1 += coef[i][0];
                    u1 = minmod(coef[i][0],u1);
                    avg2 += coef[i][1];
                    u2 = minmod(coef[i][1],u2);
                }
                avg1 /= num_CVs;
                avg2 /= num_CVs;
                u1 = minmod(((1+eps)*u1), avg1);
                u2 = minmod(((1+eps)*u2), avg2);
                u0 = Lave[k];
            }
            switch(k)
            {
            case 0:
                dg_Dens(st2)[0] = u0;
                dg_Dens(st2)[1] = u1;
                dg_Dens(st2)[2] = u2;
            break;
            case 1:
                dg_Mom(st2)[0][0] = u0;
                dg_Mom(st2)[0][1] = u1;
                dg_Mom(st2)[0][2] = u2;
            break;
            case 2:
                dg_Mom(st2)[1][0] = u0;
                dg_Mom(st2)[1][1] = u1;
                dg_Mom(st2)[1][2] = u2;
            break;
            case 3:
                dg_Energy(st2)[0] = u0;
                dg_Energy(st2)[1] = u1;
                dg_Energy(st2)[2] = u2;
            break;
            default:
            break;
            }
        }
        Dens(st2) = Dens(st);
        Mom(st2)[0] = Mom(st)[0];
        Mom(st2)[1] = Mom(st)[1];
        Energy(st2) = Energy(st);

        // Reset coefficient 0 from the copied averages and the contribution
        // of all higher coefficients (now including the new slopes).
        degree1_above_term_average_tmp(tri,st2,tri->Lmass_matrix,Rave);
        dg_Dens(st2)[0] = Dens(st2) - Rave[0];
        dg_Mom(st2)[0][0] = Mom(st2)[0] - Rave[1];
        dg_Mom(st2)[1][0] = Mom(st2)[1] - Rave[2];
        dg_Energy(st2)[0] = Energy(st2) - Rave[3];

        if(check_quadr == YES && N_EQN == 4 && YES == unphysical_st_at_quadrature(tri, st2))
            tri->redo_limiting = YES;
        else
            tri->redo_limiting = NO;
}

/*
*               degree1_above_term_average_tmp():
*
*       For each conserved quantity, accumulates the DG coefficients with
*       index 1 .. MAX_N_COEF-1 of st, each weighted by the matching entry
*       of the first row of Lmass_matrix, and divides the sum by
*       Lmass_matrix[0][0].  Coefficient 0 is deliberately left out.
*
*       tri           not referenced by this routine.
*       st            state holding the dg_Dens / dg_Mom / dg_Energy
*                     coefficient arrays.
*       Lmass_matrix  only row 0 is read.
*       ave           output; ave[0] density, ave[1..3] the two momentum
*                     components and the energy.  When N_EQN == 1 only
*                     ave[0] is produced.
*
*       NOTE(review): Lmass_matrix[0][0] is presumably the cell area
*       (the original local was named "area") -- confirm.
*/
LOCAL void degree1_above_term_average_tmp(
        TRI       *tri,
        Locstate  st,
        double **Lmass_matrix,
        float     *ave)
{
        double     *first_row = Lmass_matrix[0];
        float      area = first_row[0];
        int        eq, c;

        for(eq = 0; eq < N_EQN; eq++)
            ave[eq] = 0.0;

        // density
        for(c = 1; c < MAX_N_COEF; c++)
            ave[0] += dg_Dens(st)[c]*first_row[c];
        ave[0] /= area;

        if(N_EQN != 1)
        {
            // momentum components and energy
            for(c = 1; c < MAX_N_COEF; c++)
            {
                ave[1] += dg_Mom(st)[0][c]*first_row[c];
                ave[2] += dg_Mom(st)[1][c]*first_row[c];
                ave[3] += dg_Energy(st)[c]*first_row[c];
            }
            for(eq = 1; eq < N_EQN; eq++)
                ave[eq] /= area;
        }
}

/*
*       Subcell_limiting_1st_degreeP3_PNC_multi_pt_limit():
*
*       Original author's note: reconstructed coeffs. are stored in
*       limit_store for CV polynomials; reconstruct by partial
*       neighboring cells.
*
*       Rebuilds the constant and linear DG coefficients (indices 0..2) of
*       every conserved quantity on tri and writes them into
*       midsoln[tri->id].st[0].  Instead of control-volume averages, the
*       neighbour data are the mean of N_PTS (= 4) point values taken in
*       each stencil control volume.  Outline:
*
*         1. The input state is tri->st when rk_iter == RK_STEP, otherwise
*            midsoln[tri->id].st[rk_iter].
*         2. A stencil of num_CVs control volumes is obtained from
*            get_tri_near_neighbr_CV_stencil().
*         3. For every stencil CV the sample points qcrds_cv[i][0..3] are
*            a triangle vertex, the midpoint of edge nbcv_side[i], the
*            midpoint of a second edge and the CV centroid; newcent[i] is
*            the mean of those N_PTS points.
*         4. For each pair of consecutive stencil CVs a 2x2 matrix of
*            newcent offsets from the triangle centroid and its
*            cond_num2() value are built once per triangle and cached in
*            alltri_HR_sten_2[tri->id] (A_pt, c_num_pt, HR_sten_set_1st_pt).
*         5. nbLave[i] is the mean over the sample points of (point value
*            minus remainder point value); Lave = uave - Rave on tri.
*         6. comp_coef2() yields one candidate pair of linear coefficients
*            per stencil pair; the candidates are combined with
*            WENO_mod_1() weights when debugging("weno_w") is on, or with
*            a minmod/average blend when debugging("cent_bias") is on.
*         7. The cell averages of the input state are copied into st2 and
*            tri->redo_limiting is set to YES only if check_quadr == YES,
*            N_EQN == 4 and unphysical_st_at_quadrature() reports YES.
*
*       tri          triangle being limited.
*       nbtri        its three edge neighbours (passed to the stencil
*                    builder).
*       midsoln      per-triangle intermediate states, indexed by tri->id.
*       store        store[rk_iter] is forwarded to
*                    con_u_at_CV_pt_from_store() when rk_iter != RK_STEP.
*       rk_iter      selects the input state, see (1).
*       comput_mat   YES: refill the file-level mass_1st_rows[] scratch
*                    rows for the stencil triangles.
*       detect_extr  YES: run NEW_extrema_detec() and zero the slopes of
*                    flagged equations (weno_w path only).
*       check_quadr  see (7).
*
*       NOTE(review): the local buffers hold at most 20 stencil entries
*       (nbcv_indx, nbcent, w, arrya, arryb); num_CVs is not checked
*       against that bound here -- confirm the stencil builder guarantees
*       it.
*/
LOCAL void Subcell_limiting_1st_degreeP3_PNC_multi_pt_limit(
        TRI       *tri,
        TRI       *nbtri[3],
        Mid_soln  *midsoln,
        Limiting_store **store,
        int       rk_iter,
        int       comput_mat,
        int       detect_extr,
        int       check_quadr)
{
        Locstate st, st2;
        float    uave[4], nbuaveipt[30][10][4]; // [cv id][#pts][conservative variable]
        float    Rave[4], nbRaveipt[30][10][4];
        float    Lave[4], nbLave[30][8];
        int      i, j, dim = 2, k, is_bad_sten[20], num_CVs, ipt;
        int      nbcv_indx[20], nbcv_side[20];
        double   *cent, *nbcent[20], rside[3], midpt[3][3], newcent[30][MAXD];
        double   *c_num, ***A;
        float    coef[30][2];
        float    u0, u1, u2, avg1, avg2, w[20], arrya[20], arryb[20];
        double   eps = 0.05;
        int      N_PTS = 4;
        TRI      *tris[30];
        float    *pcrds[4], qcrds_cv[30][5][3]; // [#cv][#pt][crds]

        if(rk_iter == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_iter];

        // st2 receives the limited coefficients
        st2 = midsoln[tri->id].st[0];
        uave[0] = Dens(st);
        uave[1] = Mom(st)[0];
        uave[2] = Mom(st)[1];
        uave[3] = Energy(st);
        R_degree2_above_term_averageP3(tri,st2,tri->Lmass_matrix,Rave);

        cent = fg_centroid(tri);

        num_CVs = get_tri_near_neighbr_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
        for(i = 0; i < num_CVs; i++)
            nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

        // Centre of the "partial partial" cell of each stencil CV.
        // NOTE(review): every newcent[i] written here is overwritten by the
        // point-average in the next loop before it is read -- this pass
        // looks redundant; confirm comput_tri_cent() has no other effect.
        for(i = 0; i < num_CVs; i++)
        {
            for(j = 0; j < 3; j++)
                pcrds[j] = Coords( Point_of_tri(tris[i])[j] );
            for(j = 0; j < 3; j++)
            {
                for(k = 0; k < dim; k++)
                    midpt[j][k] = (pcrds[(j+1)%3][k] + pcrds[j][k])*0.5;
            }

            if(nbcv_side[i] == nbcv_indx[i])
                comput_tri_cent(2, pcrds[nbcv_side[i]], midpt[nbcv_side[i]], nbcent[i], newcent[i]);
            else
                comput_tri_cent(2, pcrds[(nbcv_side[i]+1)%3], midpt[nbcv_side[i]], nbcent[i], newcent[i]);
        }

        // Coordinates of the points at which the solution is sampled.
        for(i = 0; i < num_CVs; i++)
        {
            for(j = 0; j < 3; j++)
                pcrds[j] = Coords( Point_of_tri(tris[i])[j] );
            for(j = 0; j < 3; j++)
            {
                for(k = 0; k < dim; k++)
                    midpt[j][k] = (pcrds[(j+1)%3][k] + pcrds[j][k])*0.5;
            }

            // point 1: triangle vertex selected by the CV index / side
            if(nbcv_side[i] == nbcv_indx[i])
            {
                for(j = 0; j < dim; j++)
                    qcrds_cv[i][0][j] = pcrds[nbcv_side[i]][j];
            }
            else
            {
                for(j = 0; j < dim; j++)
                    qcrds_cv[i][0][j] = pcrds[(nbcv_side[i]+1)%3][j];
            }

            // point 2: midpoint of the adjacent edge
            for(j = 0; j < dim; j++)
                qcrds_cv[i][1][j] = midpt[nbcv_side[i]][j];

            // point 3: midpoint of a second edge ("opposite side" in the
            // original author's words)
            if(nbcv_side[i] == nbcv_indx[i])
            {
                for(j = 0; j < dim; j++)
                    qcrds_cv[i][2][j] = midpt[(nbcv_side[i]+2)%3][j];
            }
            else
            {
                for(j = 0; j < dim; j++)
                    qcrds_cv[i][2][j] = midpt[(nbcv_side[i]+1)%3][j];
            }

            // point 4: CV centroid
            for(j = 0; j < dim; j++)
                qcrds_cv[i][3][j] = nbcent[i][j];

            // point 5: centre of the partial partial cell.  It is stored
            // but not sampled below because N_PTS == 4.
            comput_tri_cent(2, qcrds_cv[i][0], qcrds_cv[i][1], qcrds_cv[i][3], qcrds_cv[i][4]);

            // representative location of CV i: mean of the sampled points
            newcent[i][0] = newcent[i][1] = 0.0;
            for(ipt = 0; ipt < N_PTS; ipt++)
            {
                newcent[i][0] += qcrds_cv[i][ipt][0];
                newcent[i][1] += qcrds_cv[i][ipt][1];
            }
            newcent[i][0] /= N_PTS;
            newcent[i][1] /= N_PTS;
        }

        // Stencil matrices depend only on geometry: build once, then reuse.
        if(alltri_HR_sten_2[tri->id].HR_sten_set_1st_pt == NO)
        {
            tri_array(&A,num_CVs,2,2,sizeof(double));
            vector(&c_num,num_CVs,sizeof(double));

            for(i = 0; i < num_CVs; i++)
            {
                A[i][0][0] = (newcent[i][0]-cent[0]);
                A[i][0][1] = (newcent[i][1]-cent[1]);
                A[i][1][0] = (newcent[(i+1)%num_CVs][0]-cent[0]);
                A[i][1][1] = (newcent[(i+1)%num_CVs][1]-cent[1]);

                c_num[i] = cond_num2(A,i);
            }

            alltri_HR_sten_2[tri->id].HR_sten_set_1st_pt = YES;
            alltri_HR_sten_2[tri->id].c_num_pt = c_num;
            alltri_HR_sten_2[tri->id].A_pt = A;
        }
        else
        {
            A = alltri_HR_sten_2[tri->id].A_pt;
            c_num = alltri_HR_sten_2[tri->id].c_num_pt;
        }

        if(comput_mat == YES)
        {
            // NOTE(review): the original author questioned whether the 2nd
            // degree row is the right one here ("Is this correct?").  The
            // rows written to mass_1st_rows[] are not read again in this
            // routine.
            for(i = 0; i < num_CVs; i++)
                comp_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,fg_centroid(tri),mass_1st_rows[i]);
        }

        // point values of the conserved variables in each stencil CV
        if(rk_iter == RK_STEP)
        {
            for(i = 0; i < num_CVs; i++)
            {
                for(ipt = 0; ipt < N_PTS; ipt++)
                    con_u_at_CV_pt(tris[i], nbcv_indx[i], qcrds_cv[i][ipt], tris[i]->CVcent[nbcv_indx[i]], nbuaveipt[i][ipt]);
            }
        }
        else
        {
            for(i = 0; i < num_CVs; i++)
            {
                for(ipt = 0; ipt < N_PTS; ipt++)
                    con_u_at_CV_pt_from_store(tris[i], nbcv_indx[i], qcrds_cv[i][ipt], tris[i]->CVcent[nbcv_indx[i]],
                        store[rk_iter], nbuaveipt[i][ipt]);
            }
        }

        // point values of the remainder at the same locations
        for(i = 0; i < num_CVs; i++)
        {
            for(ipt = 0; ipt < N_PTS; ipt++)
                R_degree2_above_term_P3_pt(tris[i], st2, NULL, qcrds_cv[i][ipt], cent, nbRaveipt[i][ipt]);
        }

        for(k = 0; k < N_EQN; k++)
        {
            Lave[k] = uave[k]-Rave[k];
            for(i = 0; i < num_CVs; i++)
            {
                // mean over the sample points of (value - remainder)
                nbLave[i][k] = 0.0;
                for(ipt = 0; ipt < N_PTS; ipt++)
                    nbLave[i][k] += (nbuaveipt[i][ipt][k]-nbRaveipt[i][ipt][k]);
                nbLave[i][k] /= N_PTS;
            }
        }

        if(detect_extr == YES)
            NEW_extrema_detec(Lave,nbLave,num_CVs,is_bad_sten);

        for(k = 0; k < N_EQN; k++)
        {
            // Deterministic fallback (cell value, zero slopes) so that st2
            // never receives indeterminate values when neither the
            // "weno_w" nor the "cent_bias" switch is enabled.
            u0 = Lave[k];
            u1 = u2 = 0.0;

            // candidate linear coefficients, one pair per stencil pair
            for(i = 0; i < num_CVs; i++)
            {
                rside[0] = nbLave[i][k] - Lave[k];
                rside[1] = nbLave[(i+1)%num_CVs][k] - Lave[k];
                comp_coef2(A,rside,coef[i],i);
            }
            if(debugging("weno_w"))
            {
                for(i = 0; i < num_CVs; i++)
                {
                    arrya[i] = coef[i][0];
                    arryb[i] = coef[i][1];
                }
                WENO_mod_1(arrya, arryb, c_num, num_CVs, w);
                u1 = u2 = 0.0;
                for(i = 0; i < num_CVs; i++)
                {
                    u1 += w[i]*coef[i][0];
                    u2 += w[i]*coef[i][1];
                }
                // is_bad_sten[] is only filled when detect_extr == YES,
                // so test the switch before reading the flag.
                if(detect_extr == YES && is_bad_sten[k] == YES)
                {
                    u1 = u2 = 0.0;
                }
                u0 = Lave[k];
            }
            else if(debugging("cent_bias"))
            {
                avg1 = 0.0; u1 = coef[0][0];
                avg2 = 0.0; u2 = coef[0][1];
                for(i = 0; i < num_CVs; i++)
                {
                    avg1 += coef[i][0];
                    u1 = minmod(coef[i][0],u1);
                    avg2 += coef[i][1];
                    u2 = minmod(coef[i][1],u2);
                }
                avg1 /= num_CVs;
                avg2 /= num_CVs;
                u1 = minmod(((1+eps)*u1), avg1);
                u2 = minmod(((1+eps)*u2), avg2);
                u0 = Lave[k];
            }
            switch(k)
            {
            case 0:
                dg_Dens(st2)[0] = u0;
                dg_Dens(st2)[1] = u1;
                dg_Dens(st2)[2] = u2;
            break;
            case 1:
                dg_Mom(st2)[0][0] = u0;
                dg_Mom(st2)[0][1] = u1;
                dg_Mom(st2)[0][2] = u2;
            break;
            case 2:
                dg_Mom(st2)[1][0] = u0;
                dg_Mom(st2)[1][1] = u1;
                dg_Mom(st2)[1][2] = u2;
            break;
            case 3:
                dg_Energy(st2)[0] = u0;
                dg_Energy(st2)[1] = u1;
                dg_Energy(st2)[2] = u2;
            break;
            default:
            break;
            }
        }
        Dens(st2) = Dens(st);
        Mom(st2)[0] = Mom(st)[0];
        Mom(st2)[1] = Mom(st)[1];
        Energy(st2) = Energy(st);

        if(check_quadr == YES && N_EQN == 4 && YES == unphysical_st_at_quadrature(tri, st2))
            tri->redo_limiting = YES;
        else
            tri->redo_limiting = NO;
}


/*
*       PNC_cv_edge_sample_pt():
*
*       Computes the sample point used for control volume cv of triangle t
*       on the edge running from vertex 'side' to vertex (side+1)%3.
*       The point is the edge midpoint shifted by a quarter of the edge
*       vector: towards vertex 'side' when cv == side, otherwise towards
*       vertex (side+1)%3.  Only the first two coordinates of q are written.
*/
LOCAL void PNC_cv_edge_sample_pt(
         TRI       *t,
         int       side,
         int       cv,
         float     *q)
{
         static float shift[2] = {-0.5, 0.5};
         float    *p0, *p1, s;
         int      j;

         p0 = Coords( Point_of_tri(t)[side] );
         p1 = Coords( Point_of_tri(t)[(side+1)%3] );
         s = (side == cv) ? shift[0] : shift[1];
         for(j = 0; j < 2; j++)
             q[j] = (p1[j] + p0[j])/2.0 + (p1[j] - p0[j])/2.0*s;
}

/*
*       Subcell_limiting_1st_degreeP3_PNC_pt_limit():
*
*       Rebuilds the degree 0 and degree 1 coefficients (entries 0, 1, 2 of
*       dg_Dens, dg_Mom[0], dg_Mom[1], dg_Energy) of the solution on tri from
*       POINT values sampled on neighbouring control volumes (CVs), and writes
*       the result into midsoln[tri->id].st[0].  Reconstruction uses partial
*       neighbouring cells (PNC).
*
*       Outline:
*       1. Lave[k] = value on tri minus Rave[k], where Rave comes from
*          R_degree2_above_term_averageP3() (presumably the contribution of
*          the degree >= 2 terms -- confirm against that routine).
*       2. The CV stencil comes from get_tri_near_neighbr_CV_stencil().
*       3. For every cyclic pair of stencil points (i, i+1) a 2x2 matrix of
*          offsets from the centroid of tri is built.  The matrices and their
*          condition numbers are cached in alltri_HR_sten_2[tri->id] the first
*          time this triangle is processed and reused afterwards.
*       4. Each pair yields a candidate slope through comp_coef2().  The
*          candidates are merged with WENO_mod_1() weights when
*          debugging("weno_w") is on, or with a minmod procedure when
*          debugging("cent_bias") is on.
*       5. tri->redo_limiting is set from unphysical_st_at_quadrature().
*
*       Input:
*       tri         - triangle being limited.
*       nbtri       - edge neighbours of tri, forwarded to the stencil builder.
*       midsoln     - per triangle intermediate states; st[0] receives the
*                     result, st[rk_iter] is the input when
*                     rk_iter != RK_STEP.
*       store       - store[rk_iter] supplies the CV point values when
*                     rk_iter != RK_STEP; not read otherwise.
*       rk_iter     - RK_STEP selects tri->st as input state.
*       comput_mat  - YES: fill the file level mass_1st_rows[] for the stencil.
*       detect_extr - YES: run NEW_extrema_detec() and zero the slopes of the
*                     components it flags.
*       check_quadr - YES: enable the quadrature point check (only applied
*                     when N_EQN == 4).
*
*       NOTE(review): local arrays are sized for 4 equations and for at most
*       20 stencil CVs; nothing here checks N_EQN or num_CVs against that.
*/
LOCAL void Subcell_limiting_1st_degreeP3_PNC_pt_limit(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter,
         int       comput_mat,
         int       detect_extr,
         int       check_quadr)
{
         Locstate st, st2;
         float    uave[4], nbuave[30][4];
         float    Rave[4], nbRave[30][4];
         float    Lave[4], nbLave[30][8];
         int      i, dim = 2, k, is_bad_sten[20], num_CVs;
         int      nbcv_indx[20], nbcv_side[20];
         double   *cent, rside[3];
         double   *c_num, ***A;
         float    coef[30][2];
         float    u0, u1, u2, avg1, avg2, w[20], arrya[20], arryb[20];
         double   eps = 0.05;
         TRI      *tris[30];
         float    qpt[30][MAXD];

         if(rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];

         st2 = midsoln[tri->id].st[0];
         uave[0] = Dens(st);
         uave[1] = Mom(st)[0];
         uave[2] = Mom(st)[1];
         uave[3] = Energy(st);
         R_degree2_above_term_averageP3(tri,st2,tri->Lmass_matrix,Rave);

         cent = fg_centroid(tri);

         // Other stencil builders with the same argument list exist in this
         // file family (e.g. get_tri_edge_vert_CV_stencil,
         // get_tri_center_CV_stencil); this routine uses the near neighbour one.
         num_CVs = get_tri_near_neighbr_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);

         // Sample point of every stencil CV, computed once and reused below.
         for(i = 0; i < num_CVs; i++)
             PNC_cv_edge_sample_pt(tris[i], nbcv_side[i], nbcv_indx[i], qpt[i]);

         if(alltri_HR_sten_2[tri->id].HR_sten_set_1st_pt == NO)
         {
             tri_array(&A,num_CVs,2,2,sizeof(double));
             vector(&c_num,num_CVs,sizeof(double));

             // Row 0: centroid -> sample point i, row 1: centroid -> sample
             // point i+1 (cyclic).
             for(i = 0; i < num_CVs; i++)
             {
                 A[i][0][0] = (qpt[i][0]-cent[0]);
                 A[i][0][1] = (qpt[i][1]-cent[1]);
                 A[i][1][0] = (qpt[(i+1)%num_CVs][0]-cent[0]);
                 A[i][1][1] = (qpt[(i+1)%num_CVs][1]-cent[1]);
                 c_num[i] = cond_num2(A,i);
             }

             alltri_HR_sten_2[tri->id].HR_sten_set_1st_pt = YES;
             alltri_HR_sten_2[tri->id].c_num_pt = c_num;
             alltri_HR_sten_2[tri->id].A_pt = A;
         }
         else
         {
             A = alltri_HR_sten_2[tri->id].A_pt;
             c_num = alltri_HR_sten_2[tri->id].c_num_pt;
         }

         if(comput_mat == YES)
         {
             // Original author's open question: "Is this correct?  2nd degree
             // used" (comp_CV_mass_matrix_1st_row() was the alternative).
             // The rows written here are not read again in this routine.
             for(i = 0; i < num_CVs; i++)
                 comp_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,fg_centroid(tri),mass_1st_rows[i]);
         }

         // POINT value of the solution at each sample point
         for(i = 0; i < num_CVs; i++)
         {
             if(rk_iter == RK_STEP)
                 con_u_at_CV_pt(tris[i], nbcv_indx[i], qpt[i],
                         tris[i]->CVcent[nbcv_indx[i]], nbuave[i]);
             else
                 con_u_at_CV_pt_from_store(tris[i], nbcv_indx[i], qpt[i],
                         tris[i]->CVcent[nbcv_indx[i]], store[rk_iter], nbuave[i]);
         }

         // POINT value of remainder at each sample point
         for(i = 0; i < num_CVs; i++)
             R_degree2_above_term_P3_pt(tris[i], st2, NULL, qpt[i], cent, nbRave[i]);

         for(k = 0; k < N_EQN; k++)
         {
             Lave[k] = uave[k]-Rave[k];
             for(i = 0; i < num_CVs; i++)
                 nbLave[i][k] = nbuave[i][k]-nbRave[i][k];
         }

         if(detect_extr == YES)
             NEW_extrema_detec(Lave,nbLave,num_CVs,is_bad_sten);

         for(k = 0; k < N_EQN; k++)
         {
             // linear part of polynomial: one candidate slope per CV pair
             for(i = 0; i < num_CVs; i++)
             {
                 rside[0] = nbLave[i][k] - Lave[k];
                 rside[1] = nbLave[(i+1)%num_CVs][k] - Lave[k];
                 comp_coef2(A,rside,coef[i],i);
             }

             // Defined fallback: keep the mean and drop the slopes.  Without
             // this, uninitialized values were stored into st2 whenever
             // neither limiter mode below was switched on.
             u0 = Lave[k];
             u1 = u2 = 0.0;

             if(debugging("weno_w"))
             {
                 for(i = 0; i < num_CVs; i++)
                 {
                     arrya[i] = coef[i][0];
                     arryb[i] = coef[i][1];
                 }
                 WENO_mod_1(arrya, arryb, c_num, num_CVs, w);
                 for(i = 0; i < num_CVs; i++)
                 {
                     u1 += w[i]*coef[i][0];
                     u2 += w[i]*coef[i][1];
                 }
                 // detect_extr is tested first: is_bad_sten is only filled
                 // when extrema detection actually ran.
                 if(detect_extr == YES && is_bad_sten[k] == YES)
                 {
                     u1 = u2 = 0.0;
                 }
             }
             else if(debugging("cent_bias"))
             {
                 avg1 = 0.0; u1 = coef[0][0];
                 avg2 = 0.0; u2 = coef[0][1];
                 for(i = 0; i < num_CVs; i++)
                 {
                     avg1 += coef[i][0];
                     u1 = minmod(coef[i][0],u1);
                     avg2 += coef[i][1];
                     u2 = minmod(coef[i][1],u2);
                 }
                 avg1 /= num_CVs;
                 avg2 /= num_CVs;
                 u1 = minmod(((1+eps)*u1), avg1);
                 u2 = minmod(((1+eps)*u2), avg2);
             }

             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             default:
             break;
             }
         }
         Dens(st2) = Dens(st);
         Mom(st2)[0] = Mom(st)[0];
         Mom(st2)[1] = Mom(st)[1];
         Energy(st2) = Energy(st);

         if(check_quadr == YES && N_EQN == 4 && YES == unphysical_st_at_quadrature(tri, st2))
             tri->redo_limiting = YES;
         else
             tri->redo_limiting = NO;
}

/*
*       Subcell_limiting_1st_degreeP3_PNC():
*
*       Rebuilds the degree 0 and degree 1 coefficients (entries 0, 1, 2 of
*       dg_Dens, dg_Mom[0..2], dg_Energy, dg_B[0..2]) of the solution on tri
*       from averages over neighbouring control volumes (CVs) and writes them
*       into midsoln[tri->id].st[0].  Reconstruction uses partial neighbouring
*       cells (PNC).  The component layout is hard coded as
*       0: density, 1-3: momentum, 4: energy, 5-7: Mag()/dg_B().
*
*       Outline:
*       1. Lave[k] = value on tri minus Rave[k], where Rave comes from
*          R_degree2_above_term_averageP3() (presumably the contribution of
*          the degree >= 2 terms -- confirm against that routine).
*       2. The CV stencil comes from
*          get_tri_neighbr_CV_stencil_from_set_HR_sten().
*       3. For every cyclic pair of CV centres (i, i+1) a 2x2 matrix of
*          offsets from the centroid of tri, scaled by 1/sqrt(area), is built.
*          The matrices and their condition numbers are cached in
*          alltri_HR_sten_2[tri->id] on first use.
*       4. Each pair yields a candidate slope through comp_coef2(); the
*          candidates are always merged with WENO_mod_1() weights.
*       5. dg_B[0][1] and dg_B[1][2] are adjusted so that their sum is zero.
*
*       Input:
*       tri         - triangle being limited.
*       nbtri       - edge neighbours of tri, forwarded to the stencil builder.
*       midsoln     - per triangle intermediate states; st[0] receives the
*                     result, st[rk_iter] is the input when
*                     rk_iter != RK_STEP.
*       store       - not read by this routine.
*       rk_iter     - RK_STEP selects tri->st / tris[i]->st as input states.
*       comput_mat  - YES: (allocate on first use and) fill
*                     alltri_mass_1st_rows[tri->id].mass_1st_rows[] for the
*                     stencil.  Those rows are read below in either case.
*       detect_extr - YES: run NEW_extrema_detec() and zero the slopes of the
*                     components it flags.
*       check_quadr - not read by this routine (the quadrature check is
*                     disabled here).
*
*       NOTE(review): the input state selection tolerates midsoln == NULL,
*       but st2 is taken from midsoln unconditionally right after, so a NULL
*       midsoln still faults.  Confirm whether callers ever pass NULL.
*       NOTE(review): stencil arrays hold at most 20 CVs; num_CVs is not
*       checked against that.
*/
LOCAL void Subcell_limiting_1st_degreeP3_PNC(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter,
         int       comput_mat,
	 int       detect_extr,
         int       check_quadr)
{
         Locstate st, st2, tmpst;
         float    uave[8], nbuave[30][8];
         float    Rave[8], nbRave[30][8];
         float    Lave[8], nbLave[30][8];
         int      i, dim = 2, k, is_bad_sten[20], num_CVs;
         int      nbcv_indx[20], nbcv_side[20];
         double   *cent, *nbcent[20], rside[3], sqrt_area;
         double   *c_num, ***A;
         float    coef[30][2];
         float    u0, u1, u2, w[20], arrya[20], arryb[20];
         TRI      *tris[30];

         if(NULL == midsoln || rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];

         st2 = midsoln[tri->id].st[0];

         uave[0] = Dens(st);
         for(i = 0; i < 3; i++)
             uave[i+1] = Mom(st)[i];
         uave[4] = Energy(st);
         uave[5] = Mag(st)[0];
         uave[6] = Mag(st)[1];
         uave[7] = Mag(st)[2];

         R_degree2_above_term_averageP3(tri,st2,tri->Bmass_matrix,Rave);

         cent = fg_centroid(tri);
         sqrt_area = sqrt(fg_area(tri));

         // Other stencil builders with the same argument list exist in this
         // file family (e.g. get_tri_near_neighbr_CV_stencil,
         // get_tri_center_CV_stencil); this routine uses the HR stencil one.
         num_CVs = get_tri_neighbr_CV_stencil_from_set_HR_sten(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         if(alltri_HR_sten_2[tri->id].HR_sten_set_1st == NO)
         {
             tri_array(&A,num_CVs,2,2,sizeof(double));
             vector(&c_num,num_CVs,sizeof(double));

             // Row 0: centroid -> CV centre i, row 1: centroid -> CV centre
             // i+1 (cyclic); both scaled by 1/sqrt(area of tri).
             for(i = 0; i < num_CVs; i++)
             {
                 A[i][0][0] = (nbcent[i][0]-cent[0])/sqrt_area;
                 A[i][0][1] = (nbcent[i][1]-cent[1])/sqrt_area;
                 A[i][1][0] = (nbcent[(i+1)%num_CVs][0]-cent[0])/sqrt_area;
                 A[i][1][1] = (nbcent[(i+1)%num_CVs][1]-cent[1])/sqrt_area;
                 c_num[i] = cond_num2(A,i);
             }
             alltri_HR_sten_2[tri->id].HR_sten_set_1st = YES;
             alltri_HR_sten_2[tri->id].c_num = c_num;
             alltri_HR_sten_2[tri->id].A = A;
         }
         else
         {
             A = alltri_HR_sten_2[tri->id].A;
             c_num = alltri_HR_sten_2[tri->id].c_num;
         }

         if(comput_mat == YES)
         {
             // First use for this triangle: allocate all 30 row buffers.
             if(alltri_mass_1st_rows[tri->id].mass_1st_rows[0] == NULL)
             {
                 for(i = 0; i < 30; i++)
                     matrix(&(alltri_mass_1st_rows[tri->id].mass_1st_rows[i]), 1,
                               MAX_N_COEF,sizeof(double));
             }

             for(i = 0; i < num_CVs; i++)
                 comp_CV_Mag_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,nbcv_indx[i],
                        cent,sqrt_area,alltri_mass_1st_rows[tri->id].mass_1st_rows[i]);
         }

         // Average of the solution over each stencil CV
         for(i = 0; i < num_CVs; i++)
         {
             if(rk_iter == RK_STEP)
                 tmpst = tris[i]->st;
             else
                 tmpst = midsoln[tris[i]->id].st[rk_iter];
             avg_st_on_cv_ver3_MHD(tris[i], nbcv_indx[i], tmpst, nbuave[i]);
         }

         // Remainder of st2 over each stencil CV, using the cached rows
         for(i = 0; i < num_CVs; i++)
         {
             R_degree2_above_term_averageP3(tris[i],st2,
                   alltri_mass_1st_rows[tri->id].mass_1st_rows[i],nbRave[i]);
         }

         for(k = 0; k < N_EQN; k++)
             Lave[k] = uave[k]-Rave[k];

         for(i = 0; i < num_CVs; i++)
         {
             for(k = 0; k < N_EQN; k++)
                 nbLave[i][k] = nbuave[i][k]-nbRave[i][k];
         }

         if(detect_extr == YES)
             NEW_extrema_detec(Lave,nbLave,num_CVs,is_bad_sten);

         for(k = 0; k < N_EQN; k++)
         {
             // linear part of polynomial: one candidate slope per CV pair
             for(i = 0; i < num_CVs; i++)
             {
                 rside[0] = nbLave[i][k] - Lave[k];
                 rside[1] = nbLave[(i+1)%num_CVs][k] - Lave[k];
                 comp_coef2(A,rside,coef[i],i);
             }

             // WENO combination of the candidates (always active here)
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coef[i][0];
                 arryb[i] = coef[i][1];
             }
             WENO_mod_1(arrya, arryb, c_num, num_CVs, w);
             u1 = u2 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u1 += w[i]*coef[i][0];
                 u2 += w[i]*coef[i][1];
             }
             // detect_extr is tested first: is_bad_sten is only filled when
             // extrema detection actually ran.
             if(detect_extr == YES && is_bad_sten[k] == YES)
             {
                 u1 = u2 = 0.0;
             }
             u0 = Lave[k];

             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Mom(st2)[2][0] = u0;
                 dg_Mom(st2)[2][1] = u1;
                 dg_Mom(st2)[2][2] = u2;
             break;
             case 4:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             case 5:
                 dg_B(st2)[0][0] = u0;
                 dg_B(st2)[0][1] = u1;
                 dg_B(st2)[0][2] = u2;
             break;
             case 6:
                 dg_B(st2)[1][0] = u0;
                 dg_B(st2)[1][1] = u1;
                 dg_B(st2)[1][2] = u2;
             break;
             case 7:
                 dg_B(st2)[2][0] = u0;
                 dg_B(st2)[2][1] = u1;
                 dg_B(st2)[2][2] = u2;
             break;
             default:
             break;
             }
         }

         Dens(st2) = Dens(st);
         Energy(st2) = Energy(st);
         for(i = 0; i < 3; i++)
             Mom(st2)[i] = Mom(st)[i];
         for(i = 0; i < 3; i++)
             Mag(st2)[i] = Mag(st)[i];

         ///// Re-enforce divergence-free property:
         ///// keep the sign of dg_B[0][1], shrink its magnitude to the smaller
         ///// of |dg_B[0][1]| and |dg_B[1][2]|, then set dg_B[1][2] to its
         ///// negative so the two coefficients sum to zero.
         if(dg_B(st2)[0][1] >= 0.0)
             dg_B(st2)[0][1] = min(fabs(dg_B(st2)[0][1]), fabs(dg_B(st2)[1][2]));
         else
             dg_B(st2)[0][1] = -min(fabs(dg_B(st2)[0][1]), fabs(dg_B(st2)[1][2]));
         dg_B(st2)[1][2] = -dg_B(st2)[0][1];
}

// Limit the degree-0 and degree-1 coefficients of the solution on tri using
// control-volume (CV) averages taken from partial neighboring cells (PNC),
// after the stencil CVs have been merged ("grouped") into gnum_CVs groups.
//
// Outline:
//   1. Input state st is tri->st when rk_iter == RK_STEP, otherwise
//      midsoln[tri->id].st[rk_iter].  Output state is always
//      st2 = midsoln[tri->id].st[0].
//   2. The CV stencil comes from get_tri_near_neighbr_overlap_CV_stencil();
//      each CV average is read from the triangles (RK_STEP) or from
//      store[rk_iter] (intermediate stages).
//   3. The value returned by R_degree2_above_term_averageP3() is subtracted
//      from every average (presumably the contribution of the degree >= 2
//      terms -- confirm against that routine).
//   4. CVs are merged according to grouping_n[] / grouping_indx[][] using
//      CVmass_matrix[cv][0] as the weight (called "area" below).
//   5. Every pair of consecutive groups yields one candidate gradient through
//      a 2x2 solve (comp_coef2); candidates are blended with WENO_mod_1()
//      when debugging("weno_w") is on, or with minmod when
//      debugging("cent_bias") is on.
//   6. dg_Dens/dg_Mom/dg_Energy[0..2] of st2 receive the result, the cell
//      means of st2 are copied from st, and tri->redo_limiting is set.
//
// Parameters:
//   tri          cell being limited.
//   nbtri        its three edge neighbors (forwarded to the stencil builder).
//   midsoln      per-triangle intermediate states, indexed by tri->id.
//   store        per-stage CV coefficient storage; only store[rk_iter] is
//                read, and only when rk_iter != RK_STEP.
//   rk_iter      Runge-Kutta stage selector (see item 1).
//   comput_mat   YES: recompute mass_1st_rows[i] for every stencil CV.
//   detect_extr  YES: run NEW_extrema_detec() and zero the slopes of flagged
//                equations (applied in the "weno_w" branch only).
//   check_quadr  YES: test st2 with unphysical_st_at_quadrature().
//
// The 2x2 matrices A and their condition numbers c_num are built on the first
// call for a triangle and cached in alltri_HR_sten_2_grp[tri->id].
//
// NOTE(review): gnum_CVs, grouping_n and grouping_indx are not declared in
// this function; they are assumed to be file-scope data set up elsewhere.
// NOTE(review): the local arrays hold 4 equations (uave, Rave, Lave, ...);
// this routine therefore assumes N_EQN <= 4 -- confirm with callers.

// Weight of stencil CV number g (first entry of its CV mass-matrix row).
#define PNC_GRP_CV_MASS(g) \
        (tris[(g)]->CVmass_matrix[nbcv_indx[(g)]][0])

LOCAL void Subcell_limiting_1st_degreeP3_PNC_grouping(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter,
         int       comput_mat,
         int       detect_extr,
         int       check_quadr)
{
         Locstate st, nbst[3], st2;
         float    uave[4], nbuave[30][4];
         float    Rave[4], nbRave[30][4];
         float    Lave[4], nbLave[30][8];
         int      i, j, k, dim = 2, num_CVs, is_bad_sten[20];
         int      g0, g1, g2, need_cent;
         int      nbcv_indx[20], nbcv_side[20];
         double   *cent, *nbcent[20], rside[3], diam, eps = 0.05;
         double   ***A, *c_num;
         float    coef[30][2];
         float    u0, u1, u2, avg1, avg2, w[20], arrya[20], arryb[20];
         TRI      *tris[30];
         float    gnbcent[20][3], area;
         float    gnbuave[30][4];
         float    gnbRave[30][4];

         // nbst is loaded as in the sibling limiters but is not consulted by
         // the active code path below.
         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < 3; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
                 nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }

         st2 = midsoln[tri->id].st[0];
         uave[0] = Dens(st);
         uave[1] = Mom(st)[0];
         uave[2] = Mom(st)[1];
         uave[3] = Energy(st);
         R_degree2_above_term_averageP3(tri,st2,tri->Lmass_matrix,Rave);

         diam = fg_diam(tri);
         cent = fg_centroid(tri);

         ///// currently used stencil for regrouping
         num_CVs = get_tri_near_neighbr_overlap_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);

         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         if(comput_mat == YES)
         {
             ///// check this..... 2nd degree used comp_CV_mass_matrix_1st_row
             for(i = 0; i < num_CVs; i++)
                 comp_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,fg_centroid(tri),mass_1st_rows[i]);
         }

         if(rk_iter == RK_STEP)
         {
             for(i = 0; i < num_CVs; i++)
                 avg_st_on_cv_ver2(tris[i], nbcv_indx[i], NULL, nbuave[i]);
         }
         else
         {
             for(i = 0; i < num_CVs; i++)
                 avg_st_on_cv_from_store(tris[i], nbcv_indx[i], NULL, store[rk_iter], nbuave[i]);
         }

         for(i = 0; i < num_CVs; i++)
             R_degree2_above_term_averageP3(tris[i],st2,mass_1st_rows[i],nbRave[i]);

         // Group centroids are only needed while the cached matrices A have
         // not been built yet for this triangle.
         need_cent = (alltri_HR_sten_2_grp[tri->id].HR_sten_set_1st == NO);

         // Merge the stencil CVs into groups of one, two or three members.
         // NOTE(review): a group size other than 1, 2 or 3 leaves
         // gnbuave[i]/gnbRave[i] unset.
         for(i = 0; i < gnum_CVs; i++)
         {
              if(grouping_n[i] == 1)
              {
                  // Single member: plain copy, no weighting.
                  g0 = grouping_indx[i][0];
                  if(need_cent)
                  {
                      for(j = 0; j < dim; j++)
                          gnbcent[i][j] = nbcent[g0][j];
                  }
                  for(k = 0; k < N_EQN; k++)
                  {
                      gnbuave[i][k] = nbuave[g0][k];
                      gnbRave[i][k] = nbRave[g0][k];
                  }
              }
              else if(grouping_n[i] == 2)
              {
                  g0 = grouping_indx[i][0];
                  g1 = grouping_indx[i][1];
                  area = PNC_GRP_CV_MASS(g0) +
                         PNC_GRP_CV_MASS(g1);
                  if(need_cent)
                  {
                      for(j = 0; j < dim; j++)
                      {
                          gnbcent[i][j] = (
                            nbcent[g0][j]*PNC_GRP_CV_MASS(g0) +
                            nbcent[g1][j]*PNC_GRP_CV_MASS(g1)
                                      )/area;
                      }
                  }
                  for(k = 0; k < N_EQN; k++)
                  {
                      gnbuave[i][k] = (nbuave[g0][k]*PNC_GRP_CV_MASS(g0) +
                                       nbuave[g1][k]*PNC_GRP_CV_MASS(g1)
                                      )/area;
                      gnbRave[i][k] = (nbRave[g0][k]*PNC_GRP_CV_MASS(g0) +
                                       nbRave[g1][k]*PNC_GRP_CV_MASS(g1)
                                      )/area;
                  }
              }
              else if(grouping_n[i] == 3)
              {
                  g0 = grouping_indx[i][0];
                  g1 = grouping_indx[i][1];
                  g2 = grouping_indx[i][2];
                  area = PNC_GRP_CV_MASS(g0) +
                         PNC_GRP_CV_MASS(g1) +
                         PNC_GRP_CV_MASS(g2);
                  if(need_cent)
                  {
                      for(j = 0; j < dim; j++)
                      {
                          gnbcent[i][j] = (nbcent[g0][j]*PNC_GRP_CV_MASS(g0) +
                                       nbcent[g1][j]*PNC_GRP_CV_MASS(g1) +
                                       nbcent[g2][j]*PNC_GRP_CV_MASS(g2)
                                      )/area;
                      }
                  }
                  for(k = 0; k < N_EQN; k++)
                  {
                      gnbuave[i][k] = (nbuave[g0][k]*PNC_GRP_CV_MASS(g0) +
                                       nbuave[g1][k]*PNC_GRP_CV_MASS(g1) +
                                       nbuave[g2][k]*PNC_GRP_CV_MASS(g2)
                                      )/area;
                      gnbRave[i][k] = (nbRave[g0][k]*PNC_GRP_CV_MASS(g0) +
                                       nbRave[g1][k]*PNC_GRP_CV_MASS(g1) +
                                       nbRave[g2][k]*PNC_GRP_CV_MASS(g2)
                                      )/area;
                  }
              }
         }

         // Averages with the R_degree2_above_term_averageP3() part removed.
         for(k = 0; k < N_EQN; k++)
         {
             Lave[k] = uave[k]-Rave[k];
             for(i = 0; i < gnum_CVs; i++)
                 nbLave[i][k] = gnbuave[i][k]-gnbRave[i][k];
         }

         if(detect_extr == YES)
             NEW_extrema_detec(Lave,nbLave,gnum_CVs,is_bad_sten);

         if(alltri_HR_sten_2_grp[tri->id].HR_sten_set_1st == NO)
         {
             // First visit of this triangle: build and cache the 2x2
             // matrices (rows = offsets of two consecutive group centroids
             // from the cell centroid) and their condition numbers.
             tri_array(&A,gnum_CVs,2,2,sizeof(double));
             vector(&c_num,gnum_CVs,sizeof(double));

             for(i = 0; i < gnum_CVs; i++)
             {
                 A[i][0][0] = (gnbcent[i][0]-cent[0]);
                 A[i][0][1] = (gnbcent[i][1]-cent[1]);
                 A[i][1][0] = (gnbcent[(i+1)%gnum_CVs][0]-cent[0]);
                 A[i][1][1] = (gnbcent[(i+1)%gnum_CVs][1]-cent[1]);
                 c_num[i] = cond_num2(A,i);
             }
             alltri_HR_sten_2_grp[tri->id].HR_sten_set_1st = YES;
             alltri_HR_sten_2_grp[tri->id].c_num = c_num;
             alltri_HR_sten_2_grp[tri->id].A = A;
         }
         else
         {
             A = alltri_HR_sten_2_grp[tri->id].A;
             c_num = alltri_HR_sten_2_grp[tri->id].c_num;
         }

         for(k = 0; k < N_EQN; k++)
         {
             // Defaults used when neither limiter below is selected: keep
             // the mean and drop the slopes.  Previously u0/u1/u2 were left
             // uninitialized on that path and still written to st2.
             u0 = Lave[k];
             u1 = u2 = 0.0;

             // One candidate gradient per pair of consecutive groups.
             for(i = 0; i < gnum_CVs; i++)
             {
                 rside[0] = nbLave[i][k] - Lave[k];
                 rside[1] = nbLave[(i+1)%gnum_CVs][k] - Lave[k];
                 comp_coef2(A,rside,coef[i],i);
             }
             if(debugging("weno_w"))
             {
                 for(i = 0; i < gnum_CVs; i++)
                 {
                     arrya[i] = coef[i][0];
                     arryb[i] = coef[i][1];
                 }

                 WENO_mod_1(arrya, arryb, c_num, gnum_CVs, w);
                 u1 = u2 = 0.0;
                 for(i = 0; i < gnum_CVs; i++)
                 {
                     u1 += w[i]*coef[i][0];
                     u2 += w[i]*coef[i][1];
                 }
                 // detect_extr is tested first: is_bad_sten[] is only
                 // filled in when extrema detection was requested.
                 if(detect_extr == YES && is_bad_sten[k] == YES)
                 {
                     u1 = u2 = 0.0;
                 }
             }
             else if(debugging("cent_bias"))
             {
                 // Only gnum_CVs candidates exist in coef[]; iterating up to
                 // num_CVs (as before) read unset entries whenever grouping
                 // reduced the stencil count.
                 avg1 = 0.0; u1 = coef[0][0];
                 avg2 = 0.0; u2 = coef[0][1];
                 for(i = 0; i < gnum_CVs; i++)
                 {
                     avg1 += coef[i][0];
                     u1 = minmod(coef[i][0],u1);
                     avg2 += coef[i][1];
                     u2 = minmod(coef[i][1],u2);
                 }
                 avg1 /= gnum_CVs;
                 avg2 /= gnum_CVs;
                 u1 = minmod(((1+eps)*u1), avg1);
                 u2 = minmod(((1+eps)*u2), avg2);
             }
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             }
         }
         // Cell means of the output state are taken unchanged from st.
         Dens(st2) = Dens(st);
         Mom(st2)[0] = Mom(st)[0];
         Mom(st2)[1] = Mom(st)[1];
         Energy(st2) = Energy(st);

         if(check_quadr == YES && N_EQN == 4 && YES == unphysical_st_at_quadrature(tri, st2))
             tri->redo_limiting = YES;
         else
             tri->redo_limiting = NO;
}
#undef PNC_GRP_CV_MASS




// Combine the CV polynomial coefficients with indices 6..9 stored in
// tri->cv_soln for control volume cv_indx with entries 3..5 of
// mass_1st_row[0], and divide by mass_1st_row[0][0].
//
//   diff == 0 :  (3*c[6]*m3 + 2*c[7]*m4 +   c[8]*m5)/m0
//   otherwise :  (  c[7]*m3 + 2*c[8]*m4 + 3*c[9]*m5)/m0
//
// ave[0] is density, ave[1], ave[2] the two momentum components and ave[3]
// the energy.  Only ave[0] is written when N_EQN == 1.

// The two weighted combinations; c is the coefficient row of one variable.
#define CVR3_WRT_X(c) \
        ((3.0*(c)[6]*m3 + \
          2.0*(c)[7]*m4 + \
              (c)[8]*m5)/area)
#define CVR3_WRT_Y(c) \
        (((c)[7]*m3 + \
      2.0*(c)[8]*m4 + \
      3.0*(c)[9]*m5)/area)

LOCAL void CV_R_degree3_term_averageP3(
         TRI       *tri,
         int       cv_indx,
         double **mass_1st_row,
         int       diff, // diff = 0, w.r.t. x; diff = 1, w.r.t. y.
         float     *ave)
{
         const double *row = mass_1st_row[0];
         float        area = row[0];
         double       m3 = row[3], m4 = row[4], m5 = row[5];
         int          wrt_x = (diff == 0);

         ave[0] = wrt_x ? CVR3_WRT_X(tri->cv_soln->cv_dg_rho[cv_indx])
                        : CVR3_WRT_Y(tri->cv_soln->cv_dg_rho[cv_indx]);

         // Scalar problems carry a single equation.
         if(N_EQN == 1)
             return;

         if(wrt_x)
         {
             ave[1] = CVR3_WRT_X(tri->cv_soln->cv_dg_m[0][cv_indx]);
             ave[2] = CVR3_WRT_X(tri->cv_soln->cv_dg_m[1][cv_indx]);
             ave[3] = CVR3_WRT_X(tri->cv_soln->cv_dg_e[cv_indx]);
         }
         else
         {
             ave[1] = CVR3_WRT_Y(tri->cv_soln->cv_dg_m[0][cv_indx]);
             ave[2] = CVR3_WRT_Y(tri->cv_soln->cv_dg_m[1][cv_indx]);
             ave[3] = CVR3_WRT_Y(tri->cv_soln->cv_dg_e[cv_indx]);
         }
}
#undef CVR3_WRT_X
#undef CVR3_WRT_Y

// Average of the 3rd-degree terms of the polynomial after differentiation,
// with the coefficients taken from limit_st instead of the triangle.
//
// For control volume cv_indx the coefficients with indices 6..9 are combined
// with entries 3..5 of mass_1st_row[0] and divided by mass_1st_row[0][0]:
//
//   diff == 0 :  (3*c[6]*m3 + 2*c[7]*m4 +   c[8]*m5)/m0
//   otherwise :  (  c[7]*m3 + 2*c[8]*m4 + 3*c[9]*m5)/m0
//
// ave[0] is density, ave[1], ave[2] the two momentum components and ave[3]
// the energy.  Only ave[0] is written when N_EQN == 1.  tri is not read.

// The two weighted combinations; c is the coefficient row of one variable.
#define CVR3S_WRT_X(c) \
        ((3.0*(c)[6]*m3 + \
          2.0*(c)[7]*m4 + \
              (c)[8]*m5)/area)
#define CVR3S_WRT_Y(c) \
        (((c)[7]*m3 + \
      2.0*(c)[8]*m4 + \
      3.0*(c)[9]*m5)/area)

LOCAL void CV_R_degree3_term_averageP3_store(
         TRI       *tri,
         int       cv_indx,
         Limiting_store limit_st,
         float     **mass_1st_row,
         int       diff, // diff = 0, w.r.t. x; diff = 1, w.r.t. y.
         float     *ave)
{
         const float *row = mass_1st_row[0];
         float       area = row[0];
         float       m3 = row[3], m4 = row[4], m5 = row[5];
         int         wrt_x = (diff == 0);

         ave[0] = wrt_x ? CVR3S_WRT_X(limit_st.cv_dg_rho[cv_indx])
                        : CVR3S_WRT_Y(limit_st.cv_dg_rho[cv_indx]);

         // Scalar problems carry a single equation.
         if(N_EQN == 1)
             return;

         if(wrt_x)
         {
             ave[1] = CVR3S_WRT_X(limit_st.cv_dg_m[0][cv_indx]);
             ave[2] = CVR3S_WRT_X(limit_st.cv_dg_m[1][cv_indx]);
             ave[3] = CVR3S_WRT_X(limit_st.cv_dg_e[cv_indx]);
         }
         else
         {
             ave[1] = CVR3S_WRT_Y(limit_st.cv_dg_m[0][cv_indx]);
             ave[2] = CVR3S_WRT_Y(limit_st.cv_dg_m[1][cv_indx]);
             ave[3] = CVR3S_WRT_Y(limit_st.cv_dg_e[cv_indx]);
         }
}
#undef CVR3S_WRT_X
#undef CVR3S_WRT_Y

// Reconstructed coeffs. are stored in limit_store for CV polynomials
// reconstruct for CVs
LOCAL void Subcell_limiting_3rd_degreeP3(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter)
{
         Locstate st, nbst[3], st2;
         int      cv_indx, num_CVs, i, k;
         int      on_SV_side, on_SV_side2, nbcv_indx[9], nbcv_side[9]; 
         TRI      *tris[9];
         float    *cent, *nbcent[9], diam;
         float    uxxave[4], nbuxxave[9][8]; // [cv_indx][eqn#]
         float    uxyave[4], nbuxyave[9][8];
         float    uyyave[4], nbuyyave[9][8]; 
         int      is_bad_stenxx[9], is_bad_stenxy[9], is_bad_stenyy[9];
         float    rside[3], Axx[30][2][2], Axy[30][2][2], Ayy[30][2][2];
         float    coefxx[30][2], coefxy[30][2], coefyy[30][2];
         float    c_num_xx[30], c_num_xy[30], c_num_yy[30]; // condition number of stencils  
         float    u6, u7, u8, u9, u7_0, u7_1, u8_0, u8_1,
                   avg3, avg4, avg5, arrya[30], arryb[30], w[30];

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             nbst[0] = nbtri[0]->st;
             nbst[1] = nbtri[1]->st;
             nbst[2] = nbtri[2]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
                 nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }
         st2 = midsoln[tri->id].st[0];
         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         diam = fg_diam(tri)/3.0;

         for(cv_indx = 0; cv_indx < 4; cv_indx++)
         {
             cent = tri->CVcent[cv_indx];
             num_CVs = get_tri_CV_stencil(tri,nbtri,cv_indx,nbcv_indx,nbcv_side,tris,&num_CVs);
             for(i = 0; i < num_CVs; i++)
                 nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

             if(rk_iter == RK_STEP)
             {
                 CV_u_average_indx(tri,cv_indx,3,uxxave);
                 CV_u_average_indx(tri,cv_indx,4,uxyave);
                 CV_u_average_indx(tri,cv_indx,5,uyyave);                 
                 for(i = 0; i < num_CVs; i++)
                 {
                     CV_u_average_indx(tris[i],nbcv_indx[i],3,nbuxxave[i]);
                     CV_u_average_indx(tris[i],nbcv_indx[i],4,nbuxyave[i]);
                     CV_u_average_indx(tris[i],nbcv_indx[i],5,nbuyyave[i]);                 
                 }
             }
             else
             {
                 CV_u_average_indx_from_store(tri,cv_indx,3,store[rk_iter],uxxave);
                 CV_u_average_indx_from_store(tri,cv_indx,4,store[rk_iter],uxyave);
                 CV_u_average_indx_from_store(tri,cv_indx,5,store[rk_iter],uyyave);
                 for(i = 0; i < num_CVs; i++)
                 {
                     CV_u_average_indx_from_store(tris[i],nbcv_indx[i],3,store[rk_iter],nbuxxave[i]);  
                     CV_u_average_indx_from_store(tris[i],nbcv_indx[i],4,store[rk_iter],nbuxyave[i]);  
                     CV_u_average_indx_from_store(tris[i],nbcv_indx[i],5,store[rk_iter],nbuyyave[i]);  
                 }
             }
             for(k = 0; k < N_EQN; k++)
             {
                 uxxave[k] *= 2.0;
                 uyyave[k] *= 2.0;
             }
             for(i = 0; i < num_CVs; i++)
             {
                 for(k = 0; k < N_EQN; k++)
                 {
                     nbuxxave[i][k] *= 2.0;
                     nbuyyave[i][k] *= 2.0;
                 }
             }
             NEW_extrema_detec(uxxave,nbuxxave,num_CVs,is_bad_stenxx);
             NEW_extrema_detec(uxyave,nbuxyave,num_CVs,is_bad_stenxy);
             NEW_extrema_detec(uyyave,nbuyyave,num_CVs,is_bad_stenyy);

             for(i = 0; i < num_CVs; i++)
             {
                 Axx[i][0][0] = 6.0*(nbcent[i][0]-cent[0]);
                 Axx[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
                 Axx[i][1][0] = 6.0*(nbcent[(i+1)%num_CVs][0]-cent[0]);
                 Axx[i][1][1] = 2.0*(nbcent[(i+1)%num_CVs][1]-cent[1]);
                 c_num_xx[i] = cond_num(Axx[i]);

                 Axy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
                 Axy[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
                 Axy[i][1][0] = 2.0*(nbcent[(i+1)%num_CVs][0]-cent[0]);
                 Axy[i][1][1] = 2.0*(nbcent[(i+1)%num_CVs][1]-cent[1]);
                 c_num_xy[i] = cond_num(Axy[i]);

                 Ayy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
                 Ayy[i][0][1] = 6.0*(nbcent[i][1]-cent[1]);
                 Ayy[i][1][0] = 2.0*(nbcent[(i+1)%num_CVs][0]-cent[0]);
                 Ayy[i][1][1] = 6.0*(nbcent[(i+1)%num_CVs][1]-cent[1]);
                 c_num_yy[i] = cond_num(Ayy[i]);
             }
             for(k = 0; k < N_EQN; k++)
             {
                 for(i = 0; i < num_CVs; i++)
                 {
                     rside[0] = nbuxxave[i][k] - uxxave[k];
                     rside[1] = nbuxxave[(i+1)%num_CVs][k] - uxxave[k];
                     comp_coef(Axx[i],rside,coefxx[i]);

                     rside[0] = nbuxyave[i][k] - uxyave[k];
                     rside[1] = nbuxyave[(i+1)%num_CVs][k] - uxyave[k];
                     comp_coef(Axy[i],rside,coefxy[i]);

                     rside[0] = nbuyyave[i][k] - uyyave[k];
                     rside[1] = nbuyyave[(i+1)%num_CVs][k] - uyyave[k];
                     comp_coef(Ayy[i],rside,coefyy[i]);
                 }
                 /////////////////// WENO
                 ///// u_xx polynomial
                 for(i = 0; i < num_CVs; i++)
                 {
                     arrya[i] = coefxx[i][0];
                     arryb[i] = coefxx[i][1];
                 }
                 WENO_mod_on_3rd(arrya,arryb,c_num_xx,num_CVs,diam,w);
                 u7_0 = u6 = 0.0;
                 for(i = 0; i < num_CVs; i++)
                 {
                     u6   += w[i]*coefxx[i][0];
                     u7_0 += w[i]*coefxx[i][1];
                 }
                 if(is_bad_stenxx[k] == YES)
                 {
                     u6 = 0.0; u7_0 = 0.0;
                 }
                 ///// u_xy polynomial
                 for(i = 0; i < num_CVs; i++)
                 {
                     arrya[i] = coefxy[i][0];
                     arryb[i] = coefxy[i][1];
                 }
                 WENO_mod_on_3rd(arrya,arryb,c_num_xy,num_CVs,diam,w);
                 // area_WENO_mod_on_3rd(arrya,arryb,c_num_xy,sten_n,diam,area,w);
                 u7_1 = u8_0 = 0.0;
                 for(i = 0; i < num_CVs; i++)
                 {
                     u7_1 += w[i]*coefxy[i][0];
                     u8_0 += w[i]*coefxy[i][1];
                 }
                 if(is_bad_stenxy[k] == YES)
                 {
                     u7_1 = 0.0; u8_0 = 0.0;
                 }
                 ///// u_yy polynomial
                 for(i = 0; i < num_CVs; i++)
                 {
                     arrya[i] = coefyy[i][0];
                     arryb[i] = coefyy[i][1];
                 }
                 WENO_mod_on_3rd(arrya,arryb,c_num_yy,num_CVs,diam,w);
                 // area_WENO_mod_on_3rd(arrya,arryb,c_num_yy,sten_n,diam,area,w);
                 u8_1 = u9 = 0.0;
                 for(i = 0; i < num_CVs; i++)
                 {
                     u8_1 += w[i]*coefyy[i][0];
                     u9   += w[i]*coefyy[i][1];
                 }
                 if(is_bad_stenyy[k] == YES)
                 {
                     u8_1 = 0.0; u9 = 0.0;
                 }
                 u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
                 u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));
                 /////////////////// End WENO
                 switch(k)
                 {
                  case 0:
                      store[0][tri->id].cv_dg_rho[cv_indx][6] = u6;
                      store[0][tri->id].cv_dg_rho[cv_indx][7] = u7;
                      store[0][tri->id].cv_dg_rho[cv_indx][8] = u8;
                      store[0][tri->id].cv_dg_rho[cv_indx][9] = u9;
                  break;
                  case 1:
                      store[0][tri->id].cv_dg_m[0][cv_indx][6] = u6;
                      store[0][tri->id].cv_dg_m[0][cv_indx][7] = u7;
                      store[0][tri->id].cv_dg_m[0][cv_indx][8] = u8;
                      store[0][tri->id].cv_dg_m[0][cv_indx][9] = u9;
                  break;
                  case 2:
                      store[0][tri->id].cv_dg_m[1][cv_indx][6] = u6;
                      store[0][tri->id].cv_dg_m[1][cv_indx][7] = u7;
                      store[0][tri->id].cv_dg_m[1][cv_indx][8] = u8;
                      store[0][tri->id].cv_dg_m[1][cv_indx][9] = u9;
                  break;
                  case 3:
                      store[0][tri->id].cv_dg_e[cv_indx][6] = u6;
                      store[0][tri->id].cv_dg_e[cv_indx][7] = u7;
                      store[0][tri->id].cv_dg_e[cv_indx][8] = u8;
                      store[0][tri->id].cv_dg_e[cv_indx][9] = u9;
                  break;
                 }
             }
         }
}

/*
 * Subcell_limiting_3rd_degreeP3_PNC
 *
 * Rebuilds coefficients 6..9 of every equation's DG polynomial on `tri` from
 * averages taken over neighbouring control volumes (CVs) -- "reconstruct by
 * partial neighboring cells" -- and writes them into midsoln[tri->id].st[0].
 *
 * Outline:
 *   1. Collect the CV stencil with
 *      get_tri_neighbr_CV_stencil_from_set_HR_sten().
 *   2. Candidate i pairs stencil entries i and (i+1)%num_CVs.  Its 2x2
 *      systems (Axx/Axy/Ayy) and condition numbers are built on the first
 *      visit of the triangle and cached in alltri_HR_sten_2[tri->id];
 *      later calls reuse the cached arrays.
 *   3. For each equation k, solve every candidate with comp_coef2(), blend
 *      the candidates with the weights returned by WENO_mod_on_3rd(), and
 *      merge the two estimates available for coefficients 7 and 8 with a
 *      minmod.
 *   4. Couple the B-field coefficients pairwise
 *      (B[0][6]/B[1][7], B[0][7]/B[1][8], B[0][8]/B[1][9]).
 *
 * Parameters:
 *   tri          triangle whose coefficients are limited.
 *   nbtri        neighbouring triangles; only forwarded to the stencil builder.
 *   midsoln      per-triangle intermediate states; .st[rk_iter] is the input
 *                when rk_iter != RK_STEP, .st[0] always receives the result.
 *   store        not used by this routine; kept so the signature matches
 *                its callers.
 *   rk_iter      stage index; RK_STEP selects the states stored on the
 *                triangles themselves.
 *   detect_extr  YES to run NEW_extrema_detec() and zero the blended values
 *                of every equation it flags.
 *
 * NOTE(review): the first test below tolerates midsoln == NULL, yet the
 * output state is fetched from midsoln unconditionally, so a NULL midsoln
 * cannot actually be handled here -- confirm that callers never pass NULL.
 * NOTE(review): the stencil arrays hold 20 entries and nothing in this
 * routine checks num_CVs against that capacity; presumably the stencil
 * builder guarantees it -- verify.
 */
LOCAL void Subcell_limiting_3rd_degreeP3_PNC(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter,
         int       detect_extr)
{
         Locstate st, st2, tmpst;
         int      num_CVs, i, k, nxt;
         int      nbcv_indx[20], nbcv_side[20];
         TRI      *tris[20];
         double   *cent, *nbcent[20], diam, area, cube_factor, tmp_area;
         float    uxxave[8], nbuxxave[20][8]; // [cv_indx][eqn#]
         float    uxyave[8], nbuxyave[20][8];
         float    uyyave[8], nbuyyave[20][8];
         int      is_bad_stenxx[20], is_bad_stenxy[20], is_bad_stenyy[20];
         float    coefxx[30][2], coefxy[30][2], coefyy[30][2], rside[3];
         double   ***Axx, ***Axy, ***Ayy;
         double   *c_num_xx, *c_num_xy, *c_num_yy; // condition number of stencils
         float    u6, u7, u8, u9, u7_0, u7_1, u8_0, u8_1;
         float    arrya[30], arryb[30], w[30];
         int      debug = NO; // set to YES (e.g. for one tri->id) to trace this routine

         // Input state: the triangle's own state on the final step, otherwise
         // the intermediate state of the current stage.
         if(NULL == midsoln || rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];

         // Output state.
         st2 = midsoln[tri->id].st[0];
         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         cent = fg_centroid(tri);
         diam = fg_diam(tri);
         area = fg_area(tri);
         cube_factor = area*sqrt(fg_area(tri)); // area^(3/2)

         // Stencil of neighbouring CVs.  Builders tried here in the past:
         // get_tri_edge_vert_CV_stencil, get_tri_near_neighbr_CV_stencil,
         // get_tri_near_neighbr_CV_stencil_ver2, get_tri_center_CV_stencil,
         // get_tri_near_neighbr_overlap_CV_stencil,
         // get_tri_liu_neighbr_overlap_CV_stencil,
         // get_tri_CV_stencil_unstable_SV_P2_partition_no_regrouping.
         num_CVs = get_tri_neighbr_CV_stencil_from_set_HR_sten(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);

         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         // The dump must come after the stencil call: num_CVs, tris[] and
         // nbcv_indx[] hold no valid data before it.
         if(debug == YES)
         {
             printf("neighboring CVs found = %d\n", num_CVs);
             for(i = 0; i < num_CVs; i++)
             {
                 printf("indx[%d] nbtri[%d], nbcv_indx[%d]\n",i, tris[i]->id, nbcv_indx[i]);
                 print_tri_crds(tris[i]);
             }
         }

         if(alltri_HR_sten_2[tri->id].HR_sten_set_3rd == NO)
         {
             //// 3rd degree coef. reconstruction stencil storage
             tri_array(&(Axx),num_CVs,2,2,sizeof(double));
             tri_array(&(Axy),num_CVs,2,2,sizeof(double));
             tri_array(&(Ayy),num_CVs,2,2,sizeof(double));
             vector(&(c_num_xx),num_CVs,sizeof(double));
             vector(&(c_num_xy),num_CVs,sizeof(double));
             vector(&(c_num_yy),num_CVs,sizeof(double));

             // Row 0 uses CV i, row 1 uses its cyclic successor.
             for(i = 0; i < num_CVs; i++)
             {
                 nxt = (i+1)%num_CVs;

                 Axx[i][0][0] = 6.0*(nbcent[i][0]-cent[0])/cube_factor;
                 Axx[i][0][1] = 2.0*(nbcent[i][1]-cent[1])/cube_factor;
                 Axx[i][1][0] = 6.0*(nbcent[nxt][0]-cent[0])/cube_factor;
                 Axx[i][1][1] = 2.0*(nbcent[nxt][1]-cent[1])/cube_factor;
                 c_num_xx[i] = cond_num2(Axx,i);

                 Axy[i][0][0] = 2.0*(nbcent[i][0]-cent[0])/cube_factor;
                 Axy[i][0][1] = 2.0*(nbcent[i][1]-cent[1])/cube_factor;
                 Axy[i][1][0] = 2.0*(nbcent[nxt][0]-cent[0])/cube_factor;
                 Axy[i][1][1] = 2.0*(nbcent[nxt][1]-cent[1])/cube_factor;
                 c_num_xy[i] = cond_num2(Axy,i);

                 Ayy[i][0][0] = 2.0*(nbcent[i][0]-cent[0])/cube_factor;
                 Ayy[i][0][1] = 6.0*(nbcent[i][1]-cent[1])/cube_factor;
                 Ayy[i][1][0] = 2.0*(nbcent[nxt][0]-cent[0])/cube_factor;
                 Ayy[i][1][1] = 6.0*(nbcent[nxt][1]-cent[1])/cube_factor;
                 c_num_yy[i] = cond_num2(Ayy,i);
             }

             // Hand the arrays to the per-triangle cache for later calls.
             alltri_HR_sten_2[tri->id].c_num_xx = c_num_xx;
             alltri_HR_sten_2[tri->id].c_num_xy = c_num_xy;
             alltri_HR_sten_2[tri->id].c_num_yy = c_num_yy;
             alltri_HR_sten_2[tri->id].Axx = Axx;
             alltri_HR_sten_2[tri->id].Axy = Axy;
             alltri_HR_sten_2[tri->id].Ayy = Ayy;
             alltri_HR_sten_2[tri->id].HR_sten_set_3rd = YES;
         }
         else
         {
             Axx = alltri_HR_sten_2[tri->id].Axx;
             Axy = alltri_HR_sten_2[tri->id].Axy;
             Ayy = alltri_HR_sten_2[tri->id].Ayy;
             c_num_xx = alltri_HR_sten_2[tri->id].c_num_xx;
             c_num_xy = alltri_HR_sten_2[tri->id].c_num_xy;
             c_num_yy = alltri_HR_sten_2[tri->id].c_num_yy;
         }

         // Averages on the target triangle (indices 3,4,5 -> xx, xy, yy).
         u_average_indx(tri,st,3,uxxave);
         u_average_indx(tri,st,4,uxyave);
         u_average_indx(tri,st,5,uyyave);

         for(k = 0; k < N_EQN; k++)
         {
             uxxave[k] *= (2.0/area);
             uyyave[k] *= (2.0/area);
             uxyave[k] /= area;
         }

         // Averages on every stencil CV, each divided by the area of the
         // triangle that owns the CV.
         for(i = 0; i < num_CVs; i++)
         {
             tmp_area = fg_area(tris[i]);
             if(rk_iter == RK_STEP)
                 tmpst = tris[i]->st;
             else
                 tmpst = midsoln[tris[i]->id].st[rk_iter];

             CV_u_average_indx_MHD(tris[i],nbcv_indx[i],3,tmpst,nbuxxave[i]);
             CV_u_average_indx_MHD(tris[i],nbcv_indx[i],4,tmpst,nbuxyave[i]);
             CV_u_average_indx_MHD(tris[i],nbcv_indx[i],5,tmpst,nbuyyave[i]);
             for(k = 0; k < N_EQN; k++)
             {
                 nbuxxave[i][k] /= tmp_area;
                 nbuxyave[i][k] /= tmp_area;
                 nbuyyave[i][k] /= tmp_area;
             }
         }

         if(debug == YES)
         {
             for(i = 0; i < num_CVs; i++)
                 printf("neighbr[%d], xx ave = %g, xy ave = %g, yy ave = %g\n",
                          i, nbuxxave[i][0], nbuxyave[i][0], nbuyyave[i][0]);
         }

         // The is_bad_sten* flags are written only on this path, so they may
         // be read only when detect_extr == YES (see the tests further down).
         if(detect_extr == YES)
         {
             NEW_extrema_detec(uxxave,nbuxxave,num_CVs,is_bad_stenxx);
             NEW_extrema_detec(uxyave,nbuxyave,num_CVs,is_bad_stenxy);
             NEW_extrema_detec(uyyave,nbuyyave,num_CVs,is_bad_stenyy);
         }

         for(k = 0; k < N_EQN; k++)
         {
             // Solve each candidate's 2x2 system for the three families.
             for(i = 0; i < num_CVs; i++)
             {
                 nxt = (i+1)%num_CVs;

                 rside[0] = nbuxxave[i][k] - uxxave[k];
                 rside[1] = nbuxxave[nxt][k] - uxxave[k];
                 comp_coef2(Axx,rside,coefxx[i],i);

                 rside[0] = nbuxyave[i][k] - uxyave[k];
                 rside[1] = nbuxyave[nxt][k] - uxyave[k];
                 comp_coef2(Axy,rside,coefxy[i],i);

                 rside[0] = nbuyyave[i][k] - uyyave[k];
                 rside[1] = nbuyyave[nxt][k] - uyyave[k];
                 comp_coef2(Ayy,rside,coefyy[i],i);
             }

             /////////////////// WENO
             ///// u_xx polynomial: gives coef. 6 and one estimate of coef. 7
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefxx[i][0];
                 arryb[i] = coefxx[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_xx,num_CVs,diam,w);
             u7_0 = u6 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u6   += w[i]*coefxx[i][0];
                 u7_0 += w[i]*coefxx[i][1];
             }
             // detect_extr is tested first so the flags are never read
             // uninitialized.
             if(detect_extr == YES && is_bad_stenxx[k] == YES)
             {
                 u6 = 0.0; u7_0 = 0.0;
             }

             ///// u_xy polynomial: second estimate of coef. 7, first of coef. 8
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefxy[i][0];
                 arryb[i] = coefxy[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_xy,num_CVs,diam,w);
             u7_1 = u8_0 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u7_1 += w[i]*coefxy[i][0];
                 u8_0 += w[i]*coefxy[i][1];
             }
             if(detect_extr == YES && is_bad_stenxy[k] == YES)
             {
                 u7_1 = 0.0; u8_0 = 0.0;
             }

             ///// u_yy polynomial: second estimate of coef. 8, and coef. 9
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefyy[i][0];
                 arryb[i] = coefyy[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_yy,num_CVs,diam,w);

             if(k == 0 && debug == YES)
             {
                 for(i = 0; i < num_CVs; i++)
                     printf("sten[%d], xyy, yyy weight = %g\n", i, w[i]);
             }

             u8_1 = u9 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u8_1 += w[i]*coefyy[i][0];
                 u9   += w[i]*coefyy[i][1];
             }
             if(detect_extr == YES && is_bad_stenyy[k] == YES)
             {
                 u8_1 = 0.0; u9 = 0.0;
             }

             // Merge the two estimates of coef. 7 and of coef. 8.
             u7 = minmod((1+0.05)*minmod(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod((1+0.05)*minmod(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             /////////////////// End WENO

             // Store by equation number.
             switch(k)
             {
              case 0:
                  dg_Dens(st2)[6] = u6;
                  dg_Dens(st2)[7] = u7;
                  dg_Dens(st2)[8] = u8;
                  dg_Dens(st2)[9] = u9;
              break;
              case 1:
                  dg_Mom(st2)[0][6] = u6;
                  dg_Mom(st2)[0][7] = u7;
                  dg_Mom(st2)[0][8] = u8;
                  dg_Mom(st2)[0][9] = u9;
              break;
              case 2:
                  dg_Mom(st2)[1][6] = u6;
                  dg_Mom(st2)[1][7] = u7;
                  dg_Mom(st2)[1][8] = u8;
                  dg_Mom(st2)[1][9] = u9;
              break;
              case 3:
                  dg_Mom(st2)[2][6] = u6;
                  dg_Mom(st2)[2][7] = u7;
                  dg_Mom(st2)[2][8] = u8;
                  dg_Mom(st2)[2][9] = u9;
              break;
              case 4:
                  dg_Energy(st2)[6] = u6;
                  dg_Energy(st2)[7] = u7;
                  dg_Energy(st2)[8] = u8;
                  dg_Energy(st2)[9] = u9;
              break;
              case 5:
                  dg_B(st2)[0][6] = u6;
                  dg_B(st2)[0][7] = u7;
                  dg_B(st2)[0][8] = u8;
                  dg_B(st2)[0][9] = u9;
              break;
              case 6:
                  dg_B(st2)[1][6] = u6;
                  dg_B(st2)[1][7] = u7;
                  dg_B(st2)[1][8] = u8;
                  dg_B(st2)[1][9] = u9;
              break;
              case 7:
                  dg_B(st2)[2][6] = u6;
                  dg_B(st2)[2][7] = u7;
                  dg_B(st2)[2][8] = u8;
                  dg_B(st2)[2][9] = u9;
              break;
              default:
              break;
             }
         }

         // re-enforce divergence-free: in each pair the B[0] coefficient keeps
         // its sign, its magnitude is clipped against the (scaled) partner, and
         // the partner is then overwritten from it.
         if(dg_B(st2)[0][6] >= 0.0)
             dg_B(st2)[0][6] = min(fabs(dg_B(st2)[0][6]), fabs(dg_B(st2)[1][7])/3.0);
         else
             dg_B(st2)[0][6] = -min(fabs(dg_B(st2)[0][6]), fabs(dg_B(st2)[1][7])/3.0);
         dg_B(st2)[1][7] = -3.0*dg_B(st2)[0][6];

         if(dg_B(st2)[0][7] >= 0.0)
             dg_B(st2)[0][7] = min(fabs(dg_B(st2)[0][7]), fabs(dg_B(st2)[1][8]));
         else
             dg_B(st2)[0][7] = -min(fabs(dg_B(st2)[0][7]), fabs(dg_B(st2)[1][8]));
         dg_B(st2)[1][8] = -dg_B(st2)[0][7];

         if(dg_B(st2)[0][8] >= 0.0)
             dg_B(st2)[0][8] = min(fabs(dg_B(st2)[0][8]), 3.0*fabs(dg_B(st2)[1][9]));
         else
             dg_B(st2)[0][8] = -min(fabs(dg_B(st2)[0][8]), 3.0*fabs(dg_B(st2)[1][9]));
         dg_B(st2)[1][9] = -dg_B(st2)[0][8]/3.0;

         if(debug == YES)
         {
             g_verbose_print_state(st2);
         }
}

// Reconstructed coeffs. are stored in limit_store for CV polynomials
// reconstruct by partial neighboring cells
LOCAL void Subcell_limiting_3rd_degreeP3_PNC_grouping(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter,
	 int       detect_extr)
{
         Locstate st, nbst[3], st2;
         int      cv_indx, num_CVs, i, k, j, dim = 2;
         int      on_SV_side, on_SV_side2, nbcv_indx[20], nbcv_side[20]; 
         TRI      *tris[20];
         float    *cent, *nbcent[20], diam;
         float    uxxave[4], nbuxxave[20][4]; // tmpnbuxxave[20][4], tmpuxxave[4]; // [cv_indx][eqn#]
         float    uxyave[4], nbuxyave[20][4]; // tmpnbuxyave[20][4], tmpuxyave[4];
         float    uyyave[4], nbuyyave[20][4]; // tmpnbuyyave[20][4], tmpuyyave[4]; 
         int      is_bad_stenxx[20], is_bad_stenxy[20], is_bad_stenyy[20];
         float    rside[3];
         float    coefxx[30][2], coefxy[30][2], coefyy[30][2];
         // float    c_num_xx[30], c_num_xy[30], c_num_yy[30]; // condition number of stencils  
         double   *c_num_xx, *c_num_xy, *c_num_yy; // condition number of stencils  
         double   ***Axx, ***Axy, ***Ayy;
         float    u6, u7, u8, u9, u7_0, u7_1, u8_0, u8_1,
                   avg3, avg4, avg5, arrya[30], arryb[30], w[30];
         int      debug = NO, over_lap[3] = {0, 3, 6};
         float    Axxb[30][2][2], Axyb[30][2][2], Ayyb[30][2][2], 
                  c_num_xxb[30], c_num_xyb[30], c_num_yyb[30],
                  coefxxb[30][2], coefxyb[30][2], coefyyb[30][2];
         float    gnbcent[20][3], gnbuxxave[20][8], gnbuxyave[20][8], gnbuyyave[20][8], area;

         /**
         if(tri->id == 1178)
         {
             printf("Subcell_limiting_3rd_degreeP3_PNC for tri[%d]\n", tri->id);
             debug = YES;
         }
         ***/

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < 3; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
                 nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }
         st2 = midsoln[tri->id].st[0];
         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         /////////// TMP, make P^2
         /**
         for(i = 6; i < MAX_N_COEF; i++)
         {
              dg_Dens(st2)[i] = 0.0; dg_Mom(st2)[0][i] = 0.0;
              dg_Mom(st2)[1][i] = 0.0; dg_Energy(st2)[i] = 0.0;
         }
         return;
         **/
         ////////// END TMP

         cent = fg_centroid(tri);
         // diam = sqr(fg_diam(tri));
         // diam = fg_diam(tri);
	 diam = 1.0;
         
         /*
         if(debug == YES)
         {
             printf("neighboring CVs found = %d\n", num_CVs);
             for(i = 0; i < num_CVs; i++)
             {
                 printf("indx[%d] nbtri[%d], nbcv_indx[%d]\n",i, tris[i]->id, nbcv_indx[i]);
                 print_tri_crds(tris[i]);
             }
         }
         **/

         // num_CVs = get_tri_edge_vert_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_near_neighbr_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // current lax test
         // num_CVs = get_tri_near_neighbr_CV_stencil_ver2(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_center_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         num_CVs = get_tri_near_neighbr_overlap_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_liu_neighbr_overlap_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
  
         /// num_CVs = get_tri_CV_stencil_unstable_SV_P2_partition(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         u_average_indx(tri,st,3,uxxave);
         u_average_indx(tri,st,4,uxyave);
         u_average_indx(tri,st,5,uyyave);                 
         for(k = 0; k < N_EQN; k++)
         {
             uxxave[k] *= 2.0;
             uyyave[k] *= 2.0;
         }

         //////////// NEW, whole cell to detect extreme
         /***
         for(i = 0; i < 3; i++)
         {
             u_average_indx(nbtri[i],nbst[i],3,nbuxxave[i]);
             u_average_indx(nbtri[i],nbst[i],4,nbuxyave[i]);
             u_average_indx(nbtri[i],nbst[i],5,nbuyyave[i]);
             for(k = 0; k < N_EQN; k++)
             {
                 nbuxxave[i][k] *= 2.0;
                 nbuyyave[i][k] *= 2.0;
             }
         }
         if(detect_extr == YES)
         {
             NEW_extrema_detec(uxxave,nbuxxave,3,is_bad_stenxx);
             NEW_extrema_detec(uxyave,nbuxyave,3,is_bad_stenxy);
             NEW_extrema_detec(uyyave,nbuyyave,3,is_bad_stenyy);
         }
         ***/
         //////////////////

         if(rk_iter == RK_STEP)
         {
             for(i = 0; i < num_CVs; i++)
             {
                 CV_u_average_indx(tris[i],nbcv_indx[i],3,nbuxxave[i]);
                 CV_u_average_indx(tris[i],nbcv_indx[i],4,nbuxyave[i]);
                 CV_u_average_indx(tris[i],nbcv_indx[i],5,nbuyyave[i]);                 
             }
         }
         else
         {
             for(i = 0; i < num_CVs; i++)
             {
                 CV_u_average_indx_from_store(tris[i],nbcv_indx[i],3,store[rk_iter],nbuxxave[i]);  
                 CV_u_average_indx_from_store(tris[i],nbcv_indx[i],4,store[rk_iter],nbuxyave[i]);  
                 CV_u_average_indx_from_store(tris[i],nbcv_indx[i],5,store[rk_iter],nbuyyave[i]);  
             }
         }

         for(i = 0; i < num_CVs; i++)
         {
             for(k = 0; k < N_EQN; k++)
             {
                 nbuxxave[i][k] *= 2.0;
                 nbuyyave[i][k] *= 2.0;
             }
         }
 
         ///////////// TMP
         if(debug == YES)
         {
             for(i = 0; i < num_CVs; i++)
                 printf("neighbr[%d], xx ave = %g, xy ave = %g, yy ave = %g\n",
                          i, nbuxxave[i][0], nbuxyave[i][0], nbuyyave[i][0]);
         }

         ////////////// comput regrouping averages

         for(i = 0; i < gnum_CVs; i++)
         {
              if(grouping_n[i] == 1)
              {
                  if(alltri_HR_sten_2_grp[tri->id].HR_sten_set_3rd == NO)
                  {
                      for(j = 0; j < dim; j++)
                          gnbcent[i][j] = nbcent[grouping_indx[i][0]][j];
                  }
                  for(k = 0; k < N_EQN; k++)
                  {
                      gnbuxxave[i][k] = nbuxxave[grouping_indx[i][0]][k];
                      gnbuxyave[i][k] = nbuxyave[grouping_indx[i][0]][k];
                      gnbuyyave[i][k] = nbuyyave[grouping_indx[i][0]][k];
                  }
              }
              else if(grouping_n[i] == 2)
              {
                  area = tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0];
                  if(alltri_HR_sten_2_grp[tri->id].HR_sten_set_3rd == NO)
                  {
                      for(j = 0; j < dim; j++)
                      {
                          gnbcent[i][j] = (nbcent[grouping_indx[i][0]][j]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                       nbcent[grouping_indx[i][1]][j]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0]
                                      )/area;
                      }
                  }
                  for(k = 0; k < N_EQN; k++)
                  {
                      gnbuxxave[i][k] = (nbuxxave[grouping_indx[i][0]][k]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                         nbuxxave[grouping_indx[i][1]][k]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0]
                                        )/area;
                      gnbuxyave[i][k] = (nbuxyave[grouping_indx[i][0]][k]*
                                          tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                         nbuxyave[grouping_indx[i][1]][k]*
                                          tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0]
                                        )/area;
                      gnbuyyave[i][k] = (nbuyyave[grouping_indx[i][0]][k]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                         nbuyyave[grouping_indx[i][1]][k]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0]
                                        )/area;
                  }
              }
              else if(grouping_n[i] == 3)
              {
                  area = tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0] +
                         tris[grouping_indx[i][2]]->CVmass_matrix[nbcv_indx[grouping_indx[i][2]]][0];
                  if(alltri_HR_sten_2_grp[tri->id].HR_sten_set_3rd == NO)
                  {
                      for(j = 0; j < dim; j++)
                      {
                          gnbcent[i][j] = (nbcent[grouping_indx[i][0]][j]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                       nbcent[grouping_indx[i][1]][j]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0] +
                                       nbcent[grouping_indx[i][2]][j]*
                                         tris[grouping_indx[i][2]]->CVmass_matrix[nbcv_indx[grouping_indx[i][2]]][0]
                                      )/area;
                      }
                  }
                  for(k = 0; k < N_EQN; k++)
                  {
                      gnbuxxave[i][k] = (nbuxxave[grouping_indx[i][0]][k]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                         nbuxxave[grouping_indx[i][1]][k]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0] +
                                         nbuxxave[grouping_indx[i][2]][k]*
                                         tris[grouping_indx[i][2]]->CVmass_matrix[nbcv_indx[grouping_indx[i][2]]][0] 
                                        )/area;
                      gnbuxyave[i][k] = (nbuxyave[grouping_indx[i][0]][k]*
                                          tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                         nbuxyave[grouping_indx[i][1]][k]*
                                          tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0] +
                                         nbuxyave[grouping_indx[i][2]][k]*
                                          tris[grouping_indx[i][2]]->CVmass_matrix[nbcv_indx[grouping_indx[i][2]]][0] 
                                        )/area;
                      gnbuyyave[i][k] = (nbuyyave[grouping_indx[i][0]][k]*
                                         tris[grouping_indx[i][0]]->CVmass_matrix[nbcv_indx[grouping_indx[i][0]]][0] +
                                         nbuyyave[grouping_indx[i][1]][k]*
                                         tris[grouping_indx[i][1]]->CVmass_matrix[nbcv_indx[grouping_indx[i][1]]][0] +
                                         nbuyyave[grouping_indx[i][2]][k]*
                                         tris[grouping_indx[i][2]]->CVmass_matrix[nbcv_indx[grouping_indx[i][2]]][0] 
                                        )/area;
                  }
              }
         } 

         if(detect_extr == YES)
         {
             /**
             u_average_indx(tri,st,3,tmpuxxave);
             u_average_indx(tri,st,4,tmpuxyave);
             u_average_indx(tri,st,5,tmpuyyave);
             for(k = 0; k < N_EQN; k++)
             {
                 tmpuxxave[k] *= 2.0;
                 tmpuyyave[k] *= 2.0;
             }
             for(i = 0; i < 3; i++)
             {
                 u_average_indx(nbtri[i],nbst[i],3,tmpnbuxxave[i]);
                 u_average_indx(nbtri[i],nbst[i],4,tmpnbuxyave[i]);
                 u_average_indx(nbtri[i],nbst[i],5,tmpnbuyyave[i]);
                 for(k = 0; k < N_EQN; k++)
                 {
                     tmpnbuxxave[i][k] *= 2.0;
                     tmpnbuyyave[i][k] *= 2.0;
                 }
             }
             **/
             // NEW_extrema_detec(uxxave,nbuxxave,3,is_bad_stenxx);
             // NEW_extrema_detec(uxyave,nbuxyave,3,is_bad_stenxy);
             // NEW_extrema_detec(uyyave,nbuyyave,3,is_bad_stenyy);

             NEW_extrema_detec(uxxave,gnbuxxave,gnum_CVs,is_bad_stenxx);
             NEW_extrema_detec(uxyave,gnbuxyave,gnum_CVs,is_bad_stenxy);
             NEW_extrema_detec(uyyave,gnbuyyave,gnum_CVs,is_bad_stenyy);
         }

         if(alltri_HR_sten_2_grp[tri->id].HR_sten_set_3rd == NO)
         {
             //// 3rd degree coef. reconstruction stencil storage
             tri_array(&(Axx),gnum_CVs,2,2,sizeof(double));
             tri_array(&(Axy),gnum_CVs,2,2,sizeof(double));
             tri_array(&(Ayy),gnum_CVs,2,2,sizeof(double));
             vector(&(c_num_xx),gnum_CVs,sizeof(double));
             vector(&(c_num_xy),gnum_CVs,sizeof(double));
             vector(&(c_num_yy),gnum_CVs,sizeof(double));

             for(i = 0; i < gnum_CVs; i++)
             {
                 Axx[i][0][0] = 6.0*(gnbcent[i][0]-cent[0]);
                 Axx[i][0][1] = 2.0*(gnbcent[i][1]-cent[1]);
                 Axx[i][1][0] = 6.0*(gnbcent[(i+1)%gnum_CVs][0]-cent[0]);
                 Axx[i][1][1] = 2.0*(gnbcent[(i+1)%gnum_CVs][1]-cent[1]);
                 // c_num_xx[i] = cond_num(Axx[i]);
                 c_num_xx[i] = cond_num2(Axx,i);

                 Axy[i][0][0] = 2.0*(gnbcent[i][0]-cent[0]);
                 Axy[i][0][1] = 2.0*(gnbcent[i][1]-cent[1]);
                 Axy[i][1][0] = 2.0*(gnbcent[(i+1)%gnum_CVs][0]-cent[0]);
                 Axy[i][1][1] = 2.0*(gnbcent[(i+1)%gnum_CVs][1]-cent[1]);
                 // c_num_xy[i] = cond_num(Axy[i]);
                 c_num_xy[i] = cond_num2(Axy,i);

                 Ayy[i][0][0] = 2.0*(gnbcent[i][0]-cent[0]);
                 Ayy[i][0][1] = 6.0*(gnbcent[i][1]-cent[1]);
                 Ayy[i][1][0] = 2.0*(gnbcent[(i+1)%gnum_CVs][0]-cent[0]);
                 Ayy[i][1][1] = 6.0*(gnbcent[(i+1)%gnum_CVs][1]-cent[1]);
                 // c_num_yy[i] = cond_num(Ayy[i]);
                 c_num_yy[i] = cond_num2(Ayy,i);
             }
             alltri_HR_sten_2_grp[tri->id].c_num_xx = c_num_xx;
             alltri_HR_sten_2_grp[tri->id].c_num_xy = c_num_xy;
             alltri_HR_sten_2_grp[tri->id].c_num_yy = c_num_yy;
             alltri_HR_sten_2_grp[tri->id].Axx = Axx;
             alltri_HR_sten_2_grp[tri->id].Axy = Axy;
             alltri_HR_sten_2_grp[tri->id].Ayy = Ayy;
             alltri_HR_sten_2_grp[tri->id].HR_sten_set_3rd = YES;
         }
         else
         {
             Axx = alltri_HR_sten_2_grp[tri->id].Axx; 
             Axy = alltri_HR_sten_2_grp[tri->id].Axy;
             Ayy = alltri_HR_sten_2_grp[tri->id].Ayy;
             c_num_xx = alltri_HR_sten_2_grp[tri->id].c_num_xx; 
             c_num_xy = alltri_HR_sten_2_grp[tri->id].c_num_xy;
             c_num_yy = alltri_HR_sten_2_grp[tri->id].c_num_yy;
         }

         for(k = 0; k < N_EQN; k++)
         {
             for(i = 0; i < gnum_CVs; i++)
             {
                 rside[0] = gnbuxxave[i][k] - uxxave[k];
                 rside[1] = gnbuxxave[(i+1)%gnum_CVs][k] - uxxave[k];
                 // comp_coef(Axx[i],rside,coefxx[i]);
                 comp_coef2(Axx,rside,coefxx[i],i);

                 rside[0] = gnbuxyave[i][k] - uxyave[k];
                 rside[1] = gnbuxyave[(i+1)%gnum_CVs][k] - uxyave[k];
                 // comp_coef(Axy[i],rside,coefxy[i]);
                 comp_coef2(Axy,rside,coefxy[i],i);

                 rside[0] = gnbuyyave[i][k] - uyyave[k];
                 rside[1] = gnbuyyave[(i+1)%gnum_CVs][k] - uyyave[k];
                 comp_coef2(Ayy,rside,coefyy[i],i);
                 //// TMP, debug
                 // if(k == 0 && debug == YES)
                 //     printf("sten[%d], solved xyy[%g] yyy[%g]\n", i, coefyy[i][0], coefyy[i][1]);
             }
             /////////////////// WENO
             ///// u_xx polynomial
             for(i = 0; i < gnum_CVs; i++)
             {
                 arrya[i] = coefxx[i][0];
                 arryb[i] = coefxx[i][1];
             }
             //// use every other stencil
	     /*
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i+num_CVs] = coefxxb[i][0];
                 arryb[i+num_CVs] = coefxxb[i][1];
             }
	     */
             ////END:  use every other stencil
             WENO_mod_on_3rd(arrya,arryb,c_num_xx,gnum_CVs,diam,w);
             u7_0 = u6 = 0.0;
             for(i = 0; i < gnum_CVs; i++)
             {
                 u6   += w[i]*coefxx[i][0];
                 u7_0 += w[i]*coefxx[i][1];
             }
             /// use every other stencil
	     /*
             for(i = 0; i < num_CVs; i++)
             {
                 u6   += w[i+num_CVs]*coefxxb[i][0];
                 u7_0 += w[i+num_CVs]*coefxxb[i][1];
             }
	     */
             ///END: use every other stencil
             if(is_bad_stenxx[k] == YES && detect_extr == YES)
             {
                 u6 = 0.0; u7_0 = 0.0;
             }
             ///// u_xy polynomial
             for(i = 0; i < gnum_CVs; i++)
             {
                 arrya[i] = coefxy[i][0];
                 arryb[i] = coefxy[i][1];
             }
             /// use every other stencil
	     /*
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i+num_CVs] = coefxyb[i][0];
                 arryb[i+num_CVs] = coefxyb[i][1];
             }
	     */
             ///END: use every other stencil
             WENO_mod_on_3rd(arrya,arryb,c_num_xy,gnum_CVs,diam,w);
             // area_WENO_mod_on_3rd(arrya,arryb,c_num_xy,sten_n,diam,area,w);
             u7_1 = u8_0 = 0.0;
             for(i = 0; i < gnum_CVs; i++)
             {
                 u7_1 += w[i]*coefxy[i][0];
                 u8_0 += w[i]*coefxy[i][1];
             }
             if(is_bad_stenxy[k] == YES && detect_extr == YES)
             {
                 u7_1 = 0.0; u8_0 = 0.0;
             }
             ///// u_yy polynomial
             for(i = 0; i < gnum_CVs; i++)
             {
                 arrya[i] = coefyy[i][0];
                 arryb[i] = coefyy[i][1];
             }

             WENO_mod_on_3rd(arrya,arryb,c_num_yy,gnum_CVs,diam,w);
             // area_WENO_mod_on_3rd(arrya,arryb,c_num_yy,sten_n,diam,area,w);
             
             /////// TMP
             if(k == 0 && debug == YES)
             {
                 for(i = 0; i < gnum_CVs; i++)
                     printf("sten[%d], xyy, yyy weight = %g\n", i, w[i]);
             }
             ////// END TMP
             u8_1 = u9 = 0.0;
              
             for(i = 0; i < gnum_CVs; i++)
             {
                 u8_1 += w[i]*coefyy[i][0];
                 u9   += w[i]*coefyy[i][1];
             }
             if(is_bad_stenyy[k] == YES && detect_extr == YES)
             {
                 u8_1 = 0.0; u9 = 0.0;
             }
             u7 = minmod((1+0.05)*minmod(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod((1+0.05)*minmod(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             // u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             // u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             /////////////////// End WENO
             switch(k)
             {
              case 0:
                  if(fabs(dg_Dens(st)[6]) < 1.0E-13)
                      dg_Dens(st2)[6] = 0.0;
                  else
                      dg_Dens(st2)[6] = u6;
                  if(fabs(dg_Dens(st)[7]) < 1.0E-13)
                      dg_Dens(st2)[7] = 0.0;
                  else
                      dg_Dens(st2)[7] = u7;
                  if(fabs(dg_Dens(st)[8]) < 1.0E-13)
                      dg_Dens(st2)[8] = 0.0;
                  else
                      dg_Dens(st2)[8] = u8;
                  if(fabs(dg_Dens(st)[9]) < 1.0E-13)
                      dg_Dens(st2)[9] = 0.0;
                  else
                      dg_Dens(st2)[9] = u9;
              break;
              case 1:
                  if(fabs(dg_Mom(st)[0][6]) < 1.0E-13)
                      dg_Mom(st2)[0][6] = 0.0;
                  else
                      dg_Mom(st2)[0][6] = u6;
                  if(fabs(dg_Mom(st)[0][7]) < 1.0E-13)
                      dg_Mom(st2)[0][7] = 0.0;
                  else
                      dg_Mom(st2)[0][7] = u7;
                  if(fabs(dg_Mom(st)[0][8]) < 1.0E-13)
                      dg_Mom(st2)[0][8] = 0.0;
                  else
                      dg_Mom(st2)[0][8] = u8;
                  if(fabs(dg_Mom(st)[0][9]) < 1.0E-13)
                      dg_Mom(st2)[0][9] = 0.0;
                  else
                      dg_Mom(st2)[0][9] = u9;
              break;
              case 2:
                  if(fabs(dg_Mom(st)[1][6]) < 1.0E-13)
                      dg_Mom(st2)[1][6] = 0.0;
                  else
                      dg_Mom(st2)[1][6] = u6;
                  if(fabs(dg_Mom(st)[1][7]) < 1.0E-13)
                      dg_Mom(st2)[1][7] = 0.0;
                  else
                      dg_Mom(st2)[1][7] = u7;
                  if(fabs(dg_Mom(st)[1][8]) < 1.0E-13)
                      dg_Mom(st2)[1][8] = 0.0;
                  else
                      dg_Mom(st2)[1][8] = u8;
                  if(fabs(dg_Mom(st)[1][9]) < 1.0E-13)
                      dg_Mom(st2)[1][9] = 0.0;
                  else
                      dg_Mom(st2)[1][9] = u9;
              break;
              case 3:
                  if(fabs(dg_Energy(st)[6]) < 1.0E-13)
                      dg_Energy(st2)[6] = 0.0;
                  else
                      dg_Energy(st2)[6] = u6;
                  if(fabs(dg_Energy(st)[7]) < 1.0E-13)
                      dg_Energy(st2)[7] = 0.0;
                  else
                      dg_Energy(st2)[7] = u7;
                  if(fabs(dg_Energy(st)[8]) < 1.0E-13)
                      dg_Energy(st2)[8] = 0.0;
                  else
                      dg_Energy(st2)[8] = u8;
                  if(fabs(dg_Energy(st)[9]) < 1.0E-13)
                      dg_Energy(st2)[9] = 0.0;
                  else
                      dg_Energy(st2)[9] = u9;
              break;
             }
         }
}

/*
 * Copy coefficient `indx` of control volume `cv_indx` out of the limiting
 * store entry that belongs to `tri` (the entry is selected by tri->id).
 *
 *   uave[0]    <- cv_dg_rho[cv_indx][indx]
 *   uave[1..2] <- cv_dg_m[0..1][cv_indx][indx]   (skipped when N_EQN == 1)
 *   uave[3]    <- cv_dg_e[cv_indx][indx]         (skipped when N_EQN == 1)
 *
 * No arithmetic is performed here; the value is returned exactly as stored.
 * NOTE(review): the name suggests the stored entries are CV averages of the
 * indx-th coefficient -- confirm against the code that fills `store`.
 */
LOCAL void CV_u_average_indx_from_store(
         TRI       *tri,
         int       cv_indx,
         int       indx,
         Limiting_store *store,
         float     *uave)
{
         Limiting_store *entry = store + tri->id;
         int            dir;

         uave[0] = entry->cv_dg_rho[cv_indx][indx];

         /* Scalar problems carry a single equation: nothing more to copy. */
         if(N_EQN != 1)
         {
             for(dir = 0; dir < 2; dir++)
                 uave[dir+1] = entry->cv_dg_m[dir][cv_indx][indx];
             uave[3] = entry->cv_dg_e[cv_indx][indx];
         }
}

/*
 * Evaluate a first derivative of the cubic stored for control volume
 * `cv_indx` of `tri`, reading the coefficients c[] from store[tri->id].
 *
 * With dx = cent[0]-pt[0] and dy = cent[1]-pt[1]:
 *
 *   indx == 1:  c[1] + 2 c[3] dx +   c[4] dy
 *                    + 3 c[6] dx^2 + 2 c[7] dx dy +   c[8] dy^2
 *   indx == 2:  c[2] +   c[4] dx + 2 c[5] dy
 *                    +   c[7] dx^2 + 2 c[8] dx dy + 3 c[9] dy^2
 *
 * These match d/dX and d/dY of a cubic whose coefficients are ordered
 * 1, X, Y, X^2, XY, Y^2, X^3, X^2 Y, X Y^2, Y^3.
 * NOTE(review): the offset is formed as cent - pt (not pt - cent); confirm
 * which of the two arguments is meant to be the expansion origin.
 *
 * uave[0] is computed from cv_dg_rho; unless N_EQN == 1, uave[1] and
 * uave[2] come from cv_dg_m[0] and cv_dg_m[1], and uave[3] from cv_dg_e.
 * Any other indx prints an error and calls clean_up(ERROR).
 */
LOCAL void CV_du_indx_from_store(
         TRI       *tri,
         int       cv_indx,
         int       indx,
         Limiting_store *store,
         double    *cent,
         double    *pt,
         float     *uave)
{
         Limiting_store *entry;
         double         dx, dy;

/* Derivative formulas for one coefficient row c, evaluated at (dx,dy). */
#define STORE_DU_DX(c)                                               \
         ((c)[1] + 2.0*(c)[3]*dx + (c)[4]*dy +                       \
          3.0*(c)[6]*sqr(dx) + 2.0*(c)[7]*dx*dy + (c)[8]*sqr(dy))
#define STORE_DU_DY(c)                                               \
         ((c)[2] + (c)[4]*dx + 2.0*(c)[5]*dy +                       \
          (c)[7]*sqr(dx) + 2.0*(c)[8]*dx*dy + 3.0*(c)[9]*sqr(dy))

         /* Reject unsupported derivative selectors before touching data. */
         if(indx != 1 && indx != 2)
         {
             printf("ERROR: CV_du_indx_from_store()\n");
             clean_up(ERROR);
             return;
         }

         entry = store + tri->id;
         dx = cent[0] - pt[0];
         dy = cent[1] - pt[1];

         if(indx == 1)
         {
             uave[0] = STORE_DU_DX(entry->cv_dg_rho[cv_indx]);
             if(N_EQN != 1)
             {
                 uave[1] = STORE_DU_DX(entry->cv_dg_m[0][cv_indx]);
                 uave[2] = STORE_DU_DX(entry->cv_dg_m[1][cv_indx]);
                 uave[3] = STORE_DU_DX(entry->cv_dg_e[cv_indx]);
             }
         }
         else
         {
             uave[0] = STORE_DU_DY(entry->cv_dg_rho[cv_indx]);
             if(N_EQN != 1)
             {
                 uave[1] = STORE_DU_DY(entry->cv_dg_m[0][cv_indx]);
                 uave[2] = STORE_DU_DY(entry->cv_dg_m[1][cv_indx]);
                 uave[3] = STORE_DU_DY(entry->cv_dg_e[cv_indx]);
             }
         }

#undef STORE_DU_DX
#undef STORE_DU_DY
}


/*
 * Copy coefficient `indx` of control volume `cv_indx` from the CV solution
 * attached to the triangle (tri->cv_soln) into uave.
 *
 *   uave[0]    <- cv_dg_rho[cv_indx][indx]
 *   uave[1..2] <- cv_dg_m[0..1][cv_indx][indx]   (skipped when N_EQN == 1)
 *   uave[3]    <- cv_dg_e[cv_indx][indx]         (skipped when N_EQN == 1)
 *
 * The values are returned exactly as stored; nothing is recomputed.
 * NOTE(review): the name suggests these are CV averages of the indx-th
 * coefficient -- confirm against the code that fills tri->cv_soln.
 */
LOCAL void CV_u_average_indx(
         TRI       *tri,
         int       cv_indx,
         int       indx,
         float     *uave)
{
         int  dir;

         uave[0] = tri->cv_soln->cv_dg_rho[cv_indx][indx];

         /* Scalar problems carry a single equation: nothing more to copy. */
         if(N_EQN != 1)
         {
             for(dir = 0; dir < 2; dir++)
                 uave[dir+1] = tri->cv_soln->cv_dg_m[dir][cv_indx][indx];
             uave[3] = tri->cv_soln->cv_dg_e[cv_indx][indx];
         }
}

/*
 * Control-volume mean of a derivative of the polynomial held in `st`.
 * Only the P3 (cubic) case is supported.
 *
 * Writing c[] for a coefficient array of `st` and m[j] for
 * tri->CVmass_matrix[cv_indx][j], `indx` selects the combination
 *
 *   1 :   c[1] m0 + 2 c[3] m1 +   c[4] m2 + 3 c[6] m3 + 2 c[7] m4 +   c[8] m5
 *   2 :   c[2] m0 +   c[4] m1 + 2 c[5] m2 +   c[7] m3 + 2 c[8] m4 + 3 c[9] m5
 *   3 : 2 c[3] m0 + 6 c[6] m1 + 2 c[7] m2
 *   4 :   c[4] m0 + 2 c[7] m1 + 2 c[8] m2
 *   5 : 2 c[5] m0 + 2 c[8] m1 + 6 c[9] m2
 *
 * which matches u_x, u_y, u_xx, u_xy and u_yy of a cubic with coefficients
 * ordered 1, x, y, x^2, xy, y^2, x^3, x^2 y, x y^2, y^3.
 * NOTE(review): this presumes m0..m5 are the CV moments of
 * 1, x, y, x^2, xy, y^2 -- confirm where CVmass_matrix is assembled.
 *
 * Output layout: uave[0] from dg_Dens, uave[1..3] from dg_Mom[0..2],
 * uave[4] from dg_Energy, uave[5..7] from dg_B[0..2].  Eight entries are
 * always written, but only the first N_EQN are divided by m0 afterwards.
 * Any other indx prints an error and calls clean_up(ERROR).
 */
LOCAL void CV_u_average_indx_MHD(
         TRI       *tri,
         int       cv_indx,
         int       indx,
         Locstate  st,
         float     *uave)
{
         double  **mass = tri->CVmass_matrix;
         int     comp, eq;

/* j-th moment of the selected control volume. */
#define MHD_M(j)       (mass[cv_indx][(j)])

/* Unnormalised CV integrals of each derivative for coefficient array c. */
#define MHD_CV_UX(c)                                                   \
         ((c)[1]*MHD_M(0) + 2.0*(c)[3]*MHD_M(1) + (c)[4]*MHD_M(2) +    \
          3.0*(c)[6]*MHD_M(3) + 2.0*(c)[7]*MHD_M(4) + (c)[8]*MHD_M(5))
#define MHD_CV_UY(c)                                                   \
         ((c)[2]*MHD_M(0) + (c)[4]*MHD_M(1) + 2.0*(c)[5]*MHD_M(2) +    \
          (c)[7]*MHD_M(3) + 2.0*(c)[8]*MHD_M(4) + 3.0*(c)[9]*MHD_M(5))
#define MHD_CV_UXX(c)                                                  \
         (2.0*(c)[3]*MHD_M(0) + 6.0*(c)[6]*MHD_M(1) + 2.0*(c)[7]*MHD_M(2))
#define MHD_CV_UXY(c)                                                  \
         ((c)[4]*MHD_M(0) + 2.0*(c)[7]*MHD_M(1) + 2.0*(c)[8]*MHD_M(2))
#define MHD_CV_UYY(c)                                                  \
         (2.0*(c)[5]*MHD_M(0) + 2.0*(c)[8]*MHD_M(1) + 6.0*(c)[9]*MHD_M(2))

         switch(indx)
         {
         case 1:
             uave[0] = MHD_CV_UX(dg_Dens(st));
             for(comp = 0; comp < 3; comp++)
             {
                 uave[comp+1] = MHD_CV_UX(dg_Mom(st)[comp]);
                 uave[comp+5] = MHD_CV_UX(dg_B(st)[comp]);
             }
             uave[4] = MHD_CV_UX(dg_Energy(st));
             break;

         case 2:
             uave[0] = MHD_CV_UY(dg_Dens(st));
             for(comp = 0; comp < 3; comp++)
             {
                 uave[comp+1] = MHD_CV_UY(dg_Mom(st)[comp]);
                 uave[comp+5] = MHD_CV_UY(dg_B(st)[comp]);
             }
             uave[4] = MHD_CV_UY(dg_Energy(st));
             break;

         case 3:
             uave[0] = MHD_CV_UXX(dg_Dens(st));
             for(comp = 0; comp < 3; comp++)
             {
                 uave[comp+1] = MHD_CV_UXX(dg_Mom(st)[comp]);
                 uave[comp+5] = MHD_CV_UXX(dg_B(st)[comp]);
             }
             uave[4] = MHD_CV_UXX(dg_Energy(st));
             break;

         case 4:
             uave[0] = MHD_CV_UXY(dg_Dens(st));
             for(comp = 0; comp < 3; comp++)
             {
                 uave[comp+1] = MHD_CV_UXY(dg_Mom(st)[comp]);
                 uave[comp+5] = MHD_CV_UXY(dg_B(st)[comp]);
             }
             uave[4] = MHD_CV_UXY(dg_Energy(st));
             break;

         case 5:
             uave[0] = MHD_CV_UYY(dg_Dens(st));
             for(comp = 0; comp < 3; comp++)
             {
                 uave[comp+1] = MHD_CV_UYY(dg_Mom(st)[comp]);
                 uave[comp+5] = MHD_CV_UYY(dg_B(st)[comp]);
             }
             uave[4] = MHD_CV_UYY(dg_Energy(st));
             break;

         default:
             printf("ERROR: CV_u_average_indx_MHD()\n");
             clean_up(ERROR);
             break;
         }

         /* Turn the integrals into means by dividing by the zeroth moment. */
         for(eq = 0; eq < N_EQN; eq++)
             uave[eq] /= mass[cv_indx][0];

#undef MHD_CV_UYY
#undef MHD_CV_UXY
#undef MHD_CV_UXX
#undef MHD_CV_UY
#undef MHD_CV_UX
#undef MHD_M
}


/*
 * Evaluate a first derivative of the cubic attached to control volume
 * `cv_indx` of `tri`, reading the coefficients c[] from tri->cv_soln.
 * This is a point evaluation, not an average.
 *
 * With dx = cent[0]-pt[0] and dy = cent[1]-pt[1]:
 *
 *   indx == 1:  c[1] + 2 c[3] dx +   c[4] dy
 *                    + 3 c[6] dx^2 + 2 c[7] dx dy +   c[8] dy^2
 *   indx == 2:  c[2] +   c[4] dx + 2 c[5] dy
 *                    +   c[7] dx^2 + 2 c[8] dx dy + 3 c[9] dy^2
 *
 * These match d/dX and d/dY of a cubic whose coefficients are ordered
 * 1, X, Y, X^2, XY, Y^2, X^3, X^2 Y, X Y^2, Y^3.
 * NOTE(review): the offset is formed as cent - pt (not pt - cent); confirm
 * which of the two arguments is meant to be the expansion origin.
 *
 * uave[0] is computed from cv_dg_rho; unless N_EQN == 1, uave[1] and
 * uave[2] come from cv_dg_m[0] and cv_dg_m[1], and uave[3] from cv_dg_e.
 * Any other indx prints an error and calls clean_up(ERROR).
 */
LOCAL void CV_du_indx(
         TRI       *tri,
         int       cv_indx,
         int       indx,
         double    *cent,
         double    *pt,
         float     *uave)
{
         double  dx, dy;

/* Derivative formulas for one coefficient row c, evaluated at (dx,dy). */
#define CV_DU_DX(c)                                                  \
         ((c)[1] + 2.0*(c)[3]*dx + (c)[4]*dy +                       \
          3.0*(c)[6]*sqr(dx) + 2.0*(c)[7]*dx*dy + (c)[8]*sqr(dy))
#define CV_DU_DY(c)                                                  \
         ((c)[2] + (c)[4]*dx + 2.0*(c)[5]*dy +                       \
          (c)[7]*sqr(dx) + 2.0*(c)[8]*dx*dy + 3.0*(c)[9]*sqr(dy))

         /* Reject unsupported derivative selectors before touching data. */
         if(indx != 1 && indx != 2)
         {
             printf("ERROR: CV_du_indx()\n");
             clean_up(ERROR);
             return;
         }

         dx = cent[0] - pt[0];
         dy = cent[1] - pt[1];

         if(indx == 1)
         {
             uave[0] = CV_DU_DX(tri->cv_soln->cv_dg_rho[cv_indx]);
             if(N_EQN != 1)
             {
                 uave[1] = CV_DU_DX(tri->cv_soln->cv_dg_m[0][cv_indx]);
                 uave[2] = CV_DU_DX(tri->cv_soln->cv_dg_m[1][cv_indx]);
                 uave[3] = CV_DU_DX(tri->cv_soln->cv_dg_e[cv_indx]);
             }
         }
         else
         {
             uave[0] = CV_DU_DY(tri->cv_soln->cv_dg_rho[cv_indx]);
             if(N_EQN != 1)
             {
                 uave[1] = CV_DU_DY(tri->cv_soln->cv_dg_m[0][cv_indx]);
                 uave[2] = CV_DU_DY(tri->cv_soln->cv_dg_m[1][cv_indx]);
                 uave[3] = CV_DU_DY(tri->cv_soln->cv_dg_e[cv_indx]);
             }
         }

#undef CV_DU_DX
#undef CV_DU_DY
}

/*
 * Collect partial neighbouring cells (CVs) around tri: 15 entries in total,
 * produced by visiting the edge neighbours nbtri[0], nbtri[1], nbtri[2] in
 * that order (the order the original comments call counter-clockwise).
 *
 * For each neighbour nb = nbtri[k], with s the side of nb on which tri lies:
 *   - three entries on nb itself: CV (s+1)%3 with side s, CV 3 with side -1,
 *     CV s with side s;
 *   - one entry on the triangle across side (s+2)%3 of nb ("upper side tri"):
 *     its CV index is the side of that triangle on which nb lies, side -1;
 *   - one entry on the triangle across side (sn+1)%3 of the NEXT neighbour
 *     nn = nbtri[(k+1)%3] ("lower side tri"; sn is the side of nn on which
 *     tri lies): its CV index is (f+1)%3 where f is the side of that triangle
 *     on which nn lies, side -1.
 *
 * tri        triangle the stencil is built for
 * nbtri      its three edge neighbours
 * cv_indx    not used
 * nbcv_indx  out: CV index of every stencil entry
 * nbcv_side  out: side recorded for every stencil entry (-1 for none)
 * tris       out: triangle owning every stencil entry
 * n_tri      out: number of entries written (15)
 *
 * Returns the number of entries written.
 *
 * No triangle pointer is checked before use, and a failed side search leaves
 * the side counter at 3, which is then used as is.
 * NOTE(review): presumably callers only pass interior configurations where
 * all neighbours and second neighbours exist -- confirm.
 */
EXPORT int get_tri_neighbr_CV_stencil_from_set_HR_sten(
         TRI       *tri,
         TRI       *nbtri[],
         int       cv_indx, // not used
         int       *nbcv_indx,
         int       *nbcv_side,
         TRI       **tris,
         int       *n_tri)
{
         TRI    *nb, *nn, *outer;
         int    k, s, f, filled = 0;

         for (k = 0; k < 3; k++)
         {
             /* side of nbtri[k] on which tri lies */
             nb = nbtri[k];
             s = 0;
             while (s < 3 && Tri_on_side(nb,s) != tri)
                 s++;

             /* the three CVs taken from nbtri[k] itself */
             tris[filled] = nb;
             nbcv_indx[filled] = (s+1)%3;
             nbcv_side[filled] = s;
             filled++;

             tris[filled] = nb;
             nbcv_indx[filled] = 3;
             nbcv_side[filled] = -1;
             filled++;

             tris[filled] = nb;
             nbcv_indx[filled] = s;
             nbcv_side[filled] = s;
             filled++;

             /* "upper side" triangle of nbtri[k] */
             outer = Tri_on_side(nb,(s+2)%3);
             f = 0;
             while (f < 3 && Tri_on_side(outer,f) != nb)
                 f++;

             tris[filled] = outer;
             nbcv_indx[filled] = f;
             nbcv_side[filled] = -1;
             filled++;

             /* "lower side" triangle of the next neighbour (wraps to nbtri[0]) */
             nn = nbtri[(k+1)%3];
             s = 0;
             while (s < 3 && Tri_on_side(nn,s) != tri)
                 s++;

             outer = Tri_on_side(nn,(s+1)%3);
             f = 0;
             while (f < 3 && Tri_on_side(outer,f) != nn)
                 f++;

             tris[filled] = outer;
             nbcv_indx[filled] = (f+1)%3;
             nbcv_side[filled] = -1;
             filled++;
         }

         *n_tri = filled;

         return *n_tri;
}

/*
 * Edge-adjacent CV stencil for a triangle that is divided into 4 self-similar
 * CVs by connecting the edge midpoints.  The neighbours are visited in the
 * order nbtri[0], nbtri[1], nbtri[2] (counter-clockwise per the original
 * comments); each contributes two CVs, 6 entries in total.
 *
 * For neighbour nbtri[k], with s the side of nbtri[k] on which tri lies:
 *   entry 2k   : CV (s+1)%3, side s
 *   entry 2k+1 : CV s,       side s
 *
 * cv_indx is not used.  nbcv_indx, nbcv_side and tris are output arrays that
 * receive 6 entries each; *n_tri is set to 6, which is also returned.
 * If tri is not found on any side of a neighbour, s is left at 3.
 */
EXPORT int get_tri_near_neighbr_CV_stencil(
         TRI       *tri,
         TRI       *nbtri[3],
         int       cv_indx,
         int       *nbcv_indx,
         int       *nbcv_side,
         TRI       **tris,
         int       *n_tri)
{
         TRI    *nb;
         int    k, s, slot;

         for (k = 0; k < 3; k++)
         {
             nb = nbtri[k];

             /* side of the neighbour on which tri lies */
             s = 0;
             while (s < 3 && Tri_on_side(nb,s) != tri)
                 s++;

             slot = 2*k;
             tris[slot] = nb;
             nbcv_indx[slot] = (s+1)%3;
             nbcv_side[slot] = s;

             slot++;
             tris[slot] = nb;
             nbcv_indx[slot] = s;
             nbcv_side[slot] = s;
         }

         *n_tri = 6;
         return *n_tri;
}

/*
 * Overlapping CV stencil: the CVs of tri itself interleaved with three CVs
 * from each edge neighbour, listed in the order nbtri[0], nbtri[1], nbtri[2]
 * (counter-clockwise per the original comments).  13 entries are written:
 *
 *   tri CV 0,
 *   nbtri[0]: CV (s+1)%3 (side s), CV 3 (side -1), CV s (side s),
 *   tri CV 1,
 *   nbtri[1]: same three CVs,
 *   tri CV 2,
 *   nbtri[2]: same three CVs,
 *   tri CV 3      <- important: partial cell 3 of the current triangle is
 *                    part of the stencil
 *
 * where s is the side of the respective neighbour on which tri lies (left at
 * 3 if tri is not found).  Entries that belong to tri carry side -1.
 *
 * cv_indx is not used.  nbcv_indx, nbcv_side and tris are output arrays;
 * *n_tri receives the entry count, which is also the return value.
 */
EXPORT int get_tri_near_neighbr_overlap_CV_stencil(
         TRI       *tri,
         TRI       *nbtri[3],
         int       cv_indx,
         int       *nbcv_indx,
         int       *nbcv_side,
         TRI       **tris,
         int       *n_tri)
{
         TRI    *nb;
         int    k, s, filled = 0;

         for (k = 0; k < 3; k++)
         {
             nb = nbtri[k];

             /* side of the neighbour on which tri lies */
             s = 0;
             while (s < 3 && Tri_on_side(nb,s) != tri)
                 s++;

             if (k == 0)
             {
                 /* the list opens with CV 0 of tri */
                 tris[filled] = tri;
                 nbcv_indx[filled] = 0;
                 nbcv_side[filled] = -1;
                 filled++;
             }

             tris[filled] = nb;
             nbcv_indx[filled] = (s+1)%3;
             nbcv_side[filled] = s;
             filled++;

             /* CV 3 of the neighbour (added 012009 per the original notes) */
             tris[filled] = nb;
             nbcv_indx[filled] = 3;
             nbcv_side[filled] = -1;
             filled++;

             tris[filled] = nb;
             nbcv_indx[filled] = s;
             nbcv_side[filled] = s;
             filled++;

             /* CV k+1 of tri follows neighbour k; for k == 2 this is the
              * partial cell 3 of the current triangle */
             tris[filled] = tri;
             nbcv_indx[filled] = k+1;
             nbcv_side[filled] = -1;
             filled++;
         }

         *n_tri = filled;
         return *n_tri;
}

/*
 * Stencil based on the (unstable) P^2 SV partition, used here only for
 * regrouping.  Per the original description the partition connects the
 * centroid with the edge midpoints and with the vertices, giving 6 partial
 * cells per triangle.
 *
 * For every edge k = 0, 1, 2 four entries are appended, 12 in total:
 *   tri,      CV 2k,    side k
 *   tri,      CV 2k+1,  side k
 *   nbtri[k], CV 2s,    side s
 *   nbtri[k], CV 2s+1,  side s
 * where s is the side of nbtri[k] on which tri lies (left at 3 if tri is
 * not found there).
 *
 * cv_indx is not used.  nbcv_indx, nbcv_side and tris are output arrays;
 * *n_tri receives the entry count, which is also the return value.
 */
EXPORT int get_tri_CV_stencil_unstable_SV_P2_partition(
         TRI       *tri,
         TRI       *nbtri[3],
         int       cv_indx,
         int       *nbcv_indx,
         int       *nbcv_side,
         TRI       **tris,
         int       *n_tri)
{
         TRI    *nb;
         int    k, s, filled = 0;

         for (k = 0; k < 3; k++)
         {
             /* the two partial cells of tri on edge k */
             tris[filled] = tri;
             nbcv_indx[filled] = 2*k;
             nbcv_side[filled] = k;
             filled++;

             tris[filled] = tri;
             nbcv_indx[filled] = 2*k+1;
             nbcv_side[filled] = k;
             filled++;

             /* side of the k-th neighbour on which tri lies */
             nb = nbtri[k];
             s = 0;
             while (s < 3 && Tri_on_side(nb,s) != tri)
                 s++;

             /* the two partial cells of the neighbour on that side */
             tris[filled] = nb;
             nbcv_indx[filled] = s*2;
             nbcv_side[filled] = s;
             filled++;

             tris[filled] = nb;
             nbcv_indx[filled] = s*2+1;
             nbcv_side[filled] = s;
             filled++;
         }

         *n_tri = filled;
         return *n_tri;
}

/*
 * Stencil based on the (unstable) P^2 SV partition without grouping partial
 * cells together.  Per the original description the partition connects the
 * centroid with the edge midpoints and with the vertices, giving 6 partial
 * cells per triangle.
 *
 * Only neighbour cells are listed.  For each nbtri[k], k = 0, 1, 2, with s
 * the side of nbtri[k] on which tri lies (left at 3 if not found):
 *   nbtri[k], CV 2s+1, side s
 *   nbtri[k], CV 2s,   side s
 * which gives 6 entries in total.
 *
 * cv_indx is not used.  nbcv_indx, nbcv_side and tris are output arrays;
 * *n_tri receives the entry count, which is also the return value.
 */
EXPORT int get_tri_CV_stencil_unstable_SV_P2_partition_no_regrouping(
         TRI       *tri,
         TRI       *nbtri[3],
         int       cv_indx,
         int       *nbcv_indx,
         int       *nbcv_side,
         TRI       **tris,
         int       *n_tri)
{
         TRI    *nb;
         int    k, s, filled = 0;

         for (k = 0; k < 3; k++)
         {
             nb = nbtri[k];

             /* side of the neighbour on which tri lies */
             s = 0;
             while (s < 3 && Tri_on_side(nb,s) != tri)
                 s++;

             /* odd-numbered cell first, then the even-numbered one */
             tris[filled] = nb;
             nbcv_indx[filled] = s*2+1;
             nbcv_side[filled] = s;
             filled++;

             tris[filled] = nb;
             nbcv_indx[filled] = s*2;
             nbcv_side[filled] = s;
             filled++;
         }

         *n_tri = filled;
         return *n_tri;
}

/*
 * "Liu" overlapping stencil.  Per the original description, the neighbour
 * partial cell is obtained by connecting midpoints on the cent_line with the
 * edge points at 1/4 and 3/4.
 *
 * Six entries are written, alternating between tri and its neighbours:
 *   tri CV 0, nbtri[0] CV q0, tri CV 1, nbtri[1] CV q1, tri CV 2, nbtri[2] CV q2
 * with qk = (s+2)%3 + 4 and s the side of nbtri[k] on which tri lies (left
 * at 3 if tri is not found).  Neighbour entries record side s, entries of
 * tri record side -1.
 *
 * cv_indx is not used.  nbcv_indx, nbcv_side and tris are output arrays;
 * *n_tri receives the entry count, which is also the return value.
 */
EXPORT int get_tri_liu_neighbr_overlap_CV_stencil(
         TRI       *tri,
         TRI       *nbtri[3],
         int       cv_indx,
         int       *nbcv_indx,
         int       *nbcv_side,
         TRI       **tris,
         int       *n_tri)
{
         TRI    *nb;
         int    k, s, filled = 0;

         for (k = 0; k < 3; k++)
         {
             nb = nbtri[k];

             /* side of the neighbour on which tri lies */
             s = 0;
             while (s < 3 && Tri_on_side(nb,s) != tri)
                 s++;

             if (k == 0)
             {
                 /* the list opens with CV 0 of tri */
                 tris[filled] = tri;
                 nbcv_indx[filled] = 0;
                 nbcv_side[filled] = -1;
                 filled++;
             }

             tris[filled] = nb;
             nbcv_indx[filled] = (s+2)%3 + 4;
             nbcv_side[filled] = s;
             filled++;

             if (k < 2)
             {
                 /* CV k+1 of tri sits between neighbour k and neighbour k+1 */
                 tris[filled] = tri;
                 nbcv_indx[filled] = k+1;
                 nbcv_side[filled] = -1;
                 filled++;
             }
         }

         *n_tri = filled;
         return *n_tri;
}

/*
 * Edge-adjacent CV stencil including partial cell 3 of every neighbour, for a
 * triangle divided into 4 self-similar CVs by connecting the edge midpoints.
 * Neighbours are visited in the order nbtri[0], nbtri[1], nbtri[2]
 * (counter-clockwise per the original comments); 9 entries in total.
 *
 * For neighbour nbtri[k], with s the side of nbtri[k] on which tri lies
 * (left at 3 if tri is not found):
 *   entry 3k   : CV (s+1)%3, side s
 *   entry 3k+1 : CV 3,       side s
 *   entry 3k+2 : CV s,       side s
 *
 * NOTE(review): CV 3 is recorded with side s here, whereas the sibling
 * stencil routines record -1 for CV 3 -- confirm this is intended.
 *
 * cv_indx is not used.  nbcv_indx, nbcv_side and tris are output arrays that
 * receive 9 entries each; *n_tri is set to 9, which is also returned.
 */
EXPORT int get_tri_near_neighbr_CV_stencil_ver2(
         TRI       *tri,
         TRI       *nbtri[3],
         int       cv_indx,
         int       *nbcv_indx,
         int       *nbcv_side,
         TRI       **tris,
         int       *n_tri)
{
         TRI    *nb;
         int    k, s, slot;

         for (k = 0; k < 3; k++)
         {
             nb = nbtri[k];

             /* side of the neighbour on which tri lies */
             s = 0;
             while (s < 3 && Tri_on_side(nb,s) != tri)
                 s++;

             slot = 3*k;
             tris[slot] = nb;
             nbcv_indx[slot] = (s+1)%3;
             nbcv_side[slot] = s;

             slot++;
             tris[slot] = nb;
             nbcv_indx[slot] = 3;
             nbcv_side[slot] = s;

             slot++;
             tris[slot] = nb;
             nbcv_indx[slot] = s;
             nbcv_side[slot] = s;
         }

         *n_tri = 9;
         return *n_tri;
}

/*
 * Stencil made of CV 3 only: one entry per edge neighbour, in the order
 * nbtri[0], nbtri[1], nbtri[2] (counter-clockwise per the original comments).
 * The 4 self-similar CVs are obtained by connecting the edge midpoints.
 *
 * Every entry has CV index 3 and side -1.  tri and cv_indx are not used.
 * nbcv_indx, nbcv_side and tris are output arrays that receive 3 entries
 * each; *n_tri is set to 3, which is also returned.
 */
LOCAL int get_tri_center_CV_stencil(
         TRI       *tri,
         TRI       *nbtri[3],
         int       cv_indx,
         int       *nbcv_indx,
         int       *nbcv_side,
         TRI       **tris,
         int       *n_tri)
{
         int    k;

         for (k = 0; k < 3; k++)
         {
             tris[k] = nbtri[k];
             nbcv_indx[k] = 3;
             nbcv_side[k] = -1;
         }

         *n_tri = 3;
         return *n_tri;
}

/*
 * Average of a DG state over one partial cell (CV) of a triangle, MHD
 * variant.
 *
 * For each of the eight fields read from st -- dg_Dens, dg_Mom[0..2],
 * dg_Energy, dg_B[0..2] -- the routine forms
 *     sum_{i < MAX_N_COEF} coef[i] * tri->CVmass_matrix[cv_indx][i]
 * and divides it by tri->CVmass_matrix[cv_indx][0].  The first N_EQN results
 * are stored in conu[0..N_EQN-1], in the field order listed above.
 *
 * Original author's note on the difference between avg_st_on_cv_ver3() and
 * avg_st_on_cv_ver2():
 *   in avg_st_on_cv_ver3(): CVmass_matrix is about the triangle centroid;
 *   in avg_st_on_cv_ver2(): CVmass_matrix is about the partial cell center.
 *
 * tri      triangle owning the CV mass matrix
 * cv_indx  row of CVmass_matrix to use
 * st       state supplying the DG coefficients
 * conu     out: averaged values, N_EQN entries
 *
 * Every accumulator is cleared before use.  Previously only the first N_EQN
 * slots were cleared while eight were always accumulated into, so with
 * N_EQN < 8 the trailing sums started from indeterminate values.
 */
LOCAL void avg_st_on_cv_ver3_MHD(
        TRI           *tri, 
        int           cv_indx, 
        Locstate      st,   
        float         *conu)
{
        int i;
        double  tmpu[10]; 
        double  **mass_matrix = tri->CVmass_matrix;

        /* clear the whole scratch array, not just the first N_EQN slots */
        for(i = 0; i < 10; i++)
            tmpu[i] = 0.0;

        for(i = 0; i < MAX_N_COEF; i++)
        {
            tmpu[0] += dg_Dens(st)[i]*mass_matrix[cv_indx][i];

            tmpu[1] += dg_Mom(st)[0][i]*mass_matrix[cv_indx][i];
            tmpu[2] += dg_Mom(st)[1][i]*mass_matrix[cv_indx][i];
            tmpu[3] += dg_Mom(st)[2][i]*mass_matrix[cv_indx][i];

            tmpu[4] += dg_Energy(st)[i]*mass_matrix[cv_indx][i];

            tmpu[5] += dg_B(st)[0][i]*mass_matrix[cv_indx][i];
            tmpu[6] += dg_B(st)[1][i]*mass_matrix[cv_indx][i];
            tmpu[7] += dg_B(st)[2][i]*mass_matrix[cv_indx][i];
        }

        /* normalise by the leading entry of the CV's mass-matrix row */
        for(i = 0; i < N_EQN; i++)
            conu[i] = tmpu[i]/mass_matrix[cv_indx][0];
}


/*
 * Average of the per-CV DG solution stored on a triangle over partial cell
 * indx.
 *
 * For density, the two momentum components and energy (tri->cv_soln fields
 * cv_dg_rho, cv_dg_m[0], cv_dg_m[1], cv_dg_e, all taken at row indx) the
 * routine forms
 *     sum_{i < MAX_N_COEF} coef[indx][i] * tri->CVmass_matrix[indx][i]
 * and divides it by tri->CVmass_matrix[indx][0].
 *
 * tri   triangle owning cv_soln and CVmass_matrix
 * indx  partial-cell index (selects the row in both)
 * st    out, may be NULL: receives Dens, Mom[0], Mom[1], Energy
 * conu  out, may be NULL: receives N_EQN entries
 *
 * Only four quantities are computed.  When N_EQN exceeds 4 the remaining
 * conu entries are set to 0; previously they were copied from scratch slots
 * that had never been written.
 */
LOCAL void avg_st_on_cv_ver2(
        TRI           *tri,
        int           indx, 
        Locstate      st,
        float         *conu)
{
        int i;
        float  tmpu[4]; 
        double  **mass_matrix = tri->CVmass_matrix;

        for(i = 0; i < 4; i++)
            tmpu[i] = 0.0;

        for(i = 0; i < MAX_N_COEF; i++)
        {
            tmpu[0] += tri->cv_soln->cv_dg_rho[indx][i]*mass_matrix[indx][i];
            tmpu[1] += tri->cv_soln->cv_dg_m[0][indx][i]*mass_matrix[indx][i];
            tmpu[2] += tri->cv_soln->cv_dg_m[1][indx][i]*mass_matrix[indx][i];
            tmpu[3] += tri->cv_soln->cv_dg_e[indx][i]*mass_matrix[indx][i];
        }

        /* normalise by the leading entry of the CV's mass-matrix row */
        for(i = 0; i < 4; i++)
            tmpu[i] /= mass_matrix[indx][0];

        if(st != NULL)
        {
            Dens(st) = tmpu[0];
            Mom(st)[0] = tmpu[1];
            Mom(st)[1] = tmpu[2];
            Energy(st) = tmpu[3];
        }
        if(conu != NULL)
        {
            /* never read past the four computed values */
            for(i = 0; i < N_EQN; i++)
                conu[i] = (i < 4) ? tmpu[i] : 0.0f;
        }
}

/*
 * Average of a caller-supplied CV solution over partial cell indx of tri.
 *
 * For density, the two momentum components and energy the routine forms
 *     sum_{i < 6} coef[i] * tri->CVmass_matrix[indx][i]
 * using row 0 of cv_soln (cv_dg_rho[0], cv_dg_m[0][0], cv_dg_m[1][0],
 * cv_dg_e[0]) as the coefficients, then divides by
 * tri->CVmass_matrix[indx][0].  Only the first 6 coefficients take part.
 *
 * tri      triangle owning CVmass_matrix
 * cv_soln  solution whose row 0 supplies the coefficients (passed by value)
 * indx     row of CVmass_matrix to use
 * conu     out, may be NULL: receives N_EQN entries
 *
 * Only four quantities are computed.  When N_EQN exceeds 4 the remaining
 * conu entries are set to 0; previously the copy loop read past the end of
 * the four-element scratch array.
 */
LOCAL void avg_st_for_mapped_p2(
        TRI           *tri,
        CV_Soln       cv_soln,
        int           indx,
        float         *conu)
{
        int i;
        float  tmpu[4];
        double  **mass_matrix = tri->CVmass_matrix;

        for(i = 0; i < 4; i++)
            tmpu[i] = 0.0;

        for(i = 0; i < 6; i++)
        {
            tmpu[0] += cv_soln.cv_dg_rho[0][i]*mass_matrix[indx][i];
            tmpu[1] += cv_soln.cv_dg_m[0][0][i]*mass_matrix[indx][i];
            tmpu[2] += cv_soln.cv_dg_m[1][0][i]*mass_matrix[indx][i];
            tmpu[3] += cv_soln.cv_dg_e[0][i]*mass_matrix[indx][i];
        }

        /* normalise by the leading entry of the CV's mass-matrix row */
        for(i = 0; i < 4; i++)
            tmpu[i] /= mass_matrix[indx][0];

        if(conu != NULL)
        {
            /* never index tmpu beyond its four elements */
            for(i = 0; i < N_EQN; i++)
                conu[i] = (i < 4) ? tmpu[i] : 0.0f;
        }
}

/*
 * Average over partial cell indx of tri, taking the DG coefficients from a
 * limiting store instead of from the triangle.
 *
 * For density, the two momentum components and energy (fields cv_dg_rho,
 * cv_dg_m[0], cv_dg_m[1], cv_dg_e of store[tri->id], all at row indx) the
 * routine forms
 *     sum_{i < MAX_N_COEF} coef[indx][i] * tri->CVmass_matrix[indx][i]
 * and divides it by tri->CVmass_matrix[indx][0].
 *
 * tri    triangle owning CVmass_matrix; tri->id selects the store entry
 * indx   partial-cell index (selects the row in both)
 * st     out, may be NULL: receives Dens, Mom[0], Mom[1], Energy
 * store  array of stored coefficients, indexed by tri->id
 * conu   out, may be NULL: receives N_EQN entries
 *
 * Only four quantities are computed.  When N_EQN exceeds 4 the remaining
 * conu entries are set to 0; previously the copy loop read past the end of
 * the four-element scratch array.
 */
LOCAL void avg_st_on_cv_from_store(
        TRI           *tri,
        int           indx,
        Locstate      st,
        Limiting_store *store,
        float         *conu)
{
        int i;
        float  tmpu[4];
        double  **mass_matrix = tri->CVmass_matrix;

        for(i = 0; i < 4; i++)
            tmpu[i] = 0.0;

        for(i = 0; i < MAX_N_COEF; i++)
        {
            tmpu[0] += store[tri->id].cv_dg_rho[indx][i]*mass_matrix[indx][i];
            tmpu[1] += store[tri->id].cv_dg_m[0][indx][i]*mass_matrix[indx][i];
            tmpu[2] += store[tri->id].cv_dg_m[1][indx][i]*mass_matrix[indx][i];
            tmpu[3] += store[tri->id].cv_dg_e[indx][i]*mass_matrix[indx][i];
        }

        /* normalise by the leading entry of the CV's mass-matrix row */
        for(i = 0; i < 4; i++)
            tmpu[i] /= mass_matrix[indx][0];

        if(st != NULL)
        {
            Dens(st) = tmpu[0];
            Mom(st)[0] = tmpu[1];
            Mom(st)[1] = tmpu[2];
            Energy(st) = tmpu[3];
        }
        if(conu != NULL)
        {
            /* never index tmpu beyond its four elements */
            for(i = 0; i < N_EQN; i++)
                conu[i] = (i < 4) ? tmpu[i] : 0.0f;
        }
}

EXPORT void Subcell_limiting_soln_accuracy_test(
        Front    *fr,
        Mid_soln *midsoln,
        Limiting_store **limit_store,
        int      rk_step,
        int      HR_times,
        int      detect_extr,
        int      total_tri)
{
        TRI       *tri, *crsp_tri, *nbtri[3];
        SURFACE   **surf;
        int       dim = 2, i, j, side, tmpi, cv_indx, indx;
        double     *cent;
        size_t    sizest = fr->sizest; 
        Locstate  st2;
        float     max_dt;

        int       loop_num = 0, comput_time = NO;
        static Locstate Roe_st[3] = {NULL, NULL, NULL}, sta;  //Roe mean value
        static float  **L[3], **R[3];
        float     conu[3][4][MAX_N_COEF];  // [side][# eqn][coef]
        float     outcome[4][MAX_N_COEF];
        char      s[256];
        int       debug = NO, bigHR_round;
        Locstate  st, nst;
        float     old_avg[4], new_avg[4];
        TRI       **limit_tris, **row_limit_tris[1500];
        int       N_alloc = 800, N_row, N_use =0, N, Row_alloc = 1500, local_iter_N = 1;

        if(Roe_st[0] == NULL)
        {
             for(i = 0; i < 3; i++)
             {
                 matrix(&L[i], 4, 4, sizeof(float));
                 matrix(&R[i], 4, 4, sizeof(float));
                 g_alloc_state(&Roe_st[i], fr->sizest);
             }
             g_alloc_state(&sta, fr->sizest);
        }
        if(mass_1st_row == NULL)
        {
            vector(&alltri_mass_1st_rows,total_tri,sizeof(Tri_mass_1st_rows));
            for(i = 0; i < total_tri; i++)
                alltri_mass_1st_rows[i].mass_1st_rows[0] = NULL;

            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));
            // for(i = 0; i < 30; i++)
            //     matrix(&mass_1st_rows[i], 1, MAX_N_COEF,sizeof(double));

            vector(&alltri_HR_sten_2,total_tri,sizeof(Tri_HR_sten));
            for(i = 0; i < total_tri; i++)
                alltri_HR_sten_2[i].HR_sten_set_3rd = alltri_HR_sten_2[i].HR_sten_set_2nd =
                alltri_HR_sten_2[i].HR_sten_set_1st = alltri_HR_sten_2[i].HR_sten_set_1st_pt = NO;
        }

        vector(&limit_tris, N_alloc, sizeof(TRI*));
        row_limit_tris[0] = limit_tris;
        N_row = 1;

        /*******************************************/
        /*******************************************/
        /*******************************************/
        /****
        /////// fix 3rd and 2nd degree terms
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
             !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;
                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(debugging("g_sine"))
                        attach_g_sine_buffer_states_ver2(fr,midsoln,rk_step,tri,limit_store);
                    else if(debugging("Burgers"))
                    {
                        if(!debugging("diag_tri"))
                            attach_Burgers_buffer_states_ver2(fr,midsoln,rk_step,tri,limit_store);
                    }
                }
                for(i = 0; i < 3; i++)
                    nbtri[i] = Tri_on_side(tri,i);
                Subcell_limiting_3rd_degreeP3_PNC(tri,nbtri,
                               midsoln,limit_store,rk_step,detect_extr);
                Subcell_limiting_2nd_degreeP3_PNC(tri,nbtri,
                               midsoln,limit_store,rk_step,detect_extr);
            }
        }
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
             !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;
                if(rk_step == RK_STEP)
                    st = tri->st;
                else
                    st = midsoln[tri->id].st[rk_step];
                nst = midsoln[tri->id].st[0];

                old_avg[0] = old_avg[1] = old_avg[2] = old_avg[3] = 0.0;
                new_avg[0] = new_avg[1] = new_avg[2] = new_avg[3] = 0.0;

                for(tmpi = 3; tmpi < MAX_N_COEF; tmpi++)
                // for(tmpi = 6; tmpi < MAX_N_COEF; tmpi++)
                {
                    old_avg[0] += dg_Dens(st)[tmpi]*tri->Lmass_matrix[0][tmpi];
                    old_avg[1] += dg_Mom(st)[0][tmpi]*tri->Lmass_matrix[0][tmpi];
                    old_avg[2] += dg_Mom(st)[1][tmpi]*tri->Lmass_matrix[0][tmpi];
                    old_avg[3] += dg_Energy(st)[tmpi]*tri->Lmass_matrix[0][tmpi];
                    new_avg[0] += dg_Dens(nst)[tmpi]*tri->Lmass_matrix[0][tmpi];
                    new_avg[1] += dg_Mom(nst)[0][tmpi]*tri->Lmass_matrix[0][tmpi];
                    new_avg[2] += dg_Mom(nst)[1][tmpi]*tri->Lmass_matrix[0][tmpi];
                    new_avg[3] += dg_Energy(nst)[tmpi]*tri->Lmass_matrix[0][tmpi];
                }
                for(tmpi = 0; tmpi < N_EQN; tmpi++)
                {
                    old_avg[tmpi] /= tri->Lmass_matrix[0][0];
                    new_avg[tmpi] /= tri->Lmass_matrix[0][0];
                }
                // printf("tri[%d] recomputed 3rd degree %g %g %g %g, avg %g\n", tri->id,
                //      dg_Dens(nst)[6], dg_Dens(nst)[7], dg_Dens(nst)[8], dg_Dens(nst)[9], new_avg[0]);
                // printf("tri[%d] old        3rd degree %g %g %g %g, avg %g\n\n", tri->id,
                //      dg_Dens(st)[6], dg_Dens(st)[7], dg_Dens(st)[8], dg_Dens(st)[9], old_avg[0]);

                dg_Dens(st)[0] +=   (old_avg[0] - new_avg[0]);
                dg_Mom(st)[0][0] += (old_avg[1] - new_avg[1]);
                dg_Mom(st)[1][0] += (old_avg[2] - new_avg[2]);
                dg_Energy(st)[0] += (old_avg[3] - new_avg[3]);
                for(tmpi = 3; tmpi < MAX_N_COEF; tmpi++)
                // for(tmpi = 6; tmpi < MAX_N_COEF; tmpi++)
                {
                    dg_Dens(st)[tmpi] = dg_Dens(nst)[tmpi];
                    dg_Mom(st)[0][tmpi] = dg_Mom(nst)[0][tmpi];
                    dg_Mom(st)[1][tmpi] = dg_Mom(nst)[1][tmpi];
                    dg_Energy(st)[tmpi] = dg_Energy(nst)[tmpi];
                }
                update_tri_CVs(tri,midsoln,limit_store,rk_step);
            }
        }
        /////// END:  fix 3rd and 2nd degree terms
        ****/
        /*******************************************/
        /*******************************************/
        /*******************************************/

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
             !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                cent = fg_centroid(tri);
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(debugging("g_sine"))
                        attach_g_sine_buffer_states_ver2(fr,midsoln,rk_step,tri,limit_store);
                    else if(debugging("Burgers"))
                    {
                        if(!debugging("diag_tri"))
                            attach_Burgers_buffer_states_ver2(fr,midsoln,rk_step,tri,limit_store);
                    }
                }
                limit_tris[N_use] = tri;
                N_use++;
                if(N_use == N_alloc)
                {
                    if(N_row +1 >= Row_alloc)
                    {
                        printf("ERROR: identify_limiting_candidates, exceed alloc. limit\n");
                        clean_up(ERROR);
                    }
                    vector(&limit_tris, N_alloc, sizeof(TRI*));
                    row_limit_tris[N_row] = limit_tris;
                    N_row++;
                    N_use = 0;
                }
                /***
                for(i = 0; i < 3; i++)
                    nbtri[i] = Tri_on_side(tri,i);
                if(MAX_N_COEF == 10)
                {
                    if(debugging("old_way"))
                    {
                        Subcell_limiting_3rd_degreeP3_PNC(tri,nbtri,
                               midsoln,limit_store,rk_step,detect_extr);
                        Subcell_limiting_2nd_degreeP3_PNC(tri,nbtri,
                               midsoln,limit_store,rk_step,detect_extr);
                        Subcell_limiting_1st_degreeP3_PNC(tri,nbtri,
                               midsoln,limit_store,rk_step,NO,detect_extr,NO);
                    }
                    else
                    {
                        Subcell_limiting_3rd_degreeP3_PNC_grouping(tri,nbtri,
                               midsoln,limit_store,rk_step,YES);
                        Subcell_limiting_2nd_degreeP3_PNC_grouping(tri,nbtri,
                               midsoln,limit_store,rk_step,YES);
                        Subcell_limiting_1st_degreeP3_PNC_grouping(tri,nbtri,
                               midsoln,limit_store,rk_step,NO,YES,YES);
                    }
                }
                else if(MAX_N_COEF == 6)
                {
                    // Subcell_limiting_2nd_degreeP2(tri,nbtri,midsoln,limit_store,rk_step);
                    // Subcell_limiting_1st_degreeP2(tri,nbtri,midsoln,limit_store,rk_step);
                }
                ***/ 
            }
        }


        for(bigHR_round = 0;  bigHR_round < 0; bigHR_round++)
        {
            preprocess_P3_times_rearrange_order(fr,midsoln,limit_store,
                  row_limit_tris,N_alloc,N_row,N_use,rk_step,HR_times);
        }

        /*******************************************/
        /*******************************************/
        /// Now update 3rd and 2nd degree terms
        for(bigHR_round = 0;  bigHR_round < 0; bigHR_round++)
        {
            subcell_update_high_degree_terms(fr,midsoln,limit_store,
                  row_limit_tris,N_alloc,N_row,N_use,rk_step,HR_times);
        }
        //// END: update 3rd and 2nd degree degree terms
        /*******************************************/
        /*******************************************/


        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1) N = N_use;
            else N = N_alloc;
            for(j = 0; j < N; j++)
            {
                // TMP
                // printf(" Subcell_limiting_soln_with_buffer_tris_multiple_times, on tri[%d],[%d][%d]\n",
                //      row_limit_tris[i][j]->id, i, j);
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);

                if(MAX_N_COEF == 3)
                {
                    // Subcell_limiting_1st_degreeP1(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                    printf("ERROR: Subcell_limiting_soln_accuracy_test(), unknown case\n");
                    clean_up(ERROR);
                }
                else if(MAX_N_COEF == 6)
                { 
                    Subcell_limiting_2nd_degreeP2(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                    Subcell_limiting_1st_degreeP2(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                    // limiting_2nd_degree(row_limit_tris[i][j],nbtri,midsoln,rk_step);
                    // Subcell_limiting_1st_degreeP2_multi_pt_limit(row_limit_tris[i][j],nbtri,midsoln,limit_store,rk_step);
                }
                else if(MAX_N_COEF == 10)
                {
                    limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,3,midsoln,rk_step,YES);
                    // Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                    //                 midsoln,limit_store,rk_step,YES);

                    limiting_2nd_degreeP3_multi_pt(row_limit_tris[i][j],nbtri,midsoln,rk_step,YES);
                    // limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,YES);

                    // Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                    //                 midsoln,limit_store,rk_step,YES);
                    // Subcell_limiting_2nd_degreeP3_PNC_multi_pt_limit(row_limit_tris[i][j],nbtri,
                    //                 midsoln,limit_store,rk_step,YES);

                    // Subcell_limiting_1st_degreeP3_PNC_2nd_degree_tech(row_limit_tris[i][j],nbtri,
                    //                     midsoln,limit_store,rk_step,NO,NO,NO);
                    // Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                    //                 midsoln,limit_store,rk_step,NO,YES,YES);
                    Subcell_limiting_1st_degreeP3_PNC_multi_pt_limit(row_limit_tris[i][j],nbtri,
                                    midsoln,limit_store,rk_step,NO,YES,YES);
                    /*******
                    ///// save states before HR
                    if(rk_step == RK_STEP)
                        assign(sta, row_limit_tris[i][j]->st, fr->sizest);
                    else
                        assign(sta, midsoln[row_limit_tris[i][j]->id].st[rk_step], fr->sizest);
                    ///// end:::::save states before HR
                    for(tmpi = 0; tmpi < local_iter_N; tmpi++)
                    {
                        Subcell_limiting_3rd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                        midsoln,limit_store,rk_step,YES);

                        Subcell_limiting_2nd_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                        midsoln,limit_store,rk_step,YES);

                        Subcell_limiting_1st_degreeP3_PNC(row_limit_tris[i][j],nbtri,
                                       midsoln,limit_store,rk_step,NO,YES, (tmpi == local_iter_N-1?YES:NO));
                        update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,NO);
                        update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                    }
                    ///// restore original (before HR) states
                    if(rk_step == RK_STEP)
                        assign(row_limit_tris[i][j]->st, sta, fr->sizest);
                    else
                        assign(midsoln[row_limit_tris[i][j]->id].st[rk_step], sta, fr->sizest);
                    update_tri_CVs(row_limit_tris[i][j],midsoln,limit_store,rk_step);
                    ///// end:::: restore original (before HR) states
                    *********/
                }
                else
                {
                    printf("ERROR: Subcell_limiting_soln_accuracy_test(), unknown case\n");
                    clean_up(ERROR);
                }
            }
        }

        if(rk_step == RK_STEP)
            comput_time = YES;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                // update coefficient
                update_coef(tri,midsoln,rk_step,fr,fr->sizest,comput_time);
                update_tri_CVs(tri,midsoln,limit_store,rk_step);
            }
        }

        for(i = 0; i < N_row; i++)
            free(row_limit_tris[i]);
}

/*
*                       WENO_FV_limiting_soln():
*
*       Applies the finite-volume reconstruction to every triangle of the
*       front's mesh whose BC_type is not SUBDOMAIN, for Runge-Kutta stage
*       rk_step.
*
*       Steps:
*         1. While the file-level mass_1st_row is still NULL, allocate the
*            per-triangle tables alltri_mass_1st_rows and alltri_HR_sten_2
*            (total_tri entries each) and clear their "set" flags.
*         2. Gather the triangles into rows of WENO_ROW_LEN pointers.  When
*            the "g_sine" or "Burgers" debugging modes are on, boundary
*            triangles get buffer states attached first.
*         3. Reconstruct each gathered triangle with FV_P0, WENO_FV_P1 or
*            WENO_FV_P2 according to MAX_N_COEF (1, 3 or 6); any other value
*            is a fatal error.
*         4. When rk_step == RK_STEP, fold each triangle's time-step bound
*            into the imported newdt and record the limiting triangle in
*            time_on_tri.
*         5. With "decay_alfven" debugging, on every 80th step and only when
*            rk_step == RK_STEP, print max |Bz| and max |Mom_z/Dens|.
*
*       fr          - front: mesh, time-step callback, step counter and time
*       midsoln     - stage solutions, forwarded to the kernels
*       limit_store - limiting storage, forwarded to the kernels
*       rk_step     - current stage index
*       HR_times    - not used by this function
*       detect_extr - not used by this function
*       total_tri   - entry count for the per-triangle tables; only read
*                     while mass_1st_row is still NULL
*/
EXPORT void WENO_FV_limiting_soln(
        Front    *fr,
        Mid_soln *midsoln,
        Limiting_store **limit_store,
        int      rk_step,
        int      HR_times,
        int      detect_extr,
        int      total_tri)
{
        enum { WENO_ROW_LEN = 800, WENO_ROW_CAP = 1500 };

        TRI         *tri, *cur, *nbtri[3];
        TRI         **limit_tris, **row_limit_tris[WENO_ROW_CAP];
        SURFACE     **surf;
        Tri_HR_sten *sten;
        int         i, j, side;
        int         N_row, N_use = 0, N;
        float       max_dt;
        float       max_vz = -1.0e10, max_Bz = -1.0e10;
        double      abs_Bz, abs_vz;

        /* One-time allocation of the file-level per-triangle tables. */
        if(mass_1st_row == NULL)
        {
            vector(&alltri_mass_1st_rows,total_tri,sizeof(Tri_mass_1st_rows));
            for(i = 0; i < total_tri; i++)
                alltri_mass_1st_rows[i].mass_1st_rows[0] = NULL;

            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));

            vector(&alltri_HR_sten_2,total_tri,sizeof(Tri_HR_sten));
            for(i = 0; i < total_tri; i++)
            {
                sten = &alltri_HR_sten_2[i];
                sten->HR_sten_set_3rd    = NO;
                sten->HR_sten_set_2nd    = NO;
                sten->HR_sten_set_1st    = NO;
                sten->HR_sten_set_1st_pt = NO;
                sten->WENO_cent_set      = NO;
                for(side = 0; side < 3; side++)
                {
                    sten->WENO_side_set[side] = NO;
                    sten->WENO_rev_set[side]  = NO;
                }
            }
        }

        /*
        * Gather the triangles to process.  They are kept in rows of
        * WENO_ROW_LEN pointers; a fresh row is opened as soon as the current
        * one fills up, so the last row holds N_use (possibly 0) entries.
        */
        vector(&limit_tris, WENO_ROW_LEN, sizeof(TRI*));
        row_limit_tris[0] = limit_tris;
        N_row = 1;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(debugging("g_sine"))
                        attach_g_sine_buffer_states_ver2(fr,midsoln,rk_step,tri,limit_store);
                    else if(debugging("Burgers"))
                    {
                        if(!debugging("diag_tri"))
                            attach_Burgers_buffer_states_ver2(fr,midsoln,rk_step,tri,limit_store);
                    }
                }

                limit_tris[N_use++] = tri;
                if(N_use == WENO_ROW_LEN)
                {
                    if(N_row + 1 >= WENO_ROW_CAP)
                    {
                        printf("ERROR: WENO_FV_limiting_soln(), exceed alloc. limit\n");
                        clean_up(ERROR);
                    }
                    vector(&limit_tris, WENO_ROW_LEN, sizeof(TRI*));
                    row_limit_tris[N_row++] = limit_tris;
                    N_use = 0;
                }
            }
        }

        /* Reconstruct every gathered triangle; the kernel depends on MAX_N_COEF. */
        for(i = 0; i < N_row; i++)
        {
            N = (i == N_row-1) ? N_use : WENO_ROW_LEN;
            for(j = 0; j < N; j++)
            {
                cur = row_limit_tris[i][j];
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(cur,side);

                switch(MAX_N_COEF)
                {
                case 1:
                    FV_P0(cur,nbtri,midsoln,limit_store,rk_step,fr);
                    break;
                case 3:
                    WENO_FV_P1(cur,nbtri,midsoln,limit_store,rk_step,fr);
                    break;
                case 6:
                    WENO_FV_P2(cur,nbtri,midsoln,limit_store,rk_step,fr);
                    break;
                default:
                    printf("ERROR: WENO_FV_limiting_soln(), unknown case MAX_N_COEF = %d\n",
                           (int)MAX_N_COEF);
                    clean_up(ERROR);
                    break;
                }
            }
        }

        /*
        * NOTE: the flattener, positivity-preserving and unphysical-state
        * passes that used to follow here are disabled; the previous in-line
        * remark says the unphysical-state fix is done within the WENO_FV
        * reconstruction.
        */

        /* Time-step bound; newdt and time_on_tri are imported from ghp/gDG.c. */
        if(rk_step == RK_STEP)
        {
            for(surf = fr->mesh->surfaces; surf && *surf; surf++)
            {
                for (tri = first_tri(*surf);
                     !at_end_of_tri_list(tri,*surf); tri = tri->next)
                {
                    if(tri->BC_type == SUBDOMAIN)
                        continue;

                    max_dt = (*fr->_time_step_on_tri)(fr, tri);
                    if(max_dt < newdt)
                        time_on_tri = tri;
                    newdt = min(newdt, max_dt);
                }
            }
        }

        /* Optional diagnostic of the out-of-plane field and velocity maxima. */
        if(debugging("decay_alfven") && fr->step % 80 == 0 && rk_step == RK_STEP)
        {
            for(surf = fr->mesh->surfaces; surf && *surf; surf++)
            {
                for (tri = first_tri(*surf);
                     !at_end_of_tri_list(tri,*surf); tri = tri->next)
                {
                    if(tri->BC_type == SUBDOMAIN)
                        continue;

                    abs_Bz = fabs( Mag(tri->st)[2]);
                    if(abs_Bz >= max_Bz)
                        max_Bz = abs_Bz;

                    abs_vz = fabs( Mom(tri->st)[2]/Dens(tri->st));
                    if(abs_vz >= max_vz)
                        max_vz = abs_vz;
                }
            }

            printf("At time = %g max_Vz = %g max_Bz = %g log10_max_Vz = %g\n", fr->time, max_vz, max_Bz, log10(max_vz)); 
        }

        for(i = 0; i < N_row; i++)
            free(row_limit_tris[i]);
}

/*
*               negative_therm_press_after_B_reconstruction():
*
*       Evaluates the thermal pressure of a triangle's state twice: once with
*       the magnetic field currently stored in the state, and once after
*       calling tri_B_P0_polynomial_reconstruction().  If either pressure is
*       negative, the total energy is rebuilt from the entropy as
*
*           E = Ent*Dens^Gam/Gam + 0.5*Dens*|v|^2 + 0.5*|B|^2
*
*       and written both to the working state and to stage 0 of midsoln.
*       A negative entropy in that situation is a fatal error.
*
*       The saved B data (Mag[0..1] and dg_B[0..1]) is copied back into the
*       state unless the first pressure was negative and the P0 field is not
*       smaller in magnitude than the saved one.
*
*       tri      - triangle being checked
*       nbtri    - not used by this function
*       midsoln  - stage solutions; source of the state when
*                  rk_step != RK_STEP, and target of the repaired energy
*       store    - not used by this function
*       rk_step  - current stage index
*       fr       - not used by this function
*/
LOCAL void negative_therm_press_after_B_reconstruction(
         TRI       *tri,
         TRI       *nbtri[],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_step,
         Front     *fr)
{
         static int dim = 2;
         Locstate  st;
         double    saved_coef[2][MAX_N_COEF], saved_B[2], B0[3];
         double    Gam, p_saved_B, p_P0_B, norm_saved_B, norm_P0_B;
         double    entr, press, vel, ke, B_sqr, new_engy;
         int       i, neg_with_saved_B, debug = NO;

         if(rk_step == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_step];

         Gam = gruneisen_gamma(st);
         p_saved_B = POLY_thermal_pressure_MHD(st);

         /*
         * Keep a copy of the in-plane B data before it is reconstructed.
         * NOTE(review): the copy length uses sizeof(float) while the buffer
         * is declared double; this is only consistent if both have the same
         * size in this build -- confirm.
         */
         for(i = 0; i < 2; i++)
         {
             saved_B[i] = Mag(st)[i];
             assign(saved_coef[i], dg_B(st)[i], sizeof(float)*MAX_N_COEF); 
         }
         norm_saved_B = sqr(saved_B[0]) + sqr(saved_B[1]);

         /*
         * Presumably this overwrites the B data held in st (the code below
         * copies the saved data back) -- confirm against its definition.
         */
         tri_B_P0_polynomial_reconstruction(tri, midsoln,rk_step, B0);
         p_P0_B = POLY_thermal_pressure_MHD(st);
         norm_P0_B = sqr(B0[0]) + sqr(B0[1]);

         neg_with_saved_B = (p_saved_B < 0.0);

         /* Copy the saved B data back, except in the one case described above. */
         if(!neg_with_saved_B || norm_P0_B < norm_saved_B)
         {
             for(i = 0; i < 2; i++)
             {
                 Mag(st)[i] = saved_B[i];
                 assign(dg_B(st)[i], saved_coef[i],sizeof(float)*MAX_N_COEF);
             }
         }

         /* Nothing to repair when neither pressure is negative. */
         if(!neg_with_saved_B && !(p_P0_B < 0.0))
             return;

         entr = Ent(st);
         if(entr < 0.0)
         {
             if(neg_with_saved_B)
                 printf("ERROR:  negative_therm_press_after_B_reconstruction()\n");
             else
                 printf("ERROR:  negative_therm_press_after_B_reconstruction() 2\n");
             printf("TRI[%d] has negative entropy\n", tri->id);
             verbose_print_state("fixed state",st);
             clean_up(ERROR);
         }

         /* Pressure from entropy, then kinetic and magnetic energy. */
         press = entr*(pow(Dens(st), Gam));

         ke = 0.0;
         for(i = 0; i < dim; i++)
         {
             vel = Mom(st)[i]/Dens(st);
             ke += sqr(vel);
         }
         ke *= 0.5*Dens(st);

         B_sqr = 0.0;
         for(i = 0; i < 3; i++)
             B_sqr += sqr(Mag(st)[i]);

         new_engy = press/Gam + ke + 0.5*B_sqr;
         Energy(st) = new_engy;
         /* NOTE(review): always stage 0 of midsoln, whatever rk_step is -- confirm intent. */
         Energy(midsoln[tri->id].st[0]) = new_engy;

         if(neg_with_saved_B && debug == YES)
         {
             verbose_print_state("pre-fixed state",st);
         }
}

/*
*                       Compute_flattener():
*
*       Computes a flattening coefficient in [0,1] for a triangle and stores
*       it in tri->flattener, together with relaxed density and pressure
*       bounds in tri->rho_lim[] and tri->p_lim[].
*
*       The stencil is built from the lists returned by
*       tris_between_edge_neighbrs() for each side (entries 0 and 1 of each
*       list are skipped) plus the three side neighbours.  Over that stencil
*       the minimum magnetosonic speed and the density/pressure extrema are
*       collected.
*
*       tri      - triangle being processed
*       nbtri    - not used by this function
*       midsoln  - stage solutions, read when rk_step != RK_STEP
*       store    - not used by this function
*       rk_step  - current stage index
*       fr       - not used by this function
*/
LOCAL void Compute_flattener(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_step,
         Front     *fr)
{
         TRI       *stencil[100], *ring[60];
         Locstate  st, nb_st;
         int       side, k, n_ring, n_sten = 0;
         double    k1 = 0.4, k2 = 0.4;
         double    diam = fg_diam(tri);
         double    c_min = HUGE_VAL, div_v, flattener;
         double    rho_min =  HUGE_VAL,  rho_max = -HUGE_VAL;
         double    p_min =  HUGE_VAL,  p_max = -HUGE_VAL;

         /* Triangles listed between edge neighbours, minus the first two of each list. */
         for(side = 0; side < 3; side++)
         {
             tris_between_edge_neighbrs(tri, NULL, side, ring, &n_ring);
             for(k = 2; k < n_ring; k++)
                 stencil[n_sten++] = ring[k];
         }
         /* The three side neighbours themselves. */
         for(side = 0; side < 3; side++)
             stencil[n_sten++] = Tri_on_side(tri,side);

         if(rk_step == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_step];

         /*
         * NOTE(review): both terms use coefficient index 1; whether that
         * yields a divergence depends on the basis ordering -- confirm.
         */
         div_v = dg_Mom(st)[0][1] + dg_Mom(st)[1][1];

         for(k = 0; k < n_sten; k++)
         {
             if(rk_step == RK_STEP)
                 nb_st = stencil[k]->st;
             else
                 nb_st = midsoln[stencil[k]->id].st[rk_step];

             c_min = min(c_min, POLY_magnetosonic_speed_MHD(nb_st));
             rho_min = min(rho_min, Dens(nb_st));
             rho_max = max(rho_max, Dens(nb_st));
             p_min = min(p_min, POLY_thermal_pressure_MHD(nb_st));
             p_max = max(p_max, POLY_thermal_pressure_MHD(nb_st));
         }

         /* Include the triangle's own state in the extrema. */
         rho_min = min(rho_min, Dens(st));
         rho_max = max(rho_max, Dens(st));
         p_min = min(p_min, POLY_thermal_pressure_MHD(st));
         p_max = max(p_max, POLY_thermal_pressure_MHD(st));

         /*
         * NOTE(review): this discards the stencil minimum gathered above and
         * caps the lower pressure bound at 1.0e-14 -- confirm this is the
         * intended floor rather than max(1.0e-14, p_min).
         */
         p_min = min(1.0e-14, POLY_thermal_pressure_MHD(st));

         flattener = min(1.0, max(0.0, -(diam*div_v + k1*c_min)/(k1*c_min) ));
         tri->flattener = flattener;

         /* Bounds widen by the factor k2 as the flattener goes to 0. */
         tri->rho_lim[0] = rho_min*(1-k2 + k2*flattener);
         tri->rho_lim[1] = rho_max*(1+k2 - k2*flattener);

         tri->p_lim[0] = p_min*(1-k2 + k2*flattener);
         tri->p_lim[1] = p_max*(1+k2 - k2*flattener);
}

/*
*                       update_CVs():
*
*       Refreshes the control-volume copies of the solution on every triangle
*       of the front's mesh whose BC_type is not SUBDOMAIN, by calling
*       update_tri_CVs() on each of them.
*
*       fr       - front whose mesh surfaces are traversed
*       midsoln  - stage solutions, forwarded to update_tri_CVs()
*       store    - limiting storage, forwarded to update_tri_CVs()
*       rk_step  - current stage index, forwarded to update_tri_CVs()
*/
LOCAL void update_CVs(
        Front    *fr,
        Mid_soln *midsoln,
        Limiting_store **store,
        int      rk_step)
{
        TRI       *tri;
        SURFACE   **surf;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                /* Per-triangle work lives in one place only. */
                update_tri_CVs(tri,midsoln,store,rk_step);
            }
        }
}

/*
*                       update_tri_CVs():
*
*       Copies the density, energy and two momentum coefficient sets of one
*       triangle into each of its N_PART control-volume slots, then calls the
*       SV-to-CV polynomial map.
*
*       When rk_step == RK_STEP the source is tri->st and the target is
*       tri->cv_soln; otherwise the source is midsoln[tri->id].st[rk_step]
*       and the target is store[rk_step][tri->id].
*
*       tri      - triangle to refresh
*       midsoln  - stage solutions
*       store    - per-stage, per-triangle limiting storage
*       rk_step  - current stage index
*/
LOCAL void update_tri_CVs(
        TRI      *tri,
        Mid_soln *midsoln,
        Limiting_store **store,
        int      rk_step)
{
        Limiting_store *slot;
        Locstate  src;
        int       part, k;

        if(rk_step != RK_STEP)
        {
            /* Intermediate stage: fill the stage's limiting storage. */
            src = midsoln[tri->id].st[rk_step];
            slot = &store[rk_step][tri->id];
            for(k = 0; k < MAX_N_COEF; k++)
            {
                for(part = 0; part < N_PART; part++)
                {
                    slot->cv_dg_rho[part][k] = dg_Dens(src)[k];
                    slot->cv_dg_e[part][k] = dg_Energy(src)[k];
                    slot->cv_dg_m[0][part][k] = dg_Mom(src)[0][k];
                    slot->cv_dg_m[1][part][k] = dg_Mom(src)[1][k];
                }
            }
            /*
            * NOTE(review): every MAX_N_COEF other than 6 takes the P3 map
            * here, whereas the RK_STEP path below only maps for 6 and 10 --
            * confirm the asymmetry is intended.
            */
            if(MAX_N_COEF ==  6)
                map_poly_SV_to_CV_ver2_p2(tri, store[rk_step]);
            else
                map_poly_SV_to_CV_ver2_p3(tri, store[rk_step]);
            return;
        }

        /* Final stage: fill the triangle's own control-volume solution. */
        src = tri->st;
        for(k = 0; k < MAX_N_COEF; k++)
        {
            for(part = 0; part < N_PART; part++)
            {
                tri->cv_soln->cv_dg_rho[part][k] = dg_Dens(src)[k];
                tri->cv_soln->cv_dg_e[part][k] = dg_Energy(src)[k];
                tri->cv_soln->cv_dg_m[0][part][k] = dg_Mom(src)[0][k];
                tri->cv_soln->cv_dg_m[1][part][k] = dg_Mom(src)[1][k];
            }
        }
        if(MAX_N_COEF ==  6)
            map_poly_SV_to_CV_p2(tri);
        else if(MAX_N_COEF ==  10)
            map_poly_SV_to_CV_p3(tri);
}

/*
*                       comput_Roe_ver2():
*
*       For each of the three sides of a triangle, forms the Roe mean of the
*       triangle's state and the state of the neighbour across that side,
*       then fills the matching L and R matrices via LR_matrix_in_dir().
*       An invalid Roe mean is a fatal error.
*
*       The direction passed down is the side vector rotated by -90 degrees,
*       (t[1], -t[0]); it is not normalised here.
*
*       tri         - triangle being processed
*       nbtri       - its three side neighbours (dereferenced unconditionally)
*       midsoln     - stage solutions, read when rk_iter != RK_STEP
*       limit_store - not used by this function
*       rk_iter     - current stage index
*       Roe_st      - three caller-allocated states receiving the Roe means
*       L, R        - three caller-allocated matrices each, filled per side
*/
LOCAL void comput_Roe_ver2(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **limit_store,
         int       rk_iter,
         Locstate  *Roe_st,
         float     **L[],
         float     **R[])
{
         Locstate st, nb_st;
         int      side;
         float    nor[3];   /* only nor[0] and nor[1] are set */

         if(rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];

         for(side = 0; side < 3; side++)
         {
             if(rk_iter == RK_STEP)
                 nb_st = nbtri[side]->st;
             else
                 nb_st = midsoln[nbtri[side]->id].st[rk_iter];

             /* Start from a copy of the triangle's state; the mean overwrites parts of it. */
             assign(Roe_st[side], st, Params(st)->sizest);

             nor[0] = fg_side_vector(tri)[side][1];
             nor[1] = -fg_side_vector(tri)[side][0];
             compute_Roemean(Roe_st[side],st,nb_st, nor);

             if(invalid_state("comput_Roe",Roe_st[side],NO))
             {
                 printf("ERROR: comput_Roe_ver2, tri[%d] side[%d], nbstri[%d], Roe mean invalid\n",
                          tri->id, side, nbtri[side]->id);
                 verbose_print_state("st", st);
                 verbose_print_state("nb_st", nb_st);
                 verbose_print_state("Roe_mean", Roe_st[side]);
                 print_tri_crds(tri);
                 clean_up(ERROR);
             }

             LR_matrix_in_dir(nor, Roe_st[side], L[side], R[side]);
         }
}

/*
*       compute_Roemean():
*
*       Two dimensional Roe mean of st1 and st2 relative to the direction n.
*
*       Roe  - output state; Dens, Mom[0], Mom[1] and Energy are written.
*       st1  - first state (also supplies gruneisen_gamma()).
*       st2  - second state.
*       n    - direction (n[0], n[1]); used exactly as given, no
*              normalization is applied here.
*
*       The velocities of both states are projected onto n and onto
*       t = (-n[1], n[0]) and averaged with sqrt(density) weights, as is
*       the total specific enthalpy.  Note that the momentum written to
*       Roe is built from these projected components, i.e. Mom(Roe)[0]
*       carries the n-component and Mom(Roe)[1] the t-component.
*       The pressure is recovered from the averaged enthalpy with
*       Gam/(Gam+1), Gam = gruneisen_gamma(st1) (presumably gamma - 1 for
*       a polytropic gas -- confirm against the equation of state used).
*/
LOCAL void compute_Roemean(
        Locstate  Roe,
        Locstate  st1,
        Locstate  st2,
        float     *n)
{
        float     w1, w2;             /* sqrt(density) weights          */
        float     vel1[2], vel2[2];   /* velocities, x-y components     */
        float     rot1[2], rot2[2];   /* velocities, (n, t) components  */
        float     tang[2], vbar[2];
        float     enth, enth1, enth2;
        float     Gam, pres;
        int       k;

        Gam = gruneisen_gamma(st1);

        w1 = sqrt(Dens(st1));
        w2 = sqrt(Dens(st2));
        Dens(Roe) = sqrt(Dens(st1)*Dens(st2));

        for(k = 0; k < 2; k++)
            vel1[k] = Mom(st1)[k]/Dens(st1);
        for(k = 0; k < 2; k++)
            vel2[k] = Mom(st2)[k]/Dens(st2);

        /* Project both velocities onto n and onto t. */
        tang[0] = - n[1];
        tang[1] = n[0];
        rot1[0] = vel1[0]*n[0] + vel1[1]*n[1];
        rot1[1] = vel1[0]*tang[0] + vel1[1]*tang[1];
        rot2[0] = vel2[0]*n[0] + vel2[1]*n[1];
        rot2[1] = vel2[0]*tang[0] + vel2[1]*tang[1];

        for(k = 0; k < 2; k++)
            vbar[k] = (w1*rot1[k] + w2*rot2[k])/(w1 + w2);

        /* Total specific enthalpy of each state and its weighted mean. */
        enth1 = (Energy(st1) + pressure(st1))/Dens(st1);
        enth2 = (Energy(st2) + pressure(st2))/Dens(st2);
        enth = (w1*enth1 + w2*enth2)/(w1 + w2);

        pres = (enth*Dens(Roe) - 0.5*(sqr(vbar[0]) + sqr(vbar[1]))*Dens(Roe) )*Gam/(Gam+1.0);

        Mom(Roe)[0] = Dens(Roe)*vbar[0];
        Mom(Roe)[1] = Dens(Roe)*vbar[1];
        Energy(Roe) = 0.5*Dens(Roe)*(sqr(vbar[0]) + sqr(vbar[1])) + pres/Gam;
}


/*
*       compute_Roemean_MHD():
*
*       Computes the two extreme wave speeds of the Roe-type mean of two
*       MHD states relative to the direction n:
*
*               *lambda1 = u_n - c_f,     *lambda7 = u_n + c_f,
*
*       where u_n is the averaged velocity component along n and c_f is
*       the fast magnetosonic speed of the averaged state.
*
*       st1, st2 - the two states; Dens, Mom[0..2], Mag[0..2], Energy and
*                  POLY_thermal_pressure_MHD() are read.  gruneisen_gamma()
*                  is taken from st1.
*       n        - direction (n[0], n[1]); used as given (not normalized
*                  here).  The third velocity/field component is carried
*                  over unchanged.
*       lambda1, lambda7 - outputs, see above.
*
*       Velocities and the total enthalpy are averaged with the weights
*       (sqrt(rho1), sqrt(rho2)); the magnetic field is averaged with the
*       weights swapped.  X = 0.5*|B1 - B2|^2/(sqrt(rho1)+sqrt(rho2))^2
*       enters the averaged sound speed.
*
*       If either result is NaN the run is terminated via clean_up(ERROR).
*/
EXPORT void compute_Roemean_MHD(
        Locstate  st1,
        Locstate  st2,
        float     *n,
        float     *lambda1,
        float     *lambda7)
{
        float     u[3], u1[3], u2[3], nu1[3], nu2[3];
        float     B[3], B1[3], B2[3], nB1[3], nB2[3];
        float     small_b_bar[3], small_b_bar_sqr = 0.0;
        float     B1_sqr = 0.0, B2_sqr = 0.0, u_sqr = 0.0;
        float     r1, r2, Dens_Roe, Gam, t[2];
        float     H, H1, H2, X, del_B_sqr;
        float     a_bar_sqr, a_star_sqr, cf_sqr;
        double    disc;
        int       i;

        Gam = gruneisen_gamma(st1);

        r1 = sqrt(Dens(st1));
        r2 = sqrt(Dens(st2));

        Dens_Roe = sqrt(Dens(st1)*Dens(st2));

        for(i = 0; i < 3; i++)
        {
            u1[i] = Mom(st1)[i]/Dens(st1);
            u2[i] = Mom(st2)[i]/Dens(st2);
            B1[i] = Mag(st1)[i];
            B2[i] = Mag(st2)[i];
            B1_sqr += sqr(B1[i]);
            B2_sqr += sqr(B2[i]);
        }

        /* Express velocity and field in (n, t, z) components. */
        t[0] = - n[1];  t[1] = n[0];

        nu1[0] = u1[0]*n[0] + u1[1]*n[1];
        nu1[1] = u1[0]*t[0] + u1[1]*t[1];
        nu2[0] = u2[0]*n[0] + u2[1]*n[1];
        nu2[1] = u2[0]*t[0] + u2[1]*t[1];
        nu1[2] = u1[2];
        nu2[2] = u2[2];

        nB1[0] = B1[0]*n[0] + B1[1]*n[1];
        nB1[1] = B1[0]*t[0] + B1[1]*t[1];
        nB2[0] = B2[0]*n[0] + B2[1]*n[1];
        nB2[1] = B2[0]*t[0] + B2[1]*t[1];
        nB1[2] = B1[2];
        nB2[2] = B2[2];

        for(i = 0; i < 3; i++)
        {
            u[i] = (r1*nu1[i] + r2*nu2[i])/(r1 + r2);
            /* The field average uses the weights in swapped order. */
            B[i] = (r1*nB2[i] + r2*nB1[i])/(r1 + r2);
            u_sqr += sqr(u[i]);

            small_b_bar[i] = B[i]/sqrt(Dens_Roe);
            small_b_bar_sqr += sqr(small_b_bar[i]);
        }

        del_B_sqr = 0.0;
        for(i = 0; i < 3; i++)
            del_B_sqr += sqr(B1[i] - B2[i]);
        X = 0.5*del_B_sqr/sqr(r1 + r2);

        H1 = (Energy(st1) + POLY_thermal_pressure_MHD(st1) + 0.5*B1_sqr)/Dens(st1);
        H2 = (Energy(st2) + POLY_thermal_pressure_MHD(st2) + 0.5*B2_sqr)/Dens(st2);

        H = (r1*H1 + r2*H2)/(r1 + r2);

        a_bar_sqr = Gam*(H - u_sqr*0.5 - small_b_bar_sqr) - (Gam-1)*X;
        a_star_sqr = a_bar_sqr + small_b_bar_sqr;

        /*
        * a*^4 - 4 a^2 b_n^2 equals (a^2 - b^2)^2 + 4 a^2 b_perp^2 and is
        * therefore non-negative in exact arithmetic.  Roundoff can push it
        * slightly below zero when a^2 ~ b^2 and the transverse field
        * vanishes, which would turn sqrt() into NaN and abort a perfectly
        * valid state.  Clamp it; a NaN input still propagates (the
        * comparison is false for NaN) and is caught below.
        */
        disc = sqr(a_star_sqr) - 4.0*a_bar_sqr*sqr(small_b_bar[0]);
        if(disc < 0.0)
            disc = 0.0;

        cf_sqr = 0.5*( a_star_sqr + sqrt(disc) );

        *lambda1 = u[0] - sqrt(cf_sqr);
        *lambda7 = u[0] + sqrt(cf_sqr);

        if(isnan(*lambda1) || isnan(*lambda7))
        {
            printf("ERROR: compute_Roemean_MHD(), nan state\n");
            clean_up(ERROR);
        }
}



/*
*       LR_matrix_in_dir():
*
*       Fills the 4x4 matrices R and L for the 2D gas dynamics flux in the
*       direction nor, evaluated at the state st.
*
*       The columns of R (and the rows of L) are ordered to match
*               D = {v_n - a, v_n, v_n + a, v_n},
*       v_n -- velocity component along nor, a -- sound speed.
*
*       nor - direction (nor[0], nor[1]); used as given.  L is presumably
*             meant to be the inverse of R, which requires nor to have
*             unit length -- this is not checked here.
*       st  - state supplying Dens, Mom, gruneisen_gamma() and
*             sound_speed().
*       L,R - caller-allocated matrices, at least 4x4; every entry is
*             overwritten.
*/
LOCAL void      LR_matrix_in_dir(
        float     *nor,
        Locstate  st,
        float     **L,
        float     **R)
{
        float    Gam, gam;      /* gam = Gam + 1 */
        float    vel[MAXD], c, h_static, e_kin, h_total, v_nor;

        Gam = gruneisen_gamma(st);
        gam = Gam + 1.0;

        vel[0] = Mom(st)[0]/Dens(st);
        vel[1] = Mom(st)[1]/Dens(st);

        c = sound_speed(st);
        h_static = sqr(c)/Gam;
        e_kin = 0.5*(sqr(vel[0]) + sqr(vel[1]));
        h_total = h_static + e_kin;
        v_nor = vel[0]*nor[0] + vel[1]*nor[1];

        /* R, column 0: wave v_n - a */
        R[0][0] = 1.0;
        R[1][0] = vel[0] - c*nor[0];
        R[2][0] = vel[1] - c*nor[1];
        R[3][0] = h_total - c*v_nor;

        /* R, column 1: wave v_n */
        R[0][1] = 1.0;
        R[1][1] = vel[0];
        R[2][1] = vel[1];
        R[3][1] = e_kin;

        /* R, column 2: wave v_n + a */
        R[0][2] = 1.0;
        R[1][2] = vel[0] + c*nor[0];
        R[2][2] = vel[1] + c*nor[1];
        R[3][2] = h_total + c*v_nor;

        /* R, column 3: second v_n wave (shear) */
        R[0][3] = 0.0;
        R[1][3] = nor[1];
        R[2][3] = -nor[0];
        R[3][3] = vel[0]*nor[1]-vel[1]*nor[0];

        /* L, row 0 */
        L[0][0] = (Gam*e_kin + c*v_nor)/(2.0*sqr(c));
        L[0][1] = ((1-gam)*vel[0] - c*nor[0])/(2.0*sqr(c));
        L[0][2] = ((1-gam)*vel[1] - c*nor[1])/(2.0*sqr(c));
        L[0][3] = Gam/(2.0*sqr(c));

        /* L, row 1 */
        L[1][0] = (sqr(c) - Gam*e_kin)/(sqr(c));
        L[1][1] = (Gam*vel[0])/(sqr(c));
        L[1][2] = (Gam*vel[1])/(sqr(c));
        L[1][3] = (1-gam)/(sqr(c));

        /* L, row 2 */
        L[2][0] = (Gam*e_kin - c*v_nor)/(2.0*sqr(c));
        L[2][1] = ((1-gam)*vel[0] + c*nor[0])/(2.0*sqr(c));
        L[2][2] = ((1-gam)*vel[1] + c*nor[1])/(2.0*sqr(c));
        L[2][3] = Gam/(2.0*sqr(c));

        /* L, row 3 */
        L[3][0] = vel[1]*nor[0] - vel[0]*nor[1];
        L[3][1] = nor[1];
        L[3][2] = -nor[0];
        L[3][3] = 0.0;
}

/*
*       convert_cand_conu_charu():
*
*       Applies convert_con_char() with the matrix L, in place, to the
*       state of tri and to the states of its three neighbors (by its name
*       a conservative -> characteristic variable change), and after each
*       conversion refreshes the control-volume (CV) polynomial data of
*       that triangle from the converted state.
*
*       rk_iter == RK_STEP : the states are tri->st / nbtri[i]->st, the
*                            coefficients are copied into ->cv_soln and
*                            mapped with map_poly_SV_to_CV_p2/_p3().
*       otherwise          : the states are midsoln[id].st[rk_iter], the
*                            coefficients are copied into
*                            store[rk_iter][id] and mapped with
*                            map_poly_SV_to_CV_ver2_p2/_p3().
*
*       The same coefficient set is written to every one of the N_PART
*       CV slots before the mapping routine is called.
*       A NaN density after conversion terminates the run through
*       clean_up(ERROR).  For tri this check is made after the CV data
*       have been refreshed; for the neighbors it is made before.
*/
LOCAL void convert_cand_conu_charu(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store, 
         int       rk_iter,
         float     **L)
{
         Locstate        cell_st, nb_cell_st[3];
         Limiting_store  *slot;
         TRI             *nb;
         int             k, part, c;
         const int       final_stage = (rk_iter == RK_STEP);

         if(final_stage)
         {
             cell_st = tri->st;
             for(k = 0; k < 3; k++)
                 nb_cell_st[k] = nbtri[k]->st;
         }
         else
         {
             cell_st = midsoln[tri->id].st[rk_iter];
             for(k = 0; k < 3; k++)
                 nb_cell_st[k] = midsoln[nbtri[k]->id].st[rk_iter];
         }

         /* ---- the cell itself ---- */
         convert_con_char(cell_st, L, cell_st);
         if(final_stage)
         {
             for(part = 0; part < N_PART; part++)
             {
                 for(c = 0; c < MAX_N_COEF; c++)
                 {
                     tri->cv_soln->cv_dg_rho[part][c] = dg_Dens(cell_st)[c];
                     tri->cv_soln->cv_dg_e[part][c] = dg_Energy(cell_st)[c];
                     tri->cv_soln->cv_dg_m[0][part][c] = dg_Mom(cell_st)[0][c];
                     tri->cv_soln->cv_dg_m[1][part][c] = dg_Mom(cell_st)[1][c];
                 }
             }
             if(MAX_N_COEF ==  6)
                 map_poly_SV_to_CV_p2(tri);
             else if(MAX_N_COEF ==  10)
                 map_poly_SV_to_CV_p3(tri);
         }
         else
         {
             slot = &store[rk_iter][tri->id];
             for(part = 0; part < N_PART; part++)
             {
                 for(c = 0; c < MAX_N_COEF; c++)
                 {
                     slot->cv_dg_rho[part][c] = dg_Dens(cell_st)[c];
                     slot->cv_dg_e[part][c] = dg_Energy(cell_st)[c];
                     slot->cv_dg_m[0][part][c] = dg_Mom(cell_st)[0][c];
                     slot->cv_dg_m[1][part][c] = dg_Mom(cell_st)[1][c];
                 }
             }
             if(MAX_N_COEF ==  6)
                 map_poly_SV_to_CV_ver2_p2(tri, store[rk_iter]);
             else
                 map_poly_SV_to_CV_ver2_p3(tri, store[rk_iter]);
         }

         if (isnan(Dens(cell_st)))
         {
             printf("ERROR: convert_cand_conu_charu, tri[%d] state after convertion, nan\n", tri->id);
             clean_up(ERROR);
         }

         /* ---- the three side neighbors ---- */
         for(k = 0; k < 3; k++)
         {
             nb = nbtri[k];

             convert_con_char(nb_cell_st[k], L, nb_cell_st[k]);
             if (isnan(Dens(nb_cell_st[k])))
             {
                 printf("ERROR: convert_cand_conu_charu, tri[%d] nb state[%d] after convertion, nan\n", tri->id, k);
                 clean_up(ERROR);
             }

             if(final_stage)
             {
                 for(part = 0; part < N_PART; part++)
                 {
                     for(c = 0; c < MAX_N_COEF; c++)
                     {
                         nb->cv_soln->cv_dg_rho[part][c] = dg_Dens(nb_cell_st[k])[c];
                         nb->cv_soln->cv_dg_e[part][c] = dg_Energy(nb_cell_st[k])[c];
                         nb->cv_soln->cv_dg_m[0][part][c] = dg_Mom(nb_cell_st[k])[0][c];
                         nb->cv_soln->cv_dg_m[1][part][c] = dg_Mom(nb_cell_st[k])[1][c];
                     }
                 }
                 if(MAX_N_COEF ==  6)
                     map_poly_SV_to_CV_p2(nb);
                 else if(MAX_N_COEF ==  10)
                     map_poly_SV_to_CV_p3(nb);
             }
             else
             {
                 slot = &store[rk_iter][nb->id];
                 for(part = 0; part < N_PART; part++)
                 {
                     for(c = 0; c < MAX_N_COEF; c++)
                     {
                         slot->cv_dg_rho[part][c] = dg_Dens(nb_cell_st[k])[c];
                         slot->cv_dg_e[part][c] = dg_Energy(nb_cell_st[k])[c];
                         slot->cv_dg_m[0][part][c] = dg_Mom(nb_cell_st[k])[0][c];
                         slot->cv_dg_m[1][part][c] = dg_Mom(nb_cell_st[k])[1][c];
                     }
                 }
                 if(MAX_N_COEF ==  6)
                     map_poly_SV_to_CV_ver2_p2(nb, store[rk_iter]);
                 else
                     map_poly_SV_to_CV_ver2_p3(nb, store[rk_iter]);
             }
         }
}

/*
*       twod_riemann_attach_buffer_states():
*
*       Sets the ghost states in the buffer tris adjacent to tri.
*
*       For every side neighbor of tri whose BC_type is SUBDOMAIN:
*         1. the state of tri (for the stage selected by rk_step) is
*            copied into the neighbor's state for the same stage;
*         2. the sign of a fixed set of polynomial coefficients of the
*            copy is flipped, the set being chosen from the direction of
*            the shared side (EAST/WEST vs. NORTH/SOUTH);
*         3. the cell averages Dens, Mom[0], Mom[1], Energy of the ghost
*            state are recomputed from its coefficients and row 0 of the
*            neighbor's Lmass_matrix;
*         4. the result is also copied into midsoln[nb->id].st[0].
*
*       If store is not NULL, the control-volume coefficient arrays of
*       each such neighbor are refreshed as well, both for the stage
*       rk_step (nb->cv_soln when rk_step == RK_STEP, store[rk_step]
*       otherwise) and for store[0].
*
*       fr       - only fr->sizest is read.
*       midsoln  - per-triangle intermediate states, indexed by tri id.
*       rk_step  - stage selector; RK_STEP selects ->st of the triangles.
*       tri      - the interior triangle supplying the data.
*       store    - per-stage limiting storage, may be NULL.
*
*       NOTE(review): the locals u, crds, fcent, cent, pt and con_u are
*       declared but never used in this routine.
*/
LIB_LOCAL void twod_riemann_attach_buffer_states(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        TRI      *tri,
        Limiting_store **store)
{
        TRI       *nbtri[3];
        int       side, i, dim = 2, cv_indx, indx;
        float     nor[3], t[3], u, crds[3], fcent[3];
        Locstate  st, gst;
        size_t    sizest = fr->sizest;
        double    *cent, pt[MAXD], con_u[4], **Lmass_matrix;
        float     dirx[2] = {1.0, 0.0}, ans;
        GRID_DIRECTION Gside; 

        // Source state: the state of tri for the requested stage.
        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];
        for(side = 0; side < 3; side++)
            nbtri[side] = Tri_on_side(tri,side);

        // Pass 1: build the ghost state of every SUBDOMAIN neighbor.
        for(side = 0; side < 3; side++)
        {
            if(nbtri[side]->BC_type == SUBDOMAIN)
            {
                /// NEW
                // Find which side of the neighbor is shared with tri.
                // NOTE(review): if no side of the neighbor points back to
                // tri, the loop ends with indx == 3 and the side vector
                // lookup below reads index 3 -- confirm that buffer tris
                // always have their neighbor pointers set.
                for(indx = 0; indx < 3; indx++)
                {
                    if(tri ==  Tri_on_side(nbtri[side],indx))
                        break;
                }
                // Direction of that side as seen from the neighbor:
                // side vector (t0, t1) turned into (t1, -t0).
                for(i = 0; i < dim; i++)
                    t[i] = fg_side_vector(nbtri[side])[indx][i];
                nor[0] = t[1];
                nor[1] = -t[0];
                // Convert to x-y coord.
                // ans is |nor[0]| since dirx = (1, 0).  The 0.5 thresholds
                // presumably assume a side vector of unit length that is
                // aligned with a coordinate axis -- TODO confirm.
                ans = fabs(nor[0]*dirx[0] + nor[1]*dirx[1]);
                if(ans > 0.5 && nor[0] < -0.5)
                    Gside = EAST;
                else if(ans > 0.5 && nor[0] > 0.5)
                    Gside = WEST;
                else if(ans < 0.5 && nor[1] < -0.5)
                    Gside = NORTH;
                else
                    Gside = SOUTH;
                // Destination: the neighbor's state for the same stage.
                if(rk_step == RK_STEP)
                    gst = nbtri[side]->st;
                else
                    gst = midsoln[nbtri[side]->id].st[rk_step];
                // Start from a full copy of the interior state.
                assign(gst, st, sizest);
                Lmass_matrix = nbtri[side]->Lmass_matrix;
                // Flip the sign of selected coefficients of all four
                // conserved quantities.  EAST and WEST share one index set
                // (1, 4, 6, 8), NORTH and SOUTH the other (2, 4, 7, 9);
                // presumably these are the basis functions that are odd in
                // x resp. y -- TODO confirm against the basis ordering.
                // NOTE(review): indices up to 9 are written regardless of
                // MAX_N_COEF, although the code further down also handles
                // MAX_N_COEF == 6; verify the coefficient arrays always
                // hold at least 10 entries.
                switch(Gside)
                {
                case EAST:
                case WEST:
                    dg_Dens(gst)[1] *= -1.0;
                    dg_Dens(gst)[4] *= -1.0;
                    dg_Dens(gst)[6] *= -1.0;
                    dg_Dens(gst)[8] *= -1.0;
                    dg_Energy(gst)[1] *= -1.0;
                    dg_Energy(gst)[4] *= -1.0;
                    dg_Energy(gst)[6] *= -1.0;
                    dg_Energy(gst)[8] *= -1.0;
                    dg_Mom(gst)[1][1] *= -1.0;
                    dg_Mom(gst)[1][4] *= -1.0;
                    dg_Mom(gst)[1][6] *= -1.0;
                    dg_Mom(gst)[1][8] *= -1.0;
                    dg_Mom(gst)[0][1] *= -1.0;
                    dg_Mom(gst)[0][4] *= -1.0;
                    dg_Mom(gst)[0][6] *= -1.0;
                    dg_Mom(gst)[0][8] *= -1.0;
                break;
                case NORTH:
                case SOUTH:
                    dg_Dens(gst)[2] *= -1.0;
                    dg_Dens(gst)[4] *= -1.0;
                    dg_Dens(gst)[7] *= -1.0;
                    dg_Dens(gst)[9] *= -1.0;
                    dg_Energy(gst)[2] *= -1.0;
                    dg_Energy(gst)[4] *= -1.0;
                    dg_Energy(gst)[7] *= -1.0;
                    dg_Energy(gst)[9] *= -1.0;
                    dg_Mom(gst)[1][2] *= -1.0;
                    dg_Mom(gst)[1][4] *= -1.0;
                    dg_Mom(gst)[1][7] *= -1.0;
                    dg_Mom(gst)[1][9] *= -1.0;
                    dg_Mom(gst)[0][2] *= -1.0;
                    dg_Mom(gst)[0][4] *= -1.0;
                    dg_Mom(gst)[0][7] *= -1.0;
                    dg_Mom(gst)[0][9] *= -1.0;
                break;
                }
                // Compute average soln
                // average = sum_k coef[k]*Lmass_matrix[0][k] / Lmass_matrix[0][0],
                // using the neighbor's own mass matrix.
                Dens(gst) = 0.0;
                Mom(gst)[0] = 0.0;
                Mom(gst)[1] = 0.0;
                Energy(gst) = 0.0;
                for(indx = 0; indx < MAX_N_COEF; indx++)
                {
                    Dens(gst) += dg_Dens(gst)[indx]*Lmass_matrix[0][indx];
                    Mom(gst)[0] += dg_Mom(gst)[0][indx]*Lmass_matrix[0][indx];
                    Mom(gst)[1] += dg_Mom(gst)[1][indx]*Lmass_matrix[0][indx];
                    Energy(gst) += dg_Energy(gst)[indx]*Lmass_matrix[0][indx];
                }
                Dens(gst) /= Lmass_matrix[0][0];
                Mom(gst)[0] /= Lmass_matrix[0][0];
                Mom(gst)[1] /= Lmass_matrix[0][0];
                Energy(gst) /= Lmass_matrix[0][0];

                // The subdomain zero level state, for reconstruction purpose (P1 projection)
                // (when rk_step == 0 source and destination are the same state)
                assign(midsoln[nbtri[side]->id].st[0], gst, sizest);
                /// END NEW
            }
        }

        // Pass 2 (only with limiting storage): refresh the control-volume
        // coefficient arrays of the SUBDOMAIN neighbors from the ghost
        // states built above.
        if(store != NULL)
        {
            for(side = 0; side < 3; side++)
            {
                if(nbtri[side]->BC_type == SUBDOMAIN)
                {
                    // NOTE(review): the CV count is hard-coded as 4 here,
                    // while convert_cand_conu_charu() loops to N_PART;
                    // presumably N_PART == 4 -- confirm.
                    if(rk_step == RK_STEP)
                    {
                        gst = nbtri[side]->st;
                        for(cv_indx = 0; cv_indx < 4; cv_indx++)
                        {
                            for(indx = 0; indx < MAX_N_COEF; indx++)
                            {
                                nbtri[side]->cv_soln->cv_dg_rho[cv_indx][indx] = dg_Dens(gst)[indx];
                                nbtri[side]->cv_soln->cv_dg_e[cv_indx][indx] = dg_Energy(gst)[indx];
                                nbtri[side]->cv_soln->cv_dg_m[0][cv_indx][indx] = dg_Mom(gst)[0][indx];
                                nbtri[side]->cv_soln->cv_dg_m[1][cv_indx][indx] = dg_Mom(gst)[1][indx];
                            }
                        }
                        if(6 == MAX_N_COEF)
                            map_poly_SV_to_CV_p2(nbtri[side]);
                        else if(10 == MAX_N_COEF)
                            map_poly_SV_to_CV_p3(nbtri[side]);
                    }
                    else
                    {
                        gst = midsoln[nbtri[side]->id].st[rk_step];
                        for(cv_indx = 0; cv_indx < 4; cv_indx++)
                        {
                            for(indx = 0; indx < MAX_N_COEF; indx++)
                            {
                                store[rk_step][nbtri[side]->id].cv_dg_rho[cv_indx][indx] =
                                              dg_Dens(gst)[indx];
                                store[rk_step][nbtri[side]->id].cv_dg_e[cv_indx][indx] =
                                              dg_Energy(gst)[indx];
                                store[rk_step][nbtri[side]->id].cv_dg_m[0][cv_indx][indx] =
                                              dg_Mom(gst)[0][indx];
                                store[rk_step][nbtri[side]->id].cv_dg_m[1][cv_indx][indx] =
                                              dg_Mom(gst)[1][indx];
                            }
                        }
                        if(6 == MAX_N_COEF)
                            map_poly_SV_to_CV_ver2_p2(nbtri[side], store[rk_step]);
                        else if(10 == MAX_N_COEF)
                            map_poly_SV_to_CV_ver2_p3(nbtri[side], store[rk_step]);
                    }

                    // The subdomain zero level state, for reconstruction purpose (P1 projection)
                    // gst is still the ghost state selected just above.
                    for(cv_indx = 0; cv_indx < 4; cv_indx++)
                    {
                        for(indx = 0; indx < MAX_N_COEF; indx++)
                        {
                            store[0][nbtri[side]->id].cv_dg_rho[cv_indx][indx] =
                                          dg_Dens(gst)[indx];
                            store[0][nbtri[side]->id].cv_dg_e[cv_indx][indx] =
                                          dg_Energy(gst)[indx];
                            store[0][nbtri[side]->id].cv_dg_m[0][cv_indx][indx] =
                                          dg_Mom(gst)[0][indx];
                            store[0][nbtri[side]->id].cv_dg_m[1][cv_indx][indx] =
                                          dg_Mom(gst)[1][indx];
                        }
                    }
                    if(6 == MAX_N_COEF)
                        map_poly_SV_to_CV_ver2_p2(nbtri[side], store[0]);
                    else if(10 == MAX_N_COEF)
                        map_poly_SV_to_CV_ver2_p3(nbtri[side], store[0]);
                }
            }
        }
}


/*
*       find_char_dir():
*
*       Returns the index (0, 1 or 2) of the side of tri across which the
*       cell density differs most, in absolute value, from the density of
*       the neighbor on that side.  On a tie the lowest index wins.
*       Returns -1 only if none of the three differences compares greater
*       than -infinity (e.g. all of them are NaN).
*
*       The densities are taken from ->st of the triangles when
*       rk_iter == RK_STEP and from midsoln[id].st[rk_iter] otherwise.
*/
LOCAL int find_char_dir(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         int       rk_iter)
{
         Locstate cell_st, nb_cell_st;
         int      k, best_side = -1;
         float    rho, nb_rho, largest = -HUGE_VAL;
         double   gap;

         if(rk_iter != RK_STEP)
             cell_st = midsoln[tri->id].st[rk_iter];
         else
             cell_st = tri->st;
         rho = Dens(cell_st);

         for(k = 0; k < 3; k++)
         {
             if(rk_iter != RK_STEP)
                 nb_cell_st = midsoln[nbtri[k]->id].st[rk_iter];
             else
                 nb_cell_st = nbtri[k]->st;

             nb_rho = Dens(nb_cell_st);
             gap = fabs(nb_rho - rho);
             if(gap > largest)
             {
                 largest = gap;
                 best_side = k;
             }
         }
         return best_side;
}

/*
*       update_coef_layers():
*
*       Replaces selected bands of polynomial coefficients of the current
*       state of tri by those of the stage-0 state midsoln[tri->id].st[0],
*       and corrects coefficient 0 so that the quantity
*               sum_k coef[k]*Lmass_matrix[0][k]
*       keeps the value it had before the replacement.
*
*       layer[2] == YES : coefficients 6 .. MAX_N_COEF-1 are replaced.
*       layer[1] == YES : coefficients 3 .. 5 are replaced.
*       The two bands are handled in this order, each with its own
*       correction of coefficient 0.
*
*       The current state is tri->st when rk_iter == RK_STEP and
*       midsoln[tri->id].st[rk_iter] otherwise.  fr is not referenced.
*       Only MAX_N_COEF == 10 is implemented; any other value is reported
*       and the run is terminated through clean_up(ERROR).
*/
LOCAL void  update_coef_layers(
         TRI       *tri,
         Mid_soln  *midsoln,
         int       rk_iter,
         Front     *fr, 
         int       *layer)
{
         Locstate  cur, ref;
         double    **mass = tri->Lmass_matrix;
         float     shift[4] = {0.0, 0.0, 0.0, 0.0};
         int       band, c, q, first, last, flag;

         if(rk_iter != RK_STEP)
             cur = midsoln[tri->id].st[rk_iter];
         else
             cur = tri->st;
         ref = midsoln[tri->id].st[0];

         if(MAX_N_COEF != 10)
         {
             printf("ERROR: update_coef_layers() implement MAX_N_COEF = %d\n", 
                     MAX_N_COEF);
             clean_up(ERROR);
             return;
         }

         /* band 0: highest coefficients (layer[2]); band 1: 3..5 (layer[1]) */
         for(band = 0; band < 2; band++)
         {
             if(band == 0)
             {
                 flag = layer[2];
                 first = 6;
                 last = MAX_N_COEF;
             }
             else
             {
                 flag = layer[1];
                 first = 3;
                 last = 6;
             }

             if(flag == YES)
             {
                 for(c = first; c < last; c++)
                 {
                     /* Weighted amount removed by the replacement ... */
                     shift[0] += (dg_Dens(cur)[c]- dg_Dens(ref)[c])*mass[0][c];
                     shift[1] += (dg_Mom(cur)[0][c]-dg_Mom(ref)[0][c])*mass[0][c];
                     shift[2] += (dg_Mom(cur)[1][c]-dg_Mom(ref)[1][c])*mass[0][c];
                     shift[3] += (dg_Energy(cur)[c]-dg_Energy(ref)[c])*mass[0][c];
                     /* ... then take over the reference coefficient. */
                     dg_Dens(cur)[c] = dg_Dens(ref)[c];
                     dg_Energy(cur)[c] = dg_Energy(ref)[c];
                     dg_Mom(cur)[0][c] = dg_Mom(ref)[0][c];
                     dg_Mom(cur)[1][c] = dg_Mom(ref)[1][c];
                 }
                 for(q = 0; q < N_EQN; q++)
                     shift[q] /= mass[0][0];
                 /* Put the removed amount back through coefficient 0. */
                 dg_Dens(cur)[0] += shift[0];
                 dg_Mom(cur)[0][0] += shift[1];
                 dg_Mom(cur)[1][0] += shift[2];
                 dg_Energy(cur)[0] += shift[3];
             }

             /* Start the next band from a clean accumulator. */
             for(q = 0; q < N_EQN; q++)
                 shift[q] = 0.0;
         }
}
/*
*       pre_process_limiting_P3():
*
*       Runs the three whole-cell limiting passes on tri, highest degree
*       first: limiting_3rd_degreeP3(), limiting_2nd_degreeP3(),
*       limiting_1st_degreeP3(), each with the three side neighbors nbtri.
*       The last argument of each call is a YES/NO switch interpreted by
*       the callee (YES, YES, NO respectively).
*
*       limit_store is not referenced on this path.  A disabled
*       alternative used the sub-cell variants instead:
*           pre_Subcell_limiting_3rd_degreeP3_PNC(tri,nbtri,
*                                 midsoln,limit_store,rk_step,YES);
*           pre_Subcell_limiting_2nd_degreeP3_PNC(tri,nbtri,
*                                 midsoln,limit_store,rk_step,NO);
*           pre_Subcell_limiting_1st_degreeP3_PNC(tri,nbtri,
*                                 midsoln,limit_store,rk_step,NO,NO,NO);
*/
LOCAL void pre_process_limiting_P3(
         TRI       *tri,
         TRI       *nbtri[3], 
         Mid_soln  *midsoln,
         Limiting_store **limit_store,
         int       rk_step)
{
         const int n_side_nbrs = 3;

         limiting_3rd_degreeP3(tri, nbtri, n_side_nbrs, midsoln, rk_step, YES);
         limiting_2nd_degreeP3(tri, nbtri, n_side_nbrs, midsoln, rk_step, YES);
         limiting_1st_degreeP3(tri, nbtri, n_side_nbrs, midsoln, rk_step, NO);
}

// Reconstructed coeffs. are stored in limit_store for CV polynomials
// reconstruct by partial neighboring cells
LOCAL void pre_Subcell_limiting_3rd_degreeP3_PNC(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter,
	 int       detect_extr)
{
         Locstate st, nbst[3], st2;
         int      cv_indx, num_CVs, i, k, j;
         int      on_SV_side, on_SV_side2, nbcv_indx[20], nbcv_side[20]; 
         TRI      *tris[20];
         float    *cent, *nbcent[20], diam;
         float    uxxave[4], nbuxxave[20][8], tmpnbuxxave[20][4]; // [cv_indx][eqn#]
         float    uxyave[4], nbuxyave[20][8], tmpnbuxyave[20][4];
         float    uyyave[4], nbuyyave[20][8], tmpnbuyyave[20][4]; 
         int      is_bad_stenxx[20], is_bad_stenxy[20], is_bad_stenyy[20];
         float    rside[3], Axx[30][2][2], Axy[30][2][2], Ayy[30][2][2];
         float    coefxx[30][2], coefxy[30][2], coefyy[30][2];
         float    c_num_xx[30], c_num_xy[30], c_num_yy[30]; // condition number of stencils  
         float    u6, u7, u8, u9, u7_0, u7_1, u8_0, u8_1,
                   avg3, avg4, avg5, arrya[30], arryb[30], w[30];
         int      debug = NO, over_lap[3] = {0, 3, 6};
         float    Axxb[30][2][2], Axyb[30][2][2], Ayyb[30][2][2], 
                  c_num_xxb[30], c_num_xyb[30], c_num_yyb[30],
                  coefxxb[30][2], coefxyb[30][2], coefyyb[30][2];

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < 3; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
                 nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }
         st2 = midsoln[tri->id].st[0];
         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         /////////// TMP, make P^2
         /**
         for(i = 6; i < MAX_N_COEF; i++)
         {
              dg_Dens(st2)[i] = 0.0; dg_Mom(st2)[0][i] = 0.0;
              dg_Mom(st2)[1][i] = 0.0; dg_Energy(st2)[i] = 0.0;
         }
         return;
         **/
         ////////// END TMP

         cent = fg_centroid(tri);
         // diam = sqr(fg_diam(tri));
         // diam = fg_diam(tri);
	 diam = 1.0;
         
         /*
         if(debug == YES)
         {
             printf("neighboring CVs found = %d\n", num_CVs);
             for(i = 0; i < num_CVs; i++)
             {
                 printf("indx[%d] nbtri[%d], nbcv_indx[%d]\n",i, tris[i]->id, nbcv_indx[i]);
                 print_tri_crds(tris[i]);
             }
         }
         **/

         // num_CVs = get_tri_edge_vert_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_near_neighbr_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // current lax test
         // num_CVs = get_tri_near_neighbr_CV_stencil_ver2(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         num_CVs = get_tri_center_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_near_neighbr_overlap_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_liu_neighbr_overlap_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         u_average_indx(tri,st,3,uxxave);
         u_average_indx(tri,st,4,uxyave);
         u_average_indx(tri,st,5,uyyave);                 
         for(k = 0; k < N_EQN; k++)
         {
             uxxave[k] *= 2.0;
             uyyave[k] *= 2.0;
         }

         //////////// NEW, whole cell to detect extreme
         /***
         for(i = 0; i < 3; i++)
         {
             u_average_indx(nbtri[i],nbst[i],3,nbuxxave[i]);
             u_average_indx(nbtri[i],nbst[i],4,nbuxyave[i]);
             u_average_indx(nbtri[i],nbst[i],5,nbuyyave[i]);
             for(k = 0; k < N_EQN; k++)
             {
                 nbuxxave[i][k] *= 2.0;
                 nbuyyave[i][k] *= 2.0;
             }
         }
         if(detect_extr == YES)
         {
             NEW_extrema_detec(uxxave,nbuxxave,3,is_bad_stenxx);
             NEW_extrema_detec(uxyave,nbuxyave,3,is_bad_stenxy);
             NEW_extrema_detec(uyyave,nbuyyave,3,is_bad_stenyy);
         }
         ***/
         //////////////////

         if(rk_iter == RK_STEP)
         {
             for(i = 0; i < num_CVs; i++)
             {
                 CV_u_average_indx(tris[i],nbcv_indx[i],3,nbuxxave[i]);
                 CV_u_average_indx(tris[i],nbcv_indx[i],4,nbuxyave[i]);
                 CV_u_average_indx(tris[i],nbcv_indx[i],5,nbuyyave[i]);                 
             }
         }
         else
         {
             for(i = 0; i < num_CVs; i++)
             {
                 CV_u_average_indx_from_store(tris[i],nbcv_indx[i],3,store[rk_iter],nbuxxave[i]);  
                 CV_u_average_indx_from_store(tris[i],nbcv_indx[i],4,store[rk_iter],nbuxyave[i]);  
                 CV_u_average_indx_from_store(tris[i],nbcv_indx[i],5,store[rk_iter],nbuyyave[i]);  
             }
         }

         for(i = 0; i < num_CVs; i++)
         {
             for(k = 0; k < N_EQN; k++)
             {
                 nbuxxave[i][k] *= 2.0;
                 nbuyyave[i][k] *= 2.0;
             }
         }
 
         ///////////// TMP
         if(debug == YES)
         {
             for(i = 0; i < num_CVs; i++)
                 printf("neighbr[%d], xx ave = %g, xy ave = %g, yy ave = %g\n",
                          i, nbuxxave[i][0], nbuxyave[i][0], nbuyyave[i][0]);
         }

         if(detect_extr == YES)
         {
             NEW_extrema_detec(uxxave,nbuxxave,num_CVs,is_bad_stenxx);
             NEW_extrema_detec(uxyave,nbuxyave,num_CVs,is_bad_stenxy);
             NEW_extrema_detec(uyyave,nbuyyave,num_CVs,is_bad_stenyy);
	     /*
             j = 0;
             for(i = 0; i < num_CVs; i++)
             {
                 if(i == 0 || i == 3 || i == 6)
                     continue;
                 for(k = 0; k < N_EQN; k++)
                 {
                     tmpnbuxxave[j][k] = nbuxxave[i][k];
                     tmpnbuxyave[j][k] = nbuxyave[i][k];
                     tmpnbuyyave[j][k] = nbuyyave[i][k];
                 }
                 j++;
             }
             NEW_extrema_detec(uxxave,tmpnbuxxave,num_CVs-3,is_bad_stenxx);
             NEW_extrema_detec(uxyave,tmpnbuxyave,num_CVs-3,is_bad_stenxy);
             NEW_extrema_detec(uyyave,tmpnbuyyave,num_CVs-3,is_bad_stenyy);
	     */
         }

         /***
         for(i = 0; i < num_CVs; i++)
         {
             Ayy[i][0][0] = Axy[i][0][0] = Axx[i][0][0] = (nbcent[i][0]-cent[0]);
             Ayy[i][0][1] = Axy[i][0][1] = Axx[i][0][1] = (nbcent[i][1]-cent[1]);
             Ayy[i][1][0] = Axy[i][1][0] = Axx[i][1][0] = (nbcent[(i+1)%num_CVs][0]-cent[0]);
             Ayy[i][1][1] = Axy[i][1][1] = Axx[i][1][1] = (nbcent[(i+1)%num_CVs][1]-cent[1]);
             c_num_yy[i] = c_num_xy[i] = c_num_xx[i] = cond_num(Axx[i]);

             // Axy[i][0][0] = (nbcent[i][0]-cent[0]);
             // Axy[i][0][1] = (nbcent[i][1]-cent[1]);
             // Axy[i][1][0] = (nbcent[(i+1)%num_CVs][0]-cent[0]);
             // Axy[i][1][1] = (nbcent[(i+1)%num_CVs][1]-cent[1]);
             // c_num_xy[i] = cond_num(Axy[i]);

             // Ayy[i][0][0] = (nbcent[i][0]-cent[0]);
             // Ayy[i][0][1] = (nbcent[i][1]-cent[1]);
             // Ayy[i][1][0] = (nbcent[(i+1)%num_CVs][0]-cent[0]);
             // Ayy[i][1][1] = (nbcent[(i+1)%num_CVs][1]-cent[1]);
             //c_num_yy[i] = cond_num(Ayy[i]);
             if(debug == YES)
             {
                 printf("sten[%d] [%g %g], [%g %g], condi = %g\n",
                      i, Ayy[i][0][0], Ayy[i][0][1], Ayy[i][1][0], Ayy[i][1][1], c_num_yy[i]);
             }
         }
         **/

         for(i = 0; i < num_CVs; i++)
         {
             Axx[i][0][0] = 6.0*(nbcent[i][0]-cent[0]);
             Axx[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axx[i][1][0] = 6.0*(nbcent[(i+1)%num_CVs][0]-cent[0]);
             Axx[i][1][1] = 2.0*(nbcent[(i+1)%num_CVs][1]-cent[1]);
             c_num_xx[i] = cond_num(Axx[i]);

             Axy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Axy[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axy[i][1][0] = 2.0*(nbcent[(i+1)%num_CVs][0]-cent[0]);
             Axy[i][1][1] = 2.0*(nbcent[(i+1)%num_CVs][1]-cent[1]);
             c_num_xy[i] = cond_num(Axy[i]);

             Ayy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Ayy[i][0][1] = 6.0*(nbcent[i][1]-cent[1]);
             Ayy[i][1][0] = 2.0*(nbcent[(i+1)%num_CVs][0]-cent[0]);
             Ayy[i][1][1] = 6.0*(nbcent[(i+1)%num_CVs][1]-cent[1]);
             c_num_yy[i] = cond_num(Ayy[i]);
         }
         ///// use every other one to make stencil
	 /**
         for(i = 0; i < num_CVs; i++)
         {
             Axxb[i][0][0] = 6.0*(nbcent[i][0]-cent[0]);
             Axxb[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axxb[i][1][0] = 6.0*(nbcent[(i+2)%num_CVs][0]-cent[0]);
             Axxb[i][1][1] = 2.0*(nbcent[(i+2)%num_CVs][1]-cent[1]);
             c_num_xx[i+num_CVs] = cond_num(Axxb[i]);

             Axyb[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Axyb[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axyb[i][1][0] = 2.0*(nbcent[(i+2)%num_CVs][0]-cent[0]);
             Axyb[i][1][1] = 2.0*(nbcent[(i+2)%num_CVs][1]-cent[1]);
             c_num_xy[i+num_CVs] = cond_num(Axyb[i]);

             Ayyb[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Ayyb[i][0][1] = 6.0*(nbcent[i][1]-cent[1]);
             Ayyb[i][1][0] = 2.0*(nbcent[(i+2)%num_CVs][0]-cent[0]);
             Ayyb[i][1][1] = 6.0*(nbcent[(i+2)%num_CVs][1]-cent[1]);
             c_num_yy[i+num_CVs] = cond_num(Ayyb[i]);
         }
	 **/
         ///// END: use every other one to make stencil
         for(k = 0; k < N_EQN; k++)
         {
             for(i = 0; i < num_CVs; i++)
             {
                 rside[0] = nbuxxave[i][k] - uxxave[k];
                 rside[1] = nbuxxave[(i+1)%num_CVs][k] - uxxave[k];
                 comp_coef(Axx[i],rside,coefxx[i]);

                 rside[0] = nbuxyave[i][k] - uxyave[k];
                 rside[1] = nbuxyave[(i+1)%num_CVs][k] - uxyave[k];
                 comp_coef(Axy[i],rside,coefxy[i]);

                 rside[0] = nbuyyave[i][k] - uyyave[k];
                 rside[1] = nbuyyave[(i+1)%num_CVs][k] - uyyave[k];
                 comp_coef(Ayy[i],rside,coefyy[i]);
                 //// TMP, debug
                 // if(k == 0 && debug == YES)
                 //     printf("sten[%d], solved xyy[%g] yyy[%g]\n", i, coefyy[i][0], coefyy[i][1]);

                 ////// use every other stencil
		 /*
                 rside[0] = nbuxxave[i][k] - uxxave[k];
                 rside[1] = nbuxxave[(i+2)%num_CVs][k] - uxxave[k];
                 comp_coef(Axxb[i],rside,coefxxb[i]);

                 rside[0] = nbuxyave[i][k] - uxyave[k];
                 rside[1] = nbuxyave[(i+2)%num_CVs][k] - uxyave[k];
                 comp_coef(Axyb[i],rside,coefxyb[i]);

                 rside[0] = nbuyyave[i][k] - uyyave[k];
                 rside[1] = nbuyyave[(i+2)%num_CVs][k] - uyyave[k];
                 comp_coef(Ayyb[i],rside,coefyyb[i]);
		 */
                 ////// END: use every other stencil
             }
             /////////////////// WENO
             ///// u_xx polynomial
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefxx[i][0];
                 arryb[i] = coefxx[i][1];
             }
             //// use every other stencil
	     /*
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i+num_CVs] = coefxxb[i][0];
                 arryb[i+num_CVs] = coefxxb[i][1];
             }
	     */
             ////END:  use every other stencil
             WENO_mod_on_3rd(arrya,arryb,c_num_xx,num_CVs,diam,w);
             u7_0 = u6 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u6   += w[i]*coefxx[i][0];
                 u7_0 += w[i]*coefxx[i][1];
             }
             /// use every other stencil
	     /*
             for(i = 0; i < num_CVs; i++)
             {
                 u6   += w[i+num_CVs]*coefxxb[i][0];
                 u7_0 += w[i+num_CVs]*coefxxb[i][1];
             }
	     */
             ///END: use every other stencil
             if(is_bad_stenxx[k] == YES && detect_extr == YES)
             {
                 u6 = 0.0; u7_0 = 0.0;
             }
             ///// u_xy polynomial
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefxy[i][0];
                 arryb[i] = coefxy[i][1];
             }
             /// use every other stencil
	     /*
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i+num_CVs] = coefxyb[i][0];
                 arryb[i+num_CVs] = coefxyb[i][1];
             }
	     */
             ///END: use every other stencil
             WENO_mod_on_3rd(arrya,arryb,c_num_xy,num_CVs,diam,w);
             // area_WENO_mod_on_3rd(arrya,arryb,c_num_xy,sten_n,diam,area,w);
             u7_1 = u8_0 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u7_1 += w[i]*coefxy[i][0];
                 u8_0 += w[i]*coefxy[i][1];
             }
             //// use every other stencil
	     /*
             for(i = 0; i < num_CVs; i++)
             {
                 u7_1 += w[i+num_CVs]*coefxyb[i][0];
                 u8_0 += w[i+num_CVs]*coefxyb[i][1];
             }
	     */
             ////END: use every other stencil
             if(is_bad_stenxy[k] == YES && detect_extr == YES)
             {
                 u7_1 = 0.0; u8_0 = 0.0;
             }
             ///// u_yy polynomial
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefyy[i][0];
                 arryb[i] = coefyy[i][1];
             }
             //// use every other stencil
	     /*
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i+num_CVs] = coefyyb[i][0];
                 arryb[i+num_CVs] = coefyyb[i][1];
             }
	     */
             ////END: use every other stencil

             WENO_mod_on_3rd(arrya,arryb,c_num_yy,num_CVs,diam,w);
             // area_WENO_mod_on_3rd(arrya,arryb,c_num_yy,sten_n,diam,area,w);
             
             /////// TMP
             if(k == 0 && debug == YES)
             {
                 for(i = 0; i < num_CVs; i++)
                     printf("sten[%d], xyy, yyy weight = %g\n", i, w[i]);
             }
             ////// END TMP
             u8_1 = u9 = 0.0;
              
             for(i = 0; i < num_CVs; i++)
             {
                 u8_1 += w[i]*coefyy[i][0];
                 u9   += w[i]*coefyy[i][1];
             }
             ////// use every other stencil
	     /*
             for(i = 0; i < num_CVs; i++)
             {
                 u8_1 += w[i+num_CVs]*coefyyb[i][0];
                 u9   += w[i+num_CVs]*coefyyb[i][1];
             }
	     */
             //////END: use every other stencil
             if(is_bad_stenyy[k] == YES && detect_extr == YES)
             {
                 u8_1 = 0.0; u9 = 0.0;
             }
             u7 = minmod((1+0.05)*minmod(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod((1+0.05)*minmod(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             // u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             // u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             /////////////////// End WENO
             switch(k)
             {
              case 0:
                  if(fabs(dg_Dens(st)[6]) < 1.0E-13)
                      dg_Dens(st2)[6] = 0.0;
                  else
                      dg_Dens(st2)[6] = u6;
                  if(fabs(dg_Dens(st)[7]) < 1.0E-13)
                      dg_Dens(st2)[7] = 0.0;
                  else
                      dg_Dens(st2)[7] = u7;
                  if(fabs(dg_Dens(st)[8]) < 1.0E-13)
                      dg_Dens(st2)[8] = 0.0;
                  else
                      dg_Dens(st2)[8] = u8;
                  if(fabs(dg_Dens(st)[9]) < 1.0E-13)
                      dg_Dens(st2)[9] = 0.0;
                  else
                      dg_Dens(st2)[9] = u9;
              break;
              case 1:
                  if(fabs(dg_Mom(st)[0][6]) < 1.0E-13)
                      dg_Mom(st2)[0][6] = 0.0;
                  else
                      dg_Mom(st2)[0][6] = u6;
                  if(fabs(dg_Mom(st)[0][7]) < 1.0E-13)
                      dg_Mom(st2)[0][7] = 0.0;
                  else
                      dg_Mom(st2)[0][7] = u7;
                  if(fabs(dg_Mom(st)[0][8]) < 1.0E-13)
                      dg_Mom(st2)[0][8] = 0.0;
                  else
                      dg_Mom(st2)[0][8] = u8;
                  if(fabs(dg_Mom(st)[0][9]) < 1.0E-13)
                      dg_Mom(st2)[0][9] = 0.0;
                  else
                      dg_Mom(st2)[0][9] = u9;
              break;
              case 2:
                  if(fabs(dg_Mom(st)[1][6]) < 1.0E-13)
                      dg_Mom(st2)[1][6] = 0.0;
                  else
                      dg_Mom(st2)[1][6] = u6;
                  if(fabs(dg_Mom(st)[1][7]) < 1.0E-13)
                      dg_Mom(st2)[1][7] = 0.0;
                  else
                      dg_Mom(st2)[1][7] = u7;
                  if(fabs(dg_Mom(st)[1][8]) < 1.0E-13)
                      dg_Mom(st2)[1][8] = 0.0;
                  else
                      dg_Mom(st2)[1][8] = u8;
                  if(fabs(dg_Mom(st)[1][9]) < 1.0E-13)
                      dg_Mom(st2)[1][9] = 0.0;
                  else
                      dg_Mom(st2)[1][9] = u9;
              break;
              case 3:
                  if(fabs(dg_Energy(st)[6]) < 1.0E-13)
                      dg_Energy(st2)[6] = 0.0;
                  else
                      dg_Energy(st2)[6] = u6;
                  if(fabs(dg_Energy(st)[7]) < 1.0E-13)
                      dg_Energy(st2)[7] = 0.0;
                  else
                      dg_Energy(st2)[7] = u7;
                  if(fabs(dg_Energy(st)[8]) < 1.0E-13)
                      dg_Energy(st2)[8] = 0.0;
                  else
                      dg_Energy(st2)[8] = u8;
                  if(fabs(dg_Energy(st)[9]) < 1.0E-13)
                      dg_Energy(st2)[9] = 0.0;
                  else
                      dg_Energy(st2)[9] = u9;
              break;
             }
         }
}

// Reconstructed coeffs. are stored in limit_store for CV polynomials
// Reconstruct by partial neighboring cells
/*
*       pre_Subcell_limiting_2nd_degreeP3_PNC():
*
*       Rebuilds the coefficients with index 3, 4 and 5 of each of the N_EQN
*       conserved quantities (density, the two momentum components, energy)
*       on tri and writes them into midsoln[tri->id].st[0].
*
*       Outline:
*       1. get_tri_center_CV_stencil() supplies num_CVs neighbouring control
*          volumes (CVs): the owning triangle tris[i] and the CV index
*          nbcv_indx[i] inside it.
*       2. For tri and for every stencil CV, the averages selected by index
*          1 ("x") and 2 ("y") are computed, the degree-3 term average of
*          the input state is added, and the degree-3 term average of the
*          state st[0] is subtracted.  The results are Lxave/Lyave and
*          nbLxave/nbLyave.
*       3. Each CV i is paired with its cyclic successor (i+1)%num_CVs to
*          form a 2x2 system (Ax[i] resp. Ay[i]) that comp_coef() solves for
*          two candidate coefficients.
*       4. WENO_mod_on_2nd() turns the candidates and the condition numbers
*          of the 2x2 matrices into weights w[]; the weighted sums give
*          u3, u4_0 (x system) and u4_1, u5 (y system).
*       5. The two estimates of the mixed coefficient are merged with
*          nested minmod() calls, and u3, u4, u5 are stored.
*
*       Parameters:
*       tri          triangle whose coefficients are rebuilt.
*       nbtri        its three neighbours; only forwarded to the stencil
*                    builder.
*       midsoln      per-triangle states indexed by TRI id.  st[rk_iter] is
*                    the input state when rk_iter != RK_STEP; st[0] supplies
*                    the degree-3 terms and receives the result.
*       store        limiting store, read as store[rk_iter] and only when
*                    rk_iter != RK_STEP.
*       rk_iter      RK_STEP selects tri->st and the CV_* averaging helpers,
*                    any other value selects midsoln/store.
*       detect_extr  YES runs NEW_extrema_detec() and zeroes the x (resp. y)
*                    pair of an equation whose flag comes back as YES.
*
*       Side effects: overwrites the file-scope scratch buffers
*       mass_1st_row[0] and mass_1st_rows[0..num_CVs-1].
*
*       NOTE(review): nbcv_indx, nbcv_side and tris hold 20 entries while
*       the other per-CV arrays hold 30; the stencil builder is assumed to
*       return at most 20 CVs -- confirm.
*/
LOCAL void pre_Subcell_limiting_2nd_degreeP3_PNC(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter,
         int       detect_extr)
{
         Locstate st, st2;
         TRI      *tris[20];
         int      nbcv_indx[20], nbcv_side[20];
         int      num_CVs, nxt, i, j, k;
         int      dim = 2;
         double   *cent, *nbcent[30];
         double   d0[2], d1[2];
         float    uxave[4], uyave[4], nbuxave[30][4], nbuyave[30][4];
         float    Rxave[4], Ryave[4], nbRxave[30][4], nbRyave[30][4];
         /* Neighbour rows are 8 wide although only N_EQN columns are filled; */
         /* presumably this matches NEW_extrema_detec()'s parameter -- confirm. */
         float    Lxave[4], Lyave[4], nbLxave[30][8], nbLyave[30][8];
         float    rside[3], Ax[30][2][2], Ay[30][2][2];
         float    coefx[30][2], coefy[30][2];
         float    c_num_x[30], c_num_y[30];
         /* 90 entries: presumably headroom for the disabled extra stencil */
         /* family described further down -- confirm before shrinking.     */
         float    arrya[90], arryb[90], w[90];
         float    u3, u4, u5, u4_0, u4_1;
         float    diam;
         int      is_bad_stenx[30], is_bad_steny[30];

         /* Input state: the triangle's own state on the RK_STEP pass,      */
         /* otherwise the intermediate state of the current stage.          */
         if(rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];
         st2 = midsoln[tri->id].st[0];

         /* fg_diam(tri) was used here at some point; the scale is now fixed. */
         diam = 1.0;
         cent = fg_centroid(tri);

         /* Stencil of neighbouring CVs.  Disabled alternatives that were    */
         /* tried in this place: get_tri_edge_vert_CV_stencil,               */
         /* get_tri_near_neighbr_CV_stencil(_ver2),                          */
         /* get_tri_near_neighbr_overlap_CV_stencil and                      */
         /* get_tri_liu_neighbr_overlap_CV_stencil.                          */
         num_CVs = get_tri_center_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         /* Averages on tri itself, augmented by the degree-3 term of st. */
         u_average_indx(tri,st,1,uxave);
         u_average_indx(tri,st,2,uyave);
         R_degree3_term_averageP3(tri,st,tri->Lmass_matrix,0,1.0,Rxave);
         R_degree3_term_averageP3(tri,st,tri->Lmass_matrix,1,1.0,Ryave);
         for(k = 0; k < N_EQN; k++)
         {
             uxave[k] += Rxave[k];
             uyave[k] += Ryave[k];
         }
         /* From here on Rxave/Ryave hold the degree-3 term of st2. */
         R_degree3_term_averageP3(tri,st2,tri->Lmass_matrix,0,1.0,Rxave);
         R_degree3_term_averageP3(tri,st2,tri->Lmass_matrix,1,1.0,Ryave);

         /* Same averages on every stencil CV.  The two passes differ only  */
         /* in where the CV polynomial is read from.                        */
         for(i = 0; i < num_CVs; i++)
         {
             for(j = 0; j < MAX_N_COEF; j++)
                 mass_1st_row[0][j] = tris[i]->CVmass_matrix[nbcv_indx[i]][j];

             if(rk_iter == RK_STEP)
             {
                 CV_u_average_indx(tris[i],nbcv_indx[i],1,nbuxave[i]);
                 CV_u_average_indx(tris[i],nbcv_indx[i],2,nbuyave[i]);
                 CV_R_degree3_term_averageP3(tris[i],nbcv_indx[i],mass_1st_row,0,nbRxave[i]);
                 CV_R_degree3_term_averageP3(tris[i],nbcv_indx[i],mass_1st_row,1,nbRyave[i]);
             }
             else
             {
                 CV_u_average_indx_from_store(tris[i],nbcv_indx[i],1,store[rk_iter],nbuxave[i]);
                 CV_u_average_indx_from_store(tris[i],nbcv_indx[i],2,store[rk_iter],nbuyave[i]);
                 CV_R_degree3_term_averageP3_store(tris[i],nbcv_indx[i],
                                   store[rk_iter][tris[i]->id],mass_1st_row,0,nbRxave[i]);
                 CV_R_degree3_term_averageP3_store(tris[i],nbcv_indx[i],
                                   store[rk_iter][tris[i]->id],mass_1st_row,1,nbRyave[i]);
             }

             for(k = 0; k < N_EQN; k++)
             {
                 nbuxave[i][k] += nbRxave[i][k];
                 nbuyave[i][k] += nbRyave[i][k];
             }
         }

         /* Degree-3 term of st2 averaged over each stencil CV, using a mass */
         /* matrix row built about cent.  Overwrites nbRxave/nbRyave.        */
         for(i = 0; i < num_CVs; i++)
         {
             comp_CV_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,nbcv_indx[i],cent,mass_1st_rows[i]);
             R_degree3_term_averageP3(tris[i],st2,mass_1st_rows[i],0,1.0,nbRxave[i]);
             R_degree3_term_averageP3(tris[i],st2,mass_1st_rows[i],1,1.0,nbRyave[i]);
         }

         /* Remove the degree-3 contribution of st2 everywhere. */
         for(k = 0; k < N_EQN; k++)
         {
             Lxave[k] = uxave[k]-Rxave[k];
             Lyave[k] = uyave[k]-Ryave[k];
             for(i = 0; i < num_CVs; i++)
             {
                 nbLxave[i][k] = nbuxave[i][k]-nbRxave[i][k];
                 nbLyave[i][k] = nbuyave[i][k]-nbRyave[i][k];
             }
         }

         /* is_bad_stenx/is_bad_steny are filled ONLY when detection is on. */
         /* (A disabled variant ran the detection without CVs 0, 3 and 6.)  */
         if(detect_extr == YES)
         {
             NEW_extrema_detec(Lxave,nbLxave,num_CVs,is_bad_stenx);
             NEW_extrema_detec(Lyave,nbLyave,num_CVs,is_bad_steny);
         }

         /* One 2x2 system per CV: row 0 from CV i, row 1 from its cyclic   */
         /* successor.  Entries are centroid offsets from cent; the x system */
         /* doubles the x offset and the y system doubles the y offset.     */
         /* (A disabled variant used the plain offsets for both systems,    */
         /* and a second disabled stencil family paired CV i with           */
         /* (i+2)%num_CVs, appended at index i+num_CVs.)                    */
         for(i = 0; i < num_CVs; i++)
         {
             nxt = (i+1)%num_CVs;
             d0[0] = nbcent[i][0]-cent[0];
             d0[1] = nbcent[i][1]-cent[1];
             d1[0] = nbcent[nxt][0]-cent[0];
             d1[1] = nbcent[nxt][1]-cent[1];

             Ax[i][0][0] = 2.0*d0[0];
             Ax[i][0][1] =     d0[1];
             Ax[i][1][0] = 2.0*d1[0];
             Ax[i][1][1] =     d1[1];
             c_num_x[i] = cond_num(Ax[i]);

             Ay[i][0][0] =     d0[0];
             Ay[i][0][1] = 2.0*d0[1];
             Ay[i][1][0] =     d1[0];
             Ay[i][1][1] = 2.0*d1[1];
             c_num_y[i] = cond_num(Ay[i]);
         }

         for(k = 0; k < N_EQN; k++)
         {
             /* Candidate coefficients from every stencil pair. */
             for(i = 0; i < num_CVs; i++)
             {
                 nxt = (i+1)%num_CVs;

                 rside[0] = nbLxave[i][k] - Lxave[k];
                 rside[1] = nbLxave[nxt][k] - Lxave[k];
                 comp_coef(Ax[i],rside,coefx[i]);

                 rside[0] = nbLyave[i][k] - Lyave[k];
                 rside[1] = nbLyave[nxt][k] - Lyave[k];
                 comp_coef(Ay[i],rside,coefy[i]);
             }

             /* x system: weighted combination gives u3 and the first       */
             /* estimate of the mixed coefficient.                          */
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefx[i][0];
                 arryb[i] = coefx[i][1];
             }
             WENO_mod_on_2nd(arrya,arryb,c_num_x,num_CVs,diam,w);
             u3 = u4_0 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u3   += w[i]*coefx[i][0];
                 u4_0 += w[i]*coefx[i][1];
             }
             /* detect_extr is tested first so the flag array is never read */
             /* while it is still uninitialized.                            */
             if(detect_extr == YES && is_bad_stenx[k] == YES)
             {
                 u3 = 0.0; u4_0 = 0.0;
             }

             /* y system: second estimate of the mixed coefficient and u5. */
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefy[i][0];
                 arryb[i] = coefy[i][1];
             }
             WENO_mod_on_2nd(arrya,arryb,c_num_y,num_CVs,diam,w);
             u4_1 = u5 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u4_1 += w[i]*coefy[i][0];
                 u5   += w[i]*coefy[i][1];
             }
             if(detect_extr == YES && is_bad_steny[k] == YES)
             {
                 u4_1 = 0.0; u5 = 0.0;
             }

             /* Merge the two mixed-coefficient estimates (eps = 0.05;      */
             /* 0.01 and a minmod2() variant were also tried).              */
             u4 = minmod((1+0.05)*minmod(u4_0,u4_1), 0.5*(u4_0 + u4_1));

             /* Store unconditionally.  A disabled variant forced a         */
             /* coefficient to 0 when the matching coefficient of st was    */
             /* below 1.0E-13 in magnitude.                                 */
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[3] = u3;
                 dg_Dens(st2)[4] = u4;
                 dg_Dens(st2)[5] = u5;
             break;
             case 1:
                 dg_Mom(st2)[0][3] = u3;
                 dg_Mom(st2)[0][4] = u4;
                 dg_Mom(st2)[0][5] = u5;
             break;
             case 2:
                 dg_Mom(st2)[1][3] = u3;
                 dg_Mom(st2)[1][4] = u4;
                 dg_Mom(st2)[1][5] = u5;
             break;
             case 3:
                 dg_Energy(st2)[3] = u3;
                 dg_Energy(st2)[4] = u4;
                 dg_Energy(st2)[5] = u5;
             break;
             default:
                 /* Equations beyond the four handled above are not stored. */
             break;
             }
         }
}

// Reconstructed coeffs. are stored in limit_store for CV polynomials
// Reconstruct by partial neighboring cells
LOCAL void pre_Subcell_limiting_1st_degreeP3_PNC(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter,
         int       comput_mat,
	 int       detect_extr,
         int       check_quadr)
{
         Locstate st, nbst[3], st2, nbst2[3];
         float    uave[4], nbuave[30][4];
         float    Rave[4], nbRave[30][4];
         float    Lave[4], nbLave[30][8], tmpnbLave[20][4];
         int      i, j, dim = 2, indx, k, is_bad_sten[20], num_CVs;
         int      on_SV_side, on_SV_side2, nbcv_indx[20], nbcv_side[20], cv_indx;
         double    *cent, *nbcent[20];
         float    rside[3], A[30][2][2], rside2[3], least_soln1[3];
         float    coef[30][2];
         float    u0, u1, u2, avg1, avg2, w[20], arrya[20], arryb[20];
         double **Lmass_matrix = tri->Lmass_matrix;
         float    c_num[30], diam, eps = 0.05;
         float    A_edge[20][2], mid[20][2], sv_coef[20];
         int      debug = NO;
         TRI      *tris[30];
         float    qcrds[MAXD], *pcrds[4];
         static float tmpq[2] = {-0.5, 0.5};
         float    coefb[30][2], Ab[30][2][2];

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             for(i = 0; i < 3; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
                 nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }

         st2 = midsoln[tri->id].st[0];
         uave[0] = Dens(st);
         uave[1] = Mom(st)[0];
         uave[2] = Mom(st)[1];
         uave[3] = Energy(st);
         R_degree2_above_term_averageP3(tri,st2,tri->Lmass_matrix,Rave);

         diam = fg_diam(tri);
         cent = fg_centroid(tri);

         // num_CVs = get_tri_edge_vert_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_near_neighbr_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_near_neighbr_CV_stencil_ver2(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         num_CVs = get_tri_center_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_near_neighbr_overlap_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         // num_CVs = get_tri_liu_neighbr_overlap_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];
         
         if(comput_mat == YES)
         {
             for(i = 0; i < num_CVs; i++)
                 comp_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,fg_centroid(tri),mass_1st_rows[i]);
         }

         if(rk_iter == RK_STEP)
         {
             for(i = 0; i < num_CVs; i++)
             {
                 //// POINT value
                 /**
                 for(j = 0; j < 3; j++)
                     pcrds[j] = Coords( Point_of_tri(tris[i])[j] );
                 if(nbcv_side[i] == nbcv_indx[i])
                 {
                     for(j = 0; j < dim; j++)
                        qcrds[j] = (pcrds[(nbcv_side[i]+1)%3][j] + pcrds[nbcv_side[i]][j])/2.0 +
                           (pcrds[(nbcv_side[i]+1)%3][j] - pcrds[nbcv_side[i]][j])/2.0*tmpq[0];
                 } 
                 else
                 {
                     for(j = 0; j < dim; j++)
                        qcrds[j] = (pcrds[(nbcv_side[i]+1)%3][j] + pcrds[nbcv_side[i]][j])/2.0 +
                           (pcrds[(nbcv_side[i]+1)%3][j] - pcrds[nbcv_side[i]][j])/2.0*tmpq[1];
                 }
                 con_u_at_CV_pt(tris[i], nbcv_indx[i], qcrds, tris[i]->CVcent[nbcv_indx[i]], nbuave[i]);
                 //// END POINT value
                 **/
                 avg_st_on_cv_ver2(tris[i], nbcv_indx[i], NULL, nbuave[i]);
             }
         }
         else
         {
             for(i = 0; i < num_CVs; i++)
             {
                 //// POINT value
                 /**
                 for(j = 0; j < 3; j++)
                     pcrds[j] = Coords( Point_of_tri(tris[i])[j] );
                 if(nbcv_side[i] == nbcv_indx[i])
                 {
                     for(j = 0; j < dim; j++)
                        qcrds[j] = (pcrds[(nbcv_side[i]+1)%3][j] + pcrds[nbcv_side[i]][j])/2.0 +
                           (pcrds[(nbcv_side[i]+1)%3][j] - pcrds[nbcv_side[i]][j])/2.0*tmpq[0];
                 } 
                 else
                 {
                     for(j = 0; j < dim; j++)
                        qcrds[j] = (pcrds[(nbcv_side[i]+1)%3][j] + pcrds[nbcv_side[i]][j])/2.0 +
                           (pcrds[(nbcv_side[i]+1)%3][j] - pcrds[nbcv_side[i]][j])/2.0*tmpq[1];
                 }
                 con_u_at_CV_pt_from_store(tris[i], nbcv_indx[i], qcrds, tris[i]->CVcent[nbcv_indx[i]], 
                         store[rk_iter], nbuave[i]);
                 //// END POINT value
                 **/
                 avg_st_on_cv_from_store(tris[i], nbcv_indx[i], NULL, store[rk_iter], nbuave[i]);
             }
         }

         for(i = 0; i < num_CVs; i++)
         {
             //// POINT value
             // for(k = 0; k < N_EQN; k++)
             //     nbRave[i][k] = Rave[k];
             //// END POINT value
             R_degree2_above_term_averageP3(tris[i],st2,mass_1st_rows[i],nbRave[i]);
         }

         for(k = 0; k < N_EQN; k++)
         {
             Lave[k] = uave[k]-Rave[k];
             for(i = 0; i < num_CVs; i++)
                 nbLave[i][k] = nbuave[i][k]-nbRave[i][k];
             /// TMP
             if(debug == YES && (k == 1 || k == 2))
             {
                 printf("eqn[%d], avg = %g\n", k, Lave[k]);
                 for(i = 0; i < num_CVs; i++)
                     printf("neighbor[%d] avg = %g\n", i, nbLave[i][k]);
             } 
         }

         if(detect_extr == YES)
         {
             NEW_extrema_detec(Lave,nbLave,num_CVs,is_bad_sten);
	     /*
             j = 0;
             for(i = 0; i < num_CVs; i++)
             {
                 if(i == 0 || i == 3 || i == 6)
                     continue;
                 for(k = 0; k < N_EQN; k++)
                     tmpnbLave[j][k] = nbLave[i][k];
                 j++;
             }
             NEW_extrema_detec(Lave,tmpnbLave,num_CVs-3,is_bad_sten);
	     */
         }

         //// POINT value
         /**
         {
             for(i = 0; i < num_CVs; i++)
             {
                 for(j = 0; j < 3; j++)
                     pcrds[j] = Coords( Point_of_tri(tris[i])[j] );
                 if(nbcv_side[i] == nbcv_indx[i])
                 {
                     for(j = 0; j < dim; j++)
                        qcrds[j] = (pcrds[(nbcv_side[i]+1)%3][j] + pcrds[nbcv_side[i]][j])/2.0 +
                           (pcrds[(nbcv_side[i]+1)%3][j] - pcrds[nbcv_side[i]][j])/2.0*tmpq[0];
                 }
                 else
                 {
                     for(j = 0; j < dim; j++)
                        qcrds[j] = (pcrds[(nbcv_side[i]+1)%3][j] + pcrds[nbcv_side[i]][j])/2.0 +
                           (pcrds[(nbcv_side[i]+1)%3][j] - pcrds[nbcv_side[i]][j])/2.0*tmpq[1];
                 }
                 nbcent[i][0] =  qcrds[0];
                 nbcent[i][1] =  qcrds[1];
             }
         }
         **/
         //// end POINT value

         for(i = 0; i < num_CVs; i++)
         {
             A[i][0][0] = (nbcent[i][0]-cent[0]);
             A[i][0][1] = (nbcent[i][1]-cent[1]);
             A[i][1][0] = (nbcent[(i+1)%num_CVs][0]-cent[0]);
             A[i][1][1] = (nbcent[(i+1)%num_CVs][1]-cent[1]);
             c_num[i] = cond_num(A[i]); // c_num[i] =1.0;
             // TMP
             // if(debug == YES)
             //     printf("condition number of sten[%d] = %g\n", i, c_num[i]);
         }
         ////// use every other stencil
	 /*
         for(i = 0; i < num_CVs; i++)
         {
             Ab[i][0][0] = (nbcent[i][0]-cent[0]);
             Ab[i][0][1] = (nbcent[i][1]-cent[1]);
             Ab[i][1][0] = (nbcent[(i+2)%num_CVs][0]-cent[0]);
             Ab[i][1][1] = (nbcent[(i+2)%num_CVs][1]-cent[1]);
             c_num[i+num_CVs] = cond_num(Ab[i]); // c_num[i] =1.0;
         }
	 */
         //////END: use every other stencil

         for(k = 0; k < N_EQN; k++)
         {
             // linear part of polynomial
             for(i = 0; i < num_CVs; i++)
             {
                 rside[0] = nbLave[i][k] - Lave[k];
                 rside[1] = nbLave[(i+1)%num_CVs][k] - Lave[k];
                 comp_coef(A[i],rside,coef[i]);
                 //// use every other stencil
		 /*
                 rside[0] = nbLave[i][k] - Lave[k];
                 rside[1] = nbLave[(i+2)%num_CVs][k] - Lave[k];
                 comp_coef(Ab[i],rside,coefb[i]);
		 */
                 //////END: use every other stencil
             }
             if(debugging("weno_w"))
             {
                 for(i = 0; i < num_CVs; i++)
                 {
                     arrya[i] = coef[i][0];
                     arryb[i] = coef[i][1];
                 }
                 //// use every other stencil
		 /*
                 for(i = 0; i < num_CVs; i++)
                 {
                     arrya[i+num_CVs] = coefb[i][0];
                     arryb[i+num_CVs] = coefb[i][1];
                 }
		 */
                 //////END: use every other stencil
                 /***
                 // TMP
                 c_num[num_CVs] = 1.0;
                 if(k == 0)
                 {
                     coef[num_CVs][0] = arrya[num_CVs] = dg_Dens(st)[1];
                     coef[num_CVs][1] = arryb[num_CVs] = dg_Dens(st)[2];
                 }
                 else if(k == 1)
                 {
                     coef[num_CVs][0] = arrya[num_CVs] = dg_Mom(st)[0][1];
                     coef[num_CVs][1] = arryb[num_CVs] = dg_Mom(st)[0][2];
                 }
                 else if(k == 2)
                 {
                     coef[num_CVs][0] = arrya[num_CVs] = dg_Mom(st)[1][1];
                     coef[num_CVs][1] = arryb[num_CVs] = dg_Mom(st)[1][2];
                 }
                 else
                 {
                     coef[num_CVs][0] = arrya[num_CVs] = dg_Energy(st)[1];
                     coef[num_CVs][1] = arryb[num_CVs] = dg_Energy(st)[2];
                 }
                 // END TMP
                 **/
                 WENO_mod_1(arrya, arryb, c_num, num_CVs, w);
                 u1 = u2 = 0.0;
                 for(i = 0; i < num_CVs; i++)
                 {
                     u1 += w[i]*coef[i][0];
                     u2 += w[i]*coef[i][1];
                 }
                 /// use every other stencil
		 /**
                 for(i = 0; i < num_CVs; i++)
                 {
                     u1 += w[i+num_CVs]*coefb[i][0];
                     u2 += w[i+num_CVs]*coefb[i][1];
                 }
		 **/
                 //////END: use every other stencil
                 if(is_bad_sten[k] == YES && detect_extr == YES)
                 {
                     u1 = u2 = 0.0;
                 }
                 u0 = Lave[k];
             }
             else if(debugging("cent_bias"))
             {
                 avg1 = 0.0; u1 = coef[0][0];
                 avg2 = 0.0; u2 = coef[0][1];
                 for(i = 0; i < num_CVs; i++)
                 {
                     avg1 += coef[i][0];
                     u1 = minmod(coef[i][0],u1);
                     avg2 += coef[i][1];
                     u2 = minmod(coef[i][1],u2);
                 }
                 avg1 /= num_CVs;
                 avg2 /= num_CVs;
                 u1 = minmod(((1+eps)*u1), avg1);
                 u2 = minmod(((1+eps)*u2), avg2);
                 u0 = Lave[k];
             }
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             }
         }
         Dens(st2) = Dens(st);
         Mom(st2)[0] = Mom(st)[0];
         Mom(st2)[1] = Mom(st)[1];
         Energy(st2) = Energy(st);

         if(check_quadr == YES && N_EQN == 4 && YES == unphysical_st_at_quadrature(tri, st2))
             tri->redo_limiting = YES;
         else
             tri->redo_limiting = NO;
}

/*
 * RECENTER_P3_COEFS - move the expansion point of one ten-coefficient
 * polynomial by (dx, dy), updating the coefficients in place.
 *
 * Coefficient layout, as implied by the update formulas below:
 *     c[0]              constant term
 *     c[1], c[2]        x, y terms
 *     c[3], c[4], c[5]  x^2, x*y, y^2 terms
 *     c[6] .. c[9]      x^3, x^2*y, x*y^2, y^3 terms (left unchanged)
 *
 * Every coefficient is updated before any higher-order coefficient it
 * reads is modified, so the statement order must be kept.  The helper is
 * a macro so that it does not depend on the element type of the
 * coefficient arrays.
 */
#define RECENTER_P3_COEFS(c, dx, dy)                                        \
        do {                                                                \
            /* constant term: value of the polynomial at the offset */      \
            (c)[0] += (c)[1]*(dx) +                                         \
                      (c)[2]*(dy) +                                         \
                      (c)[3]*sqr(dx) +                                      \
                      (c)[4]*(dx)*(dy) +                                    \
                      (c)[5]*sqr(dy);                                       \
            (c)[0] += (c)[6]*cub(dx);                                       \
            (c)[0] += (c)[7]*sqr(dx)*(dy);                                  \
            (c)[0] += (c)[8]*sqr(dy)*(dx);                                  \
            (c)[0] += (c)[9]*cub(dy);                                       \
                                                                            \
            /* x coefficient */                                             \
            (c)[1] += 2.0*(c)[3]*(dx) +                                     \
                      (c)[4]*(dy);                                          \
            (c)[1] += 3.0*(c)[6]*sqr(dx);                                   \
            (c)[1] += 2.0*(c)[7]*(dx)*(dy);                                 \
            (c)[1] += (c)[8]*sqr(dy);                                       \
                                                                            \
            /* y coefficient */                                             \
            (c)[2] += (c)[4]*(dx) +                                         \
                      2.0*(c)[5]*(dy);                                      \
            (c)[2] += (c)[7]*sqr(dx);                                       \
            (c)[2] += 2.0*(c)[8]*(dx)*(dy);                                 \
            (c)[2] += 3.0*(c)[9]*sqr(dy);                                   \
                                                                            \
            /* x^2, x*y and y^2 coefficients */                             \
            (c)[3] += 3.0*(c)[6]*(dx);                                      \
            (c)[3] += (c)[7]*(dy);                                          \
                                                                            \
            (c)[4] += 2.0*(c)[7]*(dx);                                      \
            (c)[4] += 2.0*(c)[8]*(dy);                                      \
                                                                            \
            (c)[5] += (c)[8]*(dx);                                          \
            (c)[5] += 3.0*(c)[9]*(dy);                                      \
        } while (0)

/*
 * change_cent_of_p3_poly - re-centre the degree-3 polynomials held in
 * slot 0 of mp_soln[0 .. num_CVs-1].
 *
 * For each entry n the shift is
 *     cent - tris[n]->CVcent[nbcv_indx[n]]
 * and it is applied, in this order, to cv_dg_rho[0], cv_dg_e[0],
 * cv_dg_m[0][0] and cv_dg_m[1][0].
 *
 *   tris       triangles supplying the current centres (CVcent)
 *   nbcv_indx  per entry, which CVcent row of tris[n] to use
 *   nbcv_side  not used by this routine
 *   num_CVs    number of entries to process
 *   cent       new expansion point (x, y)
 *   mp_soln    coefficient sets, modified in place
 */
LOCAL void change_cent_of_p3_poly(
        TRI       **tris,
        int       *nbcv_indx,
        int       *nbcv_side,
        int       num_CVs,
        double    *cent,
        CV_Soln   *mp_soln)
{
        int       n;

        for (n = 0; n < num_CVs; n++)
        {
            CV_Soln *soln = mp_soln + n;
            double  shift_x = cent[0] - tris[n]->CVcent[nbcv_indx[n]][0];
            double  shift_y = cent[1] - tris[n]->CVcent[nbcv_indx[n]][1];

            RECENTER_P3_COEFS(soln->cv_dg_rho[0],  shift_x, shift_y); /* density  */
            RECENTER_P3_COEFS(soln->cv_dg_e[0],    shift_x, shift_y); /* energy   */
            RECENTER_P3_COEFS(soln->cv_dg_m[0][0], shift_x, shift_y); /* mom[0]   */
            RECENTER_P3_COEFS(soln->cv_dg_m[1][0], shift_x, shift_y); /* mom[1]   */
        }
}

#undef RECENTER_P3_COEFS

/*
 * RECENTER_P2_COEFS - move the expansion point of one polynomial by
 * (dx, dy), using only its first six coefficients, in place.
 *
 * Layout implied by the formulas: c[0] constant; c[1], c[2] the x and y
 * terms; c[3], c[4], c[5] the x^2, x*y and y^2 terms.  Only c[0], c[1]
 * and c[2] are written; c[3..5] are read but left unchanged.  c[0] must
 * be updated first because it reads the not-yet-shifted c[1] and c[2].
 * A macro is used so the helper does not depend on the element type of
 * the coefficient arrays.
 */
#define RECENTER_P2_COEFS(c, dx, dy)                                        \
        do {                                                                \
            (c)[0] +=                                                       \
                      (c)[1]*(dx) +                                         \
                      (c)[2]*(dy) +                                         \
                      (c)[3]*sqr(dx) +                                      \
                      (c)[4]*(dx)*(dy) +                                    \
                      (c)[5]*sqr(dy);                                       \
            (c)[1] +=                                                       \
                      2.0*(c)[3]*(dx) +                                     \
                      (c)[4]*(dy);                                          \
            (c)[2] +=                                                       \
                      (c)[4]*(dx) +                                         \
                      2.0*(c)[5]*(dy);                                      \
        } while (0)

/*
 * change_cent_of_p2_poly - re-centre the quadratic part of the
 * polynomials held in slot 0 of tmp_soln[0 .. num_CVs-1].
 *
 * For each entry n the shift is
 *     cent - tris[n]->CVcent[nbcv_indx[n]]
 * and it is applied, in this order, to cv_dg_rho[0], cv_dg_e[0],
 * cv_dg_m[0][0] and cv_dg_m[1][0].
 *
 *   tris       triangles supplying the current centres (CVcent)
 *   nbcv_indx  per entry, which CVcent row of tris[n] to use
 *   nbcv_side  not used by this routine
 *   num_CVs    number of entries to process
 *   cent       new expansion point (x, y)
 *   tmp_soln   coefficient sets, modified in place
 */
LOCAL void change_cent_of_p2_poly(
        TRI       **tris,
        int       *nbcv_indx,
        int       *nbcv_side,
        int       num_CVs,
        double    *cent,
        CV_Soln   *tmp_soln)
{
        int       n;

        for (n = 0; n < num_CVs; n++)
        {
            CV_Soln *soln = tmp_soln + n;
            double  shift_x = cent[0] - tris[n]->CVcent[nbcv_indx[n]][0];
            double  shift_y = cent[1] - tris[n]->CVcent[nbcv_indx[n]][1];

            RECENTER_P2_COEFS(soln->cv_dg_rho[0],  shift_x, shift_y); /* rho  */
            RECENTER_P2_COEFS(soln->cv_dg_e[0],    shift_x, shift_y); /* e    */
            RECENTER_P2_COEFS(soln->cv_dg_m[0][0], shift_x, shift_y); /* m[0] */
            RECENTER_P2_COEFS(soln->cv_dg_m[1][0], shift_x, shift_y); /* m[1] */
        }
}

#undef RECENTER_P2_COEFS

/*
 * avg_st_on_cv_for_mapped_p2_poly - weighted average of the polynomial
 * coefficients held in the file-level scratch entry
 * cv_soln_2degree_tech1[indx].
 *
 * For each of the four fields (rho, m[0], m[1], e, in that output order)
 * the slot-0 coefficients are multiplied by Lmass_matrix[0][i], summed
 * over all MAX_N_COEF coefficients, and divided by Lmass_matrix[0][0].
 *
 *   tris          not used by this routine
 *   indx          entry of cv_soln_2degree_tech1 to read
 *   Lmass_matrix  only row 0 is read
 *   conu          output; the first min(N_EQN, 4) entries are written
 *
 * Only four sums are formed, so at most four entries can be produced.
 * The output loop previously ran to N_EQN unconditionally and would read
 * past the end of the four-element work array whenever N_EQN > 4; it is
 * now bounded by the work-array size.
 */
LOCAL void avg_st_on_cv_for_mapped_p2_poly(
        TRI    *tris,
        int    indx,
        double **Lmass_matrix,
        double *conu)
{
        enum { N_AVG = 4 };     /* rho, m[0], m[1], e */
        int    i, n_out;
        double tmpu[N_AVG];

        for(i = 0; i < N_AVG; i++)
            tmpu[i] = 0.0;

        for(i = 0; i < MAX_N_COEF; i++)
        {
            tmpu[0] += cv_soln_2degree_tech1[indx].cv_dg_rho[0][i]*Lmass_matrix[0][i];
            tmpu[1] += cv_soln_2degree_tech1[indx].cv_dg_m[0][0][i]*Lmass_matrix[0][i];
            tmpu[2] += cv_soln_2degree_tech1[indx].cv_dg_m[1][0][i]*Lmass_matrix[0][i];
            tmpu[3] += cv_soln_2degree_tech1[indx].cv_dg_e[0][i]*Lmass_matrix[0][i];
        }

        /* never index tmpu beyond the sums that were actually formed */
        n_out = (N_EQN < N_AVG) ? N_EQN : N_AVG;
        for(i = 0; i < n_out; i++)
        {
             tmpu[i] /= Lmass_matrix[0][0];
             conu[i] = tmpu[i];
        }
}

/*
 * R_quadr_term_averageP3 - weighted sum of coefficients 3, 4 and 5 of
 * each conserved field of st, normalised by Lmass_matrix[0][0].
 *
 * ave[0] receives the density result.  When N_EQN is not 1, ave[1],
 * ave[2] and ave[3] receive the Mom[0], Mom[1] and Energy results.
 * Entries ave[0 .. N_EQN-1] are zeroed first, and only entries below
 * N_EQN are normalised.
 *
 *   tri           not used by this routine
 *   st            state whose dg_Dens / dg_Mom / dg_Energy are read
 *   Lmass_matrix  only row 0 is read
 *   ave           output array
 */
LOCAL void R_quadr_term_averageP3(
        TRI      *tri,
        Locstate st,
        double   **Lmass_matrix,
        double   *ave)
{
         enum { COEF_BEGIN = 3, COEF_END = 6 };   /* coefficients 3..5 */
         const double *row0 = Lmass_matrix[0];
         const double norm = row0[0];
         int          eq, c;

         for (eq = 0; eq < N_EQN; eq++)
             ave[eq] = 0.0;

         /* density */
         for (c = COEF_BEGIN; c < COEF_END; c++)
             ave[0] += dg_Dens(st)[c]*row0[c];
         ave[0] /= norm;

         if (N_EQN == 1)
             return;

         /* momentum components and energy */
         for (c = COEF_BEGIN; c < COEF_END; c++)
         {
             ave[1] += dg_Mom(st)[0][c]*row0[c];
             ave[2] += dg_Mom(st)[1][c]*row0[c];
             ave[3] += dg_Energy(st)[c]*row0[c];
         }

         for (eq = 1; eq < N_EQN; eq++)
             ave[eq] /= norm;
}

/*
 * map_p2_part_of_p3_ploy - load the stencil polynomials into the
 * file-level scratch array cv_soln_2degree_tech1 and re-centre them
 * about cent.
 *
 * For each entry n in 0 .. num_CVs-1, all MAX_N_COEF coefficients of
 * rho, e, m[0] and m[1] for sub-cell nbcv_indx[n] are copied into slot 0
 * of cv_soln_2degree_tech1[n].  The source is
 *     tris[n]->cv_soln                when rk_iter == RK_STEP,
 *     store[rk_iter][tris[n]->id]     otherwise.
 * change_cent_of_p3_poly() is then called on the scratch array.
 *
 *   tris       stencil triangles
 *   nbcv_indx  per entry, which sub-cell of tris[n] to read
 *   nbcv_side  only passed on to change_cent_of_p3_poly()
 *   num_CVs    number of stencil entries
 *   store      per-stage saved solutions, indexed [rk_iter][tri id]
 *   rk_iter    stage selector (see above)
 *   cent       new expansion point (x, y)
 */
LOCAL void map_p2_part_of_p3_ploy(
        TRI       **tris,
        int       *nbcv_indx,
        int       *nbcv_side,
        int       num_CVs,
        Limiting_store **store,
        int       rk_iter,
        double    *cent)
{
        const int from_tri = (rk_iter == RK_STEP);
        int       n, c;

        for (n = 0; n < num_CVs; n++)
        {
            CV_Soln   *dst = &cv_soln_2degree_tech1[n];
            TRI       *t   = tris[n];
            const int sub  = nbcv_indx[n];

            if (from_tri)
            {
                /* take the coefficients stored on the triangle itself */
                for (c = 0; c < MAX_N_COEF; c++)
                {
                    dst->cv_dg_rho[0][c]  = t->cv_soln->cv_dg_rho[sub][c];
                    dst->cv_dg_e[0][c]    = t->cv_soln->cv_dg_e[sub][c];
                    dst->cv_dg_m[0][0][c] = t->cv_soln->cv_dg_m[0][sub][c];
                    dst->cv_dg_m[1][0][c] = t->cv_soln->cv_dg_m[1][sub][c];
                }
            }
            else
            {
                /* take the coefficients saved for this stage */
                Limiting_store *saved = &store[rk_iter][t->id];

                for (c = 0; c < MAX_N_COEF; c++)
                {
                    dst->cv_dg_rho[0][c]  = saved->cv_dg_rho[sub][c];
                    dst->cv_dg_e[0][c]    = saved->cv_dg_e[sub][c];
                    dst->cv_dg_m[0][0][c] = saved->cv_dg_m[0][sub][c];
                    dst->cv_dg_m[1][0][c] = saved->cv_dg_m[1][sub][c];
                }
            }
        }

        /* re-map every copied polynomial to the common centre */
        change_cent_of_p3_poly(tris,nbcv_indx,nbcv_side,num_CVs,cent,cv_soln_2degree_tech1);
}

// Derivative of the degree-3 terms (coefficients 6..9) of the
// polynomials stored in st, evaluated with the offset (cent - pt).
//
//   st    state whose dg_Dens / dg_Mom / dg_Energy coefficients are read
//   diff  0: differentiate w.r.t. x; any other value: w.r.t. y
//   cent  first point (x, y)
//   pt    second point (x, y); the offset used is cent - pt
//   ave   output: ave[0] density and, unless N_EQN == 1,
//         ave[1] Mom[0], ave[2] Mom[1], ave[3] Energy
LIB_LOCAL void R_degree3_term_pt_P3(
         Locstate  st,
         int       diff, // diff = 0, w.r.t. x; diff = 1, w.r.t. y.
         double    *cent,
         double    *pt,
         float     *ave)
{
         const double ex = cent[0] - pt[0];
         const double ey = cent[1] - pt[1];

         if (diff != 0)
         {
             // d/dy of c6*x^3 + c7*x^2*y + c8*x*y^2 + c9*y^3
             ave[0] = (dg_Dens(st)[7]*sqr(ex) +
                       2.0*dg_Dens(st)[8]*ex*ey +
                       3.0*dg_Dens(st)[9]*sqr(ey));

             if (N_EQN == 1)
                 return;

             ave[1] = (dg_Mom(st)[0][7]*sqr(ex) +
                       2.0*dg_Mom(st)[0][8]*ex*ey +
                       3.0*dg_Mom(st)[0][9]*sqr(ey));

             ave[2] = (dg_Mom(st)[1][7]*sqr(ex) +
                       2.0*dg_Mom(st)[1][8]*ex*ey +
                       3.0*dg_Mom(st)[1][9]*sqr(ey));

             ave[3] = (dg_Energy(st)[7]*sqr(ex) +
                       2.0*dg_Energy(st)[8]*ex*ey +
                       3.0*dg_Energy(st)[9]*sqr(ey));
             return;
         }

         // d/dx of c6*x^3 + c7*x^2*y + c8*x*y^2 + c9*y^3
         ave[0] = (3.0*dg_Dens(st)[6]*sqr(ex) +
                   2.0*dg_Dens(st)[7]*ex*ey +
                   dg_Dens(st)[8]*sqr(ey));

         if (N_EQN == 1)
             return;

         ave[1] = (3.0*dg_Mom(st)[0][6]*sqr(ex) +
                   2.0*dg_Mom(st)[0][7]*ex*ey +
                   dg_Mom(st)[0][8]*sqr(ey));

         ave[2] = (3.0*dg_Mom(st)[1][6]*sqr(ex) +
                   2.0*dg_Mom(st)[1][7]*ex*ey +
                   dg_Mom(st)[1][8]*sqr(ey));

         ave[3] = (3.0*dg_Energy(st)[6]*sqr(ex) +
                   2.0*dg_Energy(st)[7]*ex*ey +
                   dg_Energy(st)[8]*sqr(ey));
}

LOCAL void Subcell_limiting_1st_degreeP2(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter)
{
         Locstate st, nbst[3], st2, nbst2[3], tmpst;
         float    uave[8], nbuave[20][8];
         float    Rave[8], nbRave[20][8];
         float    Lave[8], nbLave[20][8];
         int      dim = 2, indx;
         int      cv_indx, num_CVs, i, k;
         int      on_SV_side, on_SV_side2, nbcv_indx[20], nbcv_side[20];
         TRI      *tris[20];
         float    *cent, *nbcent[20], diam, sqrt_area;
         float    rside[3];
         float    coef[20][2];
         float    u0, u1, u2, w[20], arrya[20], arryb[20];
         // double   **Lmass_matrix = tri->Lmass_matrix;
         // float    c_num[20];
         double   *c_num, ***A;
         int      is_bad_sten[20], debug;
         float    avg1, avg2;
         float    eps = 0.1; // 0.005, 0.05

         /**
         if(tri->id == 14)
         {
             debug = YES;
             printf("\ntri[%d] entered Subcell_limiting_1st_degreeP2(), iteration %d\n", tri->id, rk_iter);
             printf("tri [%d] cent %g %g\n",  tri->id, fg_centroid(tri)[0], fg_centroid(tri)[1]);
         }
         **/

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             // for(i = 0; i < 3; i++)
             //     nbst[i] = nbtri[i]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             // for(i = 0; i < 3; i++)
             //     nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }
         st2 = midsoln[tri->id].st[0];
         diam = fg_diam(tri);
         cent = fg_centroid(tri);
         sqrt_area = sqrt(fg_area(tri));

         // num_CVs = get_tri_near_neighbr_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         num_CVs = get_tri_neighbr_CV_stencil_from_set_HR_sten(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);

         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         if(alltri_HR_sten_2[tri->id].HR_sten_set_1st == NO)
         {
             tri_array(&A,num_CVs,2,2,sizeof(double));
             vector(&c_num,num_CVs,sizeof(double));

             for(i = 0; i < num_CVs; i++)
             {
                 A[i][0][0] = (nbcent[i][0]-cent[0])/sqrt_area;
                 A[i][0][1] = (nbcent[i][1]-cent[1])/sqrt_area;
                 A[i][1][0] = (nbcent[(i+1)%num_CVs][0]-cent[0])/sqrt_area;
                 A[i][1][1] = (nbcent[(i+1)%num_CVs][1]-cent[1])/sqrt_area;
                 // c_num[i] = cond_num(A[i]); // c_num[i] =1.0;
                 c_num[i] = cond_num2(A,i); // c_num[i] =1.0;
             }
             alltri_HR_sten_2[tri->id].HR_sten_set_1st = YES;
             alltri_HR_sten_2[tri->id].c_num = c_num;
             alltri_HR_sten_2[tri->id].A = A;
         }
         else
         {
             A = alltri_HR_sten_2[tri->id].A;
             c_num = alltri_HR_sten_2[tri->id].c_num;
         }

         uave[0] = Dens(st);
         for(i = 0; i < 3; i++)
             uave[i+1] = Mom(st)[i];
         uave[4] = Energy(st);
         uave[5] = Mag(st)[0];
         uave[6] = Mag(st)[1];
         uave[7] = Mag(st)[2];

         R_degree2_term_average(tri,st2,Rave);

         for(i = 0; i < num_CVs; i++)
         {
             if(rk_iter == RK_STEP)
                 tmpst = tris[i]->st;
             else
                 tmpst = midsoln[tris[i]->id].st[rk_iter];
             avg_st_on_cv_ver3_MHD(tris[i], nbcv_indx[i], tmpst, nbuave[i]);
         }

         /***
         if(rk_iter == RK_STEP)
         {
             for(i = 0; i < num_CVs; i++)
                 avg_st_on_cv_ver2(tris[i], nbcv_indx[i], NULL, nbuave[i]);
         }
         else
         {
             for(i = 0; i < num_CVs; i++)
                 avg_st_on_cv_from_store(tris[i], nbcv_indx[i], NULL, store[rk_iter], nbuave[i]);
         }
         ***/

         for(i = 0; i < num_CVs; i++)
         {
             // comp_CV_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,nbcv_indx[i],
             //                  cent,mass_1st_row);
             comp_CV_Mag_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,nbcv_indx[i],
                              cent,sqrt_area, mass_1st_row);
             CV_R_degree2_term_average(tris[i], nbcv_indx[i], st2, mass_1st_row, nbRave[i]);
         }
         for(k = 0; k < N_EQN; k++)
         {
             Lave[k] = uave[k]-Rave[k];
             for(i = 0; i < num_CVs; i++)
                 nbLave[i][k] = nbuave[i][k]-nbRave[i][k];
         }

         //// TMP
         if(debug == YES)
         {
             printf("Dens avg = %g\n", Lave[0]);
             for(i = 0; i < num_CVs; i++) 
             {
                 printf("Neighbor[%d] tri[%d] Dens avg = %g\n", i, tris[i]->id, nbLave[i][0]);
                 printf("cent %g %g\n",  fg_centroid(tris[i])[0], fg_centroid(tris[i])[1]);
                 if(isnan(nbLave[i][0]))
                 {
                     if(rk_iter == RK_STEP)
                         g_verbose_print_state(tris[i]->st);
                     else
                         g_verbose_print_state(midsoln[tris[i]->id].st[rk_iter]);
                     clean_up(ERROR);
                 }
             }
         }

         NEW_extrema_detec(Lave,nbLave,num_CVs,is_bad_sten);
         /***
         for(i = 0; i < num_CVs; i++)
         {
             A[i][0][0] = (nbcent[i][0]-cent[0]);
             A[i][0][1] = (nbcent[i][1]-cent[1]);
             A[i][1][0] = (nbcent[(i+1)%num_CVs][0]-cent[0]);
             A[i][1][1] = (nbcent[(i+1)%num_CVs][1]-cent[1]);
             c_num[i] = cond_num(A[i]); // c_num[i] =1.0;
         }
         ***/
         for(k = 0; k < N_EQN; k++)
         {
             // if(k == 5 || k == 6) continue; // mag[0] and mag[1] do not need limiting.

             // linear part of polynomial
             // tri, nb0, nb1; // tri, nb1, nb2; // tri, nb2, nb0
             for(i = 0; i < num_CVs; i++)
             {
                 rside[0] = nbLave[i][k] - Lave[k];
                 rside[1] = nbLave[(i+1)%num_CVs][k] - Lave[k];
                 // comp_coef(A[i],rside,coef[i]);
                 comp_coef2(A,rside,coef[i],i);
             }
             ///////////// WENO
             // if(debugging("weno_w"))
             {
                 for(i = 0; i < num_CVs; i++)
                 {
                     arrya[i] = coef[i][0];
                     arryb[i] = coef[i][1];
                 }
                 WENO_mod_1(arrya, arryb, c_num, num_CVs, w);
                 // WENO_mod_1_sqr_weight(arrya, arryb, c_num, num_CVs, w);
                 u1 = u2 = 0.0;
                 for(i = 0; i < num_CVs; i++)
                 {
                     u1 += w[i]*coef[i][0];
                     u2 += w[i]*coef[i][1];
                 }
                 if(is_bad_sten[k] == YES)
                 {
                     u1 = u2 = 0.0;
                 }
                 u0 = Lave[k];
                 /////////////////////// END WENO
             }
             /***
             else if(debugging("cent_bias"))
             {
                 avg1 = 0.0; u1 = coef[0][0];
                 avg2 = 0.0; u2 = coef[0][1];
                 for(i = 0; i < num_CVs; i++)
                 {
                     avg1 += coef[i][0];
                     u1 = minmod(coef[i][0],u1);
                     avg2 += coef[i][1];
                     u2 = minmod(coef[i][1],u2);
                 }
                 avg1 /= num_CVs;
                 avg2 /= num_CVs;
                 u1 = minmod(((1+eps)*u1), avg1);
                 u2 = minmod(((1+eps)*u2), avg2);
                 u0 = Lave[k];
             }
             ***/

             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
                 if(isnan(u0) || isnan(u1) || isnan(u2))
                 {
                     printf("ERROR: Subcell_limiting_1st_degreeP2(),tri[%d] %g %g %g is nan\n", tri->id, u0, u1, u2);
                     g_verbose_print_state(st);
                     clean_up(ERROR);
                 }
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Mom(st2)[2][0] = u0;
                 dg_Mom(st2)[2][1] = u1;
                 dg_Mom(st2)[2][2] = u2;
             break;
             case 4:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             case 5:
                 dg_B(st2)[0][0] = u0;
                 dg_B(st2)[0][1] = u1;
                 dg_B(st2)[0][2] = u2;
             break;
             case 6:
                 dg_B(st2)[1][0] = u0;
                 dg_B(st2)[1][1] = u1;
                 dg_B(st2)[1][2] = u2;
             break;
             case 7:
                 dg_B(st2)[2][0] = u0;
                 dg_B(st2)[2][1] = u1;
                 dg_B(st2)[2][2] = u2;
             break;
             default:
                 printf("ERROR: Subcell_limiting_1st_degreeP2(), implement case %d\n", k);
                 clean_up(ERROR);
             break;
             }
         }

         Dens(st2) = Dens(st);
         Energy(st2) = Energy(st);
         for(i = 0; i < 3; i++)
             Mom(st2)[i] = Mom(st)[i];
         for(i = 0; i < 3; i++)
             Mag(st2)[i] = Mag(st)[i];

         ///// Re-enforce divergence-free property
         if(dg_B(st2)[0][1] >= 0.0)
             dg_B(st2)[0][1] = min(fabs(dg_B(st2)[0][1]), fabs(dg_B(st2)[1][2]));
         else
             dg_B(st2)[0][1] = -min(fabs(dg_B(st2)[0][1]), fabs(dg_B(st2)[1][2]));
         dg_B(st2)[1][2] = -dg_B(st2)[0][1];

         /****
         Dens(st2) = Dens(st);
         Energy(st2) = Energy(st);
         if(N_EQN == 8)
         {
             for(i = 0; i < 3; i++)
                 Mom(st2)[i] = Mom(st)[i];
             Mag(st2)[2] = Mag(st)[2];
             for(i=0; i < MAX_N_COEF; i++)
             {
                 dg_B(st2)[0][i] =dg_B(st)[0][i];
                 dg_B(st2)[1][i] =dg_B(st)[1][i];
                 Mag(st2)[0] = Mag(st)[0];
                 Mag(st2)[1] = Mag(st)[1];
             }
         }
         else
         {
             Mom(st2)[0] = Mom(st)[0];
             Mom(st2)[1] = Mom(st)[1];
         }
         ****/

         /**
         for(i = 0; i < 2; i++)
         {
             dg_Dens(st2)[i] = dg_Dens(st)[i];
             dg_Mom(st2)[0][i] = dg_Mom(st)[0][i];
             dg_Mom(st2)[1][i] = dg_Mom(st)[1][i];
             dg_Energy(st2)[i] = dg_Energy(st)[i];
         }
         return;
         **/

         /**
         if(debug == YES)
         {
             printf("print tri[%d] reconstructed state\n", tri->id);
             verbose_print_state("Recon St", st2);
         }
         **/

         if((N_EQN == 4 || N_EQN == 8) && YES == unphysical_st_at_quadrature(tri, st2))
             tri->redo_limiting = YES;
         else
             tri->redo_limiting = NO;
}

/*
*               CV_R_degree2_term_average():
*
*       For every conserved variable stored in st, combines polynomial
*       coefficients 3, 4 and 5 with the weights mass_1st_row[0][3..5] and
*       divides by mass_1st_row[0][0] (held in a float that the code calls
*       the area).
*
*       Layout of the result ave[]:
*           N_EQN == 8 : 0 density, 1-3 momentum, 4 energy, 5-7 magnetic field
*           otherwise  : 0 density, 1-2 momentum, 3 energy
*
*       tri and cv_indx are accepted but not used.
*/
LOCAL void CV_R_degree2_term_average(
         TRI       *tri,
         int       cv_indx,
         Locstate  st,
         double    **mass_1st_row,
         float     *ave)
{
         const double w3 = mass_1st_row[0][3];
         const double w4 = mass_1st_row[0][4];
         const double w5 = mass_1st_row[0][5];
         float        area = mass_1st_row[0][0];
         int          n_mom, engy_slot, c;

         /* the two layouts differ only in the number of momentum       */
         /* components and in whether the magnetic field is present     */
         if(N_EQN == 8)
         {
             n_mom = 3;
             engy_slot = 4;
         }
         else
         {
             n_mom = 2;
             engy_slot = 3;
         }

         ave[0] = (dg_Dens(st)[3]*w3 +
                   dg_Dens(st)[4]*w4 +
                   dg_Dens(st)[5]*w5)/area;

         for(c = 0; c < n_mom; c++)
             ave[c+1] = (dg_Mom(st)[c][3]*w3 +
                         dg_Mom(st)[c][4]*w4 +
                         dg_Mom(st)[c][5]*w5)/area;

         ave[engy_slot] = (dg_Energy(st)[3]*w3 +
                           dg_Energy(st)[4]*w4 +
                           dg_Energy(st)[5]*w5)/area;

         if(N_EQN == 8)
         {
             for(c = 0; c < 3; c++)
                 ave[c+5] = (dg_B(st)[c][3]*w3 +
                             dg_B(st)[c][4]*w4 +
                             dg_B(st)[c][5]*w5)/area;
         }
}

/*
*                       FV_P0():
*
*       Piecewise-constant reconstruction: coefficient 0 of every dg_
*       polynomial of the selected state is set to the corresponding cell
*       value (Dens, Energy, Mom, Mag) and coefficients 1..MAX_N_COEF-1 are
*       set to zero.
*
*       The state is tri->st when rk_step == RK_STEP, otherwise
*       midsoln[tri->id].st[rk_step].
*
*       nbtri, store and fr are accepted but not used.
*/
EXPORT void FV_P0(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_step,
         Front     *fr)
{
         Locstate  st;
         int       comp, n;

         if(rk_step != RK_STEP)
             st = midsoln[tri->id].st[rk_step];
         else
             st = tri->st;

         /* constant term <- cell value */
         for(comp = 0; comp < 3; comp++)
         {
             dg_Mom(st)[comp][0] = Mom(st)[comp];
             dg_B(st)[comp][0] = Mag(st)[comp];
         }
         dg_Dens(st)[0] = Dens(st);
         dg_Energy(st)[0] = Energy(st);

         /* every higher coefficient is dropped */
         for(n = 1; n < MAX_N_COEF; n++)
         {
             for(comp = 0; comp < 3; comp++)
             {
                 dg_Mom(st)[comp][n] = 0.0;
                 dg_B(st)[comp][n] = 0.0;
             }
             dg_Dens(st)[n] = 0.0;
             dg_Energy(st)[n] = 0.0;
         }
}

/*
*                       WENO_FV_P1():
*
*       Builds ten candidate polynomials for tri and blends them, variable
*       by variable, into the dg_ coefficients of the selected state
*       (tri->st when rk_step == RK_STEP, else midsoln[tri->id].st[rk_step]).
*
*       Candidates, stored in con_u[0..9]:
*           0     init_tri_comput_P1_polynomials_from_avg_MHD() on nbtri
*           1-6   for each side: tri, the neighbor across that side, and
*                 one of that neighbor's two other neighbors
*           7-9   for each side: the stencil from get_reverse_sten_P1()
*
*       Weights: OI = c1^2 + c2^2 of the candidate, alpha = a/sqr(eps + OI)
*       with a = 0.0 for candidate 0 and a = 1.0 for the others, normalised
*       by their sum.  Candidate 0 therefore carries zero weight; an earlier
*       a = 10.0 was noted as possibly problematic (03/27).
*
*       Afterwards, for N_EQN equal to 4 or 8, the state is tested with
*       unphysical_st_at_quadrature(); on failure tri->redo_limiting is set
*       to YES and fix_unphysical_st() is called, otherwise the flag is NO.
*/
EXPORT void WENO_FV_P1(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_step,
         Front     *fr)
{
         static double **con_u[20], eps = 1.0e-6;
         static int    first = YES;
         TRI       *sten_tris[50], *edge_nb;
         Locstate  st;
         double    OI[20], alpha[20], wei[20], final_u[8][20], sum, coef;
         int       i, k, side, tmp_side, turn, sten_n, nn_num;
         int       N_STEN = 0, N_cells = 0, debug = NO;

         /* coefficient tables are allocated once and reused on every call */
         if(first == YES)
         {
             for(i = 0; i < 20; i++)
                 matrix(&con_u[i], 8, MAX_N_COEF,sizeof(double));
             first = NO;
         }

         if(rk_step != RK_STEP)
             st = midsoln[tri->id].st[rk_step];
         else
             st = tri->st;

         /* candidate 0 */
         init_tri_comput_P1_polynomials_from_avg_MHD(tri,nbtri,3, midsoln, rk_step, con_u[0]);
         N_STEN = 1;

         /* candidates 1-6: two per side */
         nn_num = 3;
         for(side = 0; side < 3; side++)
         {
             sten_tris[0] = tri;
             sten_tris[1] = Tri_on_side(tri,side);

             /* side of the neighbor that faces back to tri (3 if none does) */
             tmp_side = 0;
             while(tmp_side < 3 && tri != Tri_on_side(sten_tris[1],tmp_side))
                 tmp_side++;

             /* turn = 1, 2 picks the neighbor's two remaining sides in order */
             for(turn = 1; turn <= 2; turn++)
             {
                 sten_tris[2] = Tri_on_side(sten_tris[1],(tmp_side+turn)%3);
                 tri_comput_P1_polynomials_from_avg_MHD_3sten(tri, sten_tris, nn_num,
                           midsoln,store,rk_step, side, con_u[N_STEN]);
                 N_STEN++;
             }
         }

         /* candidates 7-9: reverse stencils */
         for(side = 0; side < 3; side++)
         {
             get_reverse_sten_P1(tri, side, sten_tris, &nn_num);
             tri_comput_P1_polynomials_from_avg_MHD_3sten(tri, sten_tris, nn_num,
                       midsoln,store,rk_step, side, con_u[N_STEN]);
             N_STEN++;
         }

         for(k = 0; k < N_EQN; k++)
         {
             /* un-normalised weights */
             sum = 0.0;
             for(sten_n = 0; sten_n < N_STEN; sten_n++)
             {
                 OI[sten_n] = sqr(con_u[sten_n][k][1]) + sqr(con_u[sten_n][k][2]);
                 alpha[sten_n] = ((sten_n == 0) ? 0.0 : 1.0)/(sqr(eps + OI[sten_n]));
                 sum += alpha[sten_n];
             }
             for(sten_n = 0; sten_n < N_STEN; sten_n++)
                 wei[sten_n] = alpha[sten_n]/sum;

             /* blend the candidates and store the result in the state */
             for(i = 0; i < MAX_N_COEF; i++)
             {
                 final_u[k][i] = 0.0;
                 for(sten_n = 0; sten_n < N_STEN; sten_n++)
                     final_u[k][i] += wei[sten_n]*con_u[sten_n][k][i];
                 coef = final_u[k][i];

                 switch(k)
                 {
                 case 0:
                     dg_Dens(st)[i] = coef;
                 break;
                 case 1:
                     dg_Mom(st)[0][i] = coef;
                 break;
                 case 2:
                     dg_Mom(st)[1][i] = coef;
                 break;
                 case 3:
                     dg_Mom(st)[2][i] = coef;
                 break;
                 case 4:
                     dg_Energy(st)[i] = coef;
                 break;
                 case 5:
                     dg_B(st)[0][i] = coef;
                 break;
                 case 6:
                     dg_B(st)[1][i] = coef;
                 break;
                 case 7:
                     dg_B(st)[2][i] = coef;
                 break;
                 }
             }
         }

         /* developer aid: set debug = YES above to dump the candidates */
         if(debug == YES)
         {
             for(sten_n = 0; sten_n < N_STEN; sten_n++)
             {
                 printf("Sten[%d] Energy: ", sten_n);
                 for(i = 0; i < MAX_N_COEF; i++)
                     printf("%10.9g ", con_u[sten_n][4][i]);
                 printf("\n");
             }
             verbose_print_state("WENO state:", st);

             /* per side: the edge neighbor, then its two other neighbors */
             for(side = 0; side < 3; side++)
             {
                 edge_nb = Tri_on_side(tri,side);
                 sten_tris[N_cells++] = edge_nb;

                 tmp_side = 0;
                 while(tmp_side < 3 && tri != Tri_on_side(edge_nb,tmp_side))
                     tmp_side++;

                 sten_tris[N_cells++] = Tri_on_side(edge_nb,(tmp_side+1)%3);
                 sten_tris[N_cells++] = Tri_on_side(edge_nb,(tmp_side+2)%3);
             }
             for(i = 0; i < N_cells; i++)
                 printf("Energy in neighbor[%d] = %10.9g\n", i, Energy(midsoln[sten_tris[i]->id].st[0]));
         }

         if((N_EQN == 4 || N_EQN == 8) && YES == unphysical_st_at_quadrature(tri, st))
         {
             tri->redo_limiting = YES;
             fix_unphysical_st(tri,midsoln,rk_step,fr);
         }
         else
             tri->redo_limiting = NO;
}


/*
*       tri_comput_P1_polynomials_from_avg_MHD_3sten():
*
*       Computes one candidate polynomial for tri from the cell values of
*       the nn_num triangles in tris[].
*
*       Row r of the system matrix A is the output of
*       comp_Mag_mass_matrix_1st_row() for tris[r], evaluated about
*       fg_centroid(tri) with scale sqrt(fg_area(tri)), divided by its own
*       first entry.  The right-hand sides are the cell values
*       (Dens, Mom[0..2], Energy, Mag[2], Mag[0], Mag[1], in that order),
*       read from tris[r]->st when midsoln is NULL and from
*       midsoln[tris[r]->id].st[0] otherwise.
*
*       Each solution of solve_by_LU() is written into the dg_ coefficients
*       of the working state (tri->st when rk_step == RK_STEP, else
*       midsoln[tri->id].st[rk_step]), so that state is overwritten.  When
*       con_u is not NULL the coefficients are then copied out of the state:
*       row 0 density, rows 1-3 momentum, row 4 energy, rows 5-7 dg_B[0..2].
*
*       limit_store and side are accepted but not used.
*       NOTE(review): A has 6 rows and only the first nn_num are filled
*       here, while the solve is sized by MAX_N_COEF -- confirm the two agree
*       for every caller.
*/
LOCAL void tri_comput_P1_polynomials_from_avg_MHD_3sten(
         TRI       *tri,
         TRI       *tris[],
         int       nn_num,
         Mid_soln *midsoln,
         Limiting_store **limit_store,
         int      rk_step,
         int      side, 
         double   **con_u)
{            
        static double   **ALmass_matrix = NULL, **A, **loc_mass_1st_row;
        double    Ab[8][20], XX[300];
        Locstate  st, avg_st;
        int       dim = 2, row, c, n, k;

        /* work arrays are allocated on the first call only */
        if(ALmass_matrix == NULL)
        {
            matrix(&(ALmass_matrix), 6, MAX_N_COEF, sizeof(double));
            matrix(&(A), 6, MAX_N_COEF, sizeof(double));
            matrix(&loc_mass_1st_row, 1, MAX_N_COEF,sizeof(double));
        }

        for(row = 0; row < nn_num; row++)
        {
            /* matrix row for this stencil triangle */
            comp_Mag_mass_matrix_1st_row(MAX_N_COEF,tris[row],dim,fg_centroid(tri),
                      sqrt(fg_area(tri)),loc_mass_1st_row);
            for(c = 0; c < MAX_N_COEF; c++)
                A[row][c] = loc_mass_1st_row[0][c]/loc_mass_1st_row[0][0];

            /* matching right-hand-side entries, one per variable */
            if(NULL == midsoln)
                avg_st = tris[row]->st;
            else
                avg_st = midsoln[tris[row]->id].st[0];

            Ab[0][row] = Dens(avg_st);
            Ab[1][row] = Mom(avg_st)[0];
            Ab[2][row] = Mom(avg_st)[1];
            Ab[3][row] = Mom(avg_st)[2];
            Ab[4][row] = Energy(avg_st);
            Ab[5][row] = Mag(avg_st)[2];
            Ab[6][row] = Mag(avg_st)[0];
            Ab[7][row] = Mag(avg_st)[1];
        }

        if(rk_step != RK_STEP)
            st = midsoln[tri->id].st[rk_step];
        else
            st = tri->st;

        for(k = 0; k < N_EQN; k++)
        {
            solve_by_LU(A,MAX_N_COEF,Ab[k],XX);

            /* rows 5, 6, 7 hold Mag[2], Mag[0], Mag[1] respectively */
            for(n = 0; n < MAX_N_COEF; n++)
            {
                switch(k)
                {
                case 0:
                    dg_Dens(st)[n] = XX[n];
                break;
                case 1:
                    dg_Mom(st)[0][n] = XX[n];
                break;
                case 2:
                    dg_Mom(st)[1][n] = XX[n];
                break;
                case 3:
                    dg_Mom(st)[2][n] = XX[n];
                break;
                case 4:
                    dg_Energy(st)[n] = XX[n];
                break;
                case 5:
                    dg_B(st)[2][n] = XX[n];
                break;
                case 6:
                    dg_B(st)[0][n] = XX[n];
                break;
                case 7:
                    dg_B(st)[1][n] = XX[n];
                break;
                }
            }
        }

        if(con_u == NULL)
            return;

        /* hand the coefficients back exactly as they are stored in the state */
        for(c = 0; c < 3; c++)
        {
            for(n = 0; n < MAX_N_COEF; n++)
            {
                con_u[c+1][n] = dg_Mom(st)[c][n];
                con_u[c+5][n] = dg_B(st)[c][n];
            }
        }
        for(n = 0; n < MAX_N_COEF; n++)
        {
            con_u[0][n] = dg_Dens(st)[n];
            con_u[4][n] = dg_Energy(st)[n];
        }
}


/*
*                       WENO_FV_P2():
*
*       Builds seven candidate polynomials for tri and blends them, variable
*       by variable, into the dg_ coefficients of the selected state
*       (tri->st when rk_step == RK_STEP, else midsoln[tri->id].st[rk_step]).
*
*       Candidates, stored in con_u[0..6]:
*           0     the 9-triangle stencil returned by get_sten_neighbr()
*           1-3   one per side, from get_one_sided_sten_P2()
*           4-6   one per side, a reverse stencil: p2_reverse_sten_5pt_vertex()
*                 when count_num_tris_vertex() reports 5 triangles at vertex
*                 (side+2)%3, get_reverse_sten_P2() when it reports more
*
*       Weights: OI = weno_weight_P2() of the candidate, alpha = a/quad(eps+OI)
*       with a = 10 for candidate 0 and a = 1 for the others, normalised by
*       their sum.  (quad() is presumably the fourth power -- confirm against
*       its definition.)
*
*       Variables k = 5 and k = 6 (dg_B[0] and dg_B[1]) are skipped and are
*       not blended here.
*
*       The run is aborted through clean_up(ERROR) when get_sten_neighbr()
*       does not return 9 triangles or a vertex has fewer than 5 triangles.
*
*       Afterwards, for N_EQN equal to 4 or 8, the state is tested with
*       unphysical_st_at_quadrature(); on failure tri->redo_limiting is set
*       to YES and fix_unphysical_st() is called, otherwise the flag is NO.
*
*       A disabled experiment (debugging("blast_MHD")) that rebuilt the
*       energy of negative-pressure cells from the entropy was removed from
*       this routine; it was commented out and had no effect.
*/
EXPORT void WENO_FV_P2(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_step,
         Front     *fr)
{
         static double **con_u[20], eps = 1.0e-6;
         static int    first = YES;
         const int     n_sten = 7;   /* 1 central + 3 one-sided + 3 reverse */
         TRI       *sten_tris[50];
         Locstate  st;
         double    OI[20], tmp_u[20], alpha[20], wei[20], final_u[10][20];
         double    sum, tmp;
         int       i, j, k, side, sten_n, nn_num, num_tris_vertex;

         /* coefficient tables are allocated once and reused on every call */
         if(first == YES)
         {
             for(i = 0; i < 20; i++)
                 matrix(&con_u[i], 10, MAX_N_COEF,sizeof(double)); 
             first = NO;
         }

         if(rk_step == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_step];

         /* candidate 0: central stencil */
         get_sten_neighbr(nbtri, tri, sten_tris, &nn_num);
         if(nn_num != 9)
         {
              /* report the routine that is actually called, and what it returned */
              printf("ERROR: WENO_FV_P2(), get_sten_neighbr() returned %d stencil tris for tri[%d], expected 9\n",
                     nn_num, tri->id);
              clean_up(ERROR);
         }
         tri_comput_P2_polynomials_from_avg_MHD(tri, sten_tris, nn_num, midsoln,store,rk_step, con_u[0]);

         /* candidates 1-3: one-sided stencils */
         for(side = 0; side < 3; side++)
         {
             get_one_sided_sten_P2(tri, side, sten_tris, &nn_num);
             tri_comput_P2_polynomials_from_avg_MHD_6sten_one_side(tri, sten_tris, nn_num, 
                       midsoln,store,rk_step, side, con_u[side+1]);
         }

         /* candidates 4-6: reverse stencils */
         for(side = 0; side < 3; side++)
         {
             count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
             if(num_tris_vertex < 5)
             {
                 printf("ERROR: WENO_FV_P2(), not enough tris at vertex for reverse\n");
                 clean_up(ERROR);
             }
             else if(num_tris_vertex == 5)
                 p2_reverse_sten_5pt_vertex(tri,side,sten_tris, &nn_num);
             else
                 get_reverse_sten_P2(tri, side, sten_tris, &nn_num);
             tri_comput_P2_polynomials_from_avg_MHD_7sten_reverse(tri, sten_tris, nn_num, 
                       midsoln,store,rk_step, side, con_u[side+4]);
         }

         for(k = 0; k < N_EQN; k++)
         {
             /* dg_B[0] and dg_B[1] are deliberately left out of the blend */
             if(k == 5 || k == 6) continue;

             sum = 0.0;
             for(sten_n = 0; sten_n < n_sten; sten_n++)
             {
                 for(j = 0; j < MAX_N_COEF; j++)
                     tmp_u[j] = con_u[sten_n][k][j]; 
                 OI[sten_n] = weno_weight_P2(tri, tmp_u);

                 tmp = (eps + OI[sten_n]);
                 /* the central stencil gets a ten times larger linear weight */
                 if(sten_n == 0)
                     alpha[sten_n] = 10.0/(quad(tmp));
                 else
                     alpha[sten_n] = 1.0/(quad(tmp));
                 sum += alpha[sten_n];
             }

             for(sten_n = 0; sten_n < n_sten; sten_n++)
                 wei[sten_n] = alpha[sten_n]/sum;

             for(i = 0; i < MAX_N_COEF; i++)
                 final_u[k][i] = 0.0;

             for(sten_n = 0; sten_n < n_sten; sten_n++)
             {
                 for(i = 0; i < MAX_N_COEF; i++)
                     final_u[k][i] += wei[sten_n]*con_u[sten_n][k][i]; 
             }

             switch(k)
             {
             case 0:
                 for(i = 0; i < MAX_N_COEF; i++)
                     dg_Dens(st)[i] = final_u[k][i];
             break;
             case 1:
                 for(i = 0; i < MAX_N_COEF; i++)
                     dg_Mom(st)[0][i] = final_u[k][i];
             break;
             case 2:
                 for(i = 0; i < MAX_N_COEF; i++)
                     dg_Mom(st)[1][i] = final_u[k][i];
             break;
             case 3:
                 for(i = 0; i < MAX_N_COEF; i++)
                     dg_Mom(st)[2][i] = final_u[k][i];
             break;
             case 4:
                 for(i = 0; i < MAX_N_COEF; i++)
                     dg_Energy(st)[i] = final_u[k][i];
             break;
             /* cases 5 and 6 cannot be reached while the skip above is in */
             /* place; they are kept so that removing it restores the blend */
             case 5:
                 for(i = 0; i < MAX_N_COEF; i++)
                     dg_B(st)[0][i] = final_u[k][i];
             break;
             case 6:
                 for(i = 0; i < MAX_N_COEF; i++)
                     dg_B(st)[1][i] = final_u[k][i];
             break;
             case 7:
                 for(i = 0; i < MAX_N_COEF; i++)
                     dg_B(st)[2][i] = final_u[k][i];
             break;
             }
         }

         if((N_EQN == 4 || N_EQN == 8) && YES == unphysical_st_at_quadrature(tri, st))
         {
             tri->redo_limiting = YES;
             fix_unphysical_st(tri,midsoln,rk_step,fr);
         }
         else
             tri->redo_limiting = NO;
}

/*
*               POLY_thermal_pressure_MHD():
*
*       Returns Gam*(E - kinetic energy - 0.5*|B|^2) for a GAS_STATE, where
*       Gam = gruneisen_gamma(state), E = Energy(state), the kinetic energy
*       is 0.5*rho*|Mom/rho|^2 and B = Mag(state).
*
*       Returns HUGE_VAL for an obstacle state.  Any state type other than
*       GAS_STATE is reported through screen() and clean_up(ERROR).
*/
EXPORT float POLY_thermal_pressure_MHD(
        Locstate state)
{
        float rho, Gam, B_len_sqr, p_gas;

        if (is_obstacle_state(state))
            return HUGE_VAL;

        rho = Dens(state);
        Gam = gruneisen_gamma(state);
        B_len_sqr = sqr(Mag(state)[0]) + sqr(Mag(state)[1]) + sqr(Mag(state)[2]);

        if (state_type(state) == GAS_STATE)
        {
            /* total energy minus kinetic and magnetic parts, times Gam */
            p_gas = (Gam)*(Energy(state) - 0.5*rho*( sqr(Mom(state)[0]/rho) + 
                                                     sqr(Mom(state)[1]/rho) +
                                                     sqr(Mom(state)[2]/rho) ) 
                            - 0.5*B_len_sqr  );
        }
        else
        {
            screen("ERROR in POLY_thermal_pressure_MHD(), no such state type\n");
            clean_up(ERROR);
        }

        return p_gas;
}

/*
*               POLY_magnetosonic_speed_MHD():
*
*       Returns sqrt((gamma*p + |B|^2)/rho) for a GAS_STATE, where
*       gamma = gruneisen_gamma(state) + 1, B = Mag(state), rho = Dens(state)
*       and p is Gam*(E - kinetic energy - 0.5*|B|^2), computed inline.
*
*       Returns HUGE_VAL for an obstacle state.  Any state type other than
*       GAS_STATE is reported through screen() and clean_up(ERROR).
*/
EXPORT float POLY_magnetosonic_speed_MHD(
        Locstate state)
{
        float rho, Gam, gamma, B_len_sqr, p_gas, cf_speed;

        if (is_obstacle_state(state))
            return HUGE_VAL;

        rho = Dens(state);
        Gam = gruneisen_gamma(state);
        gamma = Gam + 1.0;
        B_len_sqr = sqr(Mag(state)[0]) + sqr(Mag(state)[1]) + sqr(Mag(state)[2]);

        if (state_type(state) == GAS_STATE)
        {
            /* gas pressure: total energy minus kinetic and magnetic parts */
            p_gas = (Gam)*(Energy(state) - 0.5*rho*( sqr(Mom(state)[0]/rho) +
                                                     sqr(Mom(state)[1]/rho) +
                                                     sqr(Mom(state)[2]/rho) )
                            - 0.5*B_len_sqr  );

            cf_speed = sqrt( (gamma*p_gas + B_len_sqr) / rho );
        }
        else
        {
            screen("ERROR in POLY_magnetosonic_speed_MHD(), no such state type\n");
            clean_up(ERROR);
        }

        return cf_speed;
}


/// get 7 tris to form reverse stencil for P2 reconstruction.
/// This is for the case where only 5 tris at vertex
LOCAL void p2_reverse_sten_5pt_vertex(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *N)
{
         int       N_edge = 0, tmp_side, n_side, old_tmp_side, side3, side4, side5;
         TRI       *nbtri, *tmp_tri, *old_tmp_tri, *tri1, *tri3, *tri4, *tri5;

         /// tri 0
         N_edge = 0;
         tris[N_edge] = tri;
         N_edge++;

         /// tri 1
         nbtri = Tri_on_side(tri,(side+2)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }
         n_side = (tmp_side+1)%3;
         tri1 = tmp_tri = tris[N_edge] = Tri_on_side(nbtri,n_side);
         N_edge++;

         /// tri 2
         nbtri = Tri_on_side(tri,(side+1)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }
         n_side = (tmp_side+2)%3;
         tris[N_edge] = tmp_tri = Tri_on_side(nbtri,n_side);
         N_edge++;  

         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                 break;
         }
         n_side = (tmp_side+1)%3;

         //// tri 3
         tri3 = nbtri = Tri_on_side(tmp_tri,(tmp_side+1)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tmp_tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }
         tris[N_edge] = nbtri;
         N_edge++;
         side3 = (tmp_side+1)%3;

         //// tri 4
         tri4 = tmp_tri = Tri_on_side(nbtri,(tmp_side+2)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                 break;
         }
         tris[N_edge] = tmp_tri;
         N_edge++;
         side4 = (tmp_side+1)%3;

         //// tri 5
         tri5 = nbtri = Tri_on_side(tmp_tri,(tmp_side+2)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tmp_tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }
         tris[N_edge] = nbtri;
         N_edge++;
         side5 = (tmp_side+1)%3;

         if(tri5 != tri1)
         {
             //// tri 6
             n_side = (side5+1)%3;
             nbtri = Tri_on_side(tri5,n_side);
             tris[N_edge] = nbtri;
             N_edge++;
         }
         else
         {
             //// tri 6
             nbtri = Tri_on_side(tri4,side4);
             tris[N_edge] = nbtri;
             N_edge++;
         }

         *N = N_edge;
}

/// Collect the reverse stencil for P1 reconstruction: tri itself plus 2 tris.
/// See Figure P2 Zone center reconstruction, Reverse stencil.
///
/// tris[0] = tri.
/// tris[1] = neighbor of the tri on side (side+2)%3, taken across the side
///           that follows the one it shares with tri.
/// tris[2] = neighbor of the tri on side (side+1)%3, taken across the side
///           two places after the one it shares with tri.
/// *N is set to 3.
LOCAL void get_reverse_sten_P1(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *N)
{
         TRI       *edge_nb;
         int       which, s, count = 0;

         tris[count++] = tri;

         /// which = 0 builds tris[1], which = 1 builds tris[2]
         for(which = 0; which < 2; which++)
         {
             edge_nb = Tri_on_side(tri,(side+2-which)%3);

             /// side of edge_nb that faces back to tri (3 if none does)
             s = 0;
             while(s < 3 && tri != Tri_on_side(edge_nb,s))
                 s++;

             tris[count++] = Tri_on_side(edge_nb,(s+1+which)%3);
         }

         *N = count;
}

/// Collect the 7-triangle reverse stencil for P2 reconstruction.
///
/// tri   : cell the stencil is built around; stored in tris[0].
/// side  : reference side of tri; the walk starts across the two other
///         sides, (side+2)%3 and (side+1)%3.
/// tris  : output array, must have room for at least 7 entries.
/// N     : output, number of entries written (always 7).
///
/// Every "locate" step below scans the three sides of one triangle for the
/// side whose neighbor is a given triangle; when nothing matches the index
/// is left at 3 and the following modulo still produces a side in 0..2.
LOCAL void get_reverse_sten_P2(
         TRI       *tri,
         int       side,
         TRI       *tris[],
	 int       *N)
{
         TRI       *adj, *outer, *corner;
         int       s, corner_side, count = 0;

         /// slot 0: the cell itself
         tris[count++] = tri;

         /// slot 1: across side (side+2)%3 of tri, then across the side of
         /// that neighbor following the one shared with tri.
         adj = Tri_on_side(tri,(side+2)%3);
         s = 0;
         while(s < 3 && tri != Tri_on_side(adj,s))
             s++;
         outer = Tri_on_side(adj,(s+1)%3);
         tris[count++] = outer;

         /// slot 2: from slot 1, across the side following the one it
         /// shares with adj.
         s = 0;
         while(s < 3 && adj != Tri_on_side(outer,s))
             s++;
         corner = Tri_on_side(outer,(s+1)%3);
         tris[count++] = corner;

         /// remember which side of slot 2 faces slot 1; used for slot 4.
         s = 0;
         while(s < 3 && outer != Tri_on_side(corner,s))
             s++;
         corner_side = s;

         /// slot 3: across side (side+1)%3 of tri, then across the side of
         /// that neighbor preceding the one shared with tri.
         adj = Tri_on_side(tri,(side+1)%3);
         s = 0;
         while(s < 3 && tri != Tri_on_side(adj,s))
             s++;
         tris[count++] = Tri_on_side(adj,(s+2)%3);

         /// slot 4: from slot 2, across the side preceding the one that
         /// faces slot 1.
         adj = Tri_on_side(corner,(corner_side+2)%3);
         tris[count++] = adj;

         /// slots 5 and 6: the two remaining neighbors of slot 4, i.e. the
         /// ones across the sides that do not face slot 2.
         s = 0;
         while(s < 3 && corner != Tri_on_side(adj,s))
             s++;
         tris[count++] = Tri_on_side(adj,(s+1)%3);
         tris[count++] = Tri_on_side(adj,(s+2)%3);

         *N = count;
}

/// Collect the 6-triangle one-sided stencil for P2 reconstruction.
///
/// tri   : cell the stencil is built around; stored in tris[0].
/// side  : side of tri across which the stencil extends.
/// tris  : output array, must have room for at least 6 entries.
/// N     : output, number of entries written (always 6).
///
/// Layout of the result:
///   tris[0]  tri
///   tris[1]  neighbor of tri across `side`
///   tris[2]  neighbor of tris[1] across the side following the one
///            shared with tri
///   tris[3]  neighbor of tris[1] across the side preceding the one
///            shared with tri
///   tris[4]  neighbor of tris[2] across the side preceding the one it
///            shares with tris[1]
///   tris[5]  neighbor of tris[3] across the side following the one it
///            shares with tris[1]
///
/// When a "locate" scan finds no match the index is left at 3; the modulo
/// arithmetic then still yields a side in 0..2.
LOCAL void get_one_sided_sten_P2(
         TRI       *tri,
         int       side,
         TRI       *tris[],
	 int       *N)
{
         TRI       *across, *wing;
         int       s, back_side, count = 0;

         /// slot 0: the cell itself
         tris[count++] = tri;

         /// slot 1: direct neighbor across `side`; find its side that
         /// faces back to tri.
         across = Tri_on_side(tri,side);
         s = 0;
         while(s < 3 && tri != Tri_on_side(across,s))
             s++;
         back_side = s;
         tris[count++] = across;

         /// slots 2 and 3: the two other neighbors of slot 1
         tris[count++] = Tri_on_side(across,(back_side+1)%3);
         tris[count++] = Tri_on_side(across,(back_side+2)%3);

         /// slot 4: one step beyond slot 2
         wing = Tri_on_side(across,(back_side+1)%3);
         s = 0;
         while(s < 3 && across != Tri_on_side(wing,s))
             s++;
         tris[count++] = Tri_on_side(wing,(s+2)%3);

         /// slot 5: one step beyond slot 3
         wing = Tri_on_side(across,(back_side+2)%3);
         s = 0;
         while(s < 3 && across != Tri_on_side(wing,s))
             s++;
         tris[count++] = Tri_on_side(wing,(s+1)%3);

         *N = count;
}

// Limit the second-degree coefficients (indices 3,4,5) of every conserved
// variable on one triangle, using a stencil of neighboring control volumes.
//
// tri      : triangle being limited.
// nbtri    : its three edge neighbors (forwarded to the stencil builder).
// midsoln  : per-triangle intermediate states, indexed by tri->id.  The
//            limited coefficients are WRITTEN to midsoln[tri->id].st[0].
//            Must not be NULL.
// store    : not used by this routine (kept for interface compatibility).
// rk_iter  : Runge-Kutta stage.  When equal to RK_STEP the input states are
//            read from ->st of each triangle, otherwise from
//            midsoln[id].st[rk_iter].
//
// Outline:
//   1. Build the CV stencil and, on first use for this triangle, build and
//      cache in alltri_HR_sten_2[tri->id] the 2x2 matrices Ax[i], Ay[i] and
//      their condition numbers.  Stencil i pairs CV i with CV (i+1)%num_CVs.
//   2. Compute x- and y-derivative averages on tri and on every stencil CV,
//      each divided by sqrt(area) of the owning triangle.
//   3. For each equation k solve every 2x2 system, combine the candidate
//      coefficients with the weights returned by WENO_mod(), and zero the
//      result when NEW_extrema_detec() flagged component k.
//   4. Store u3,u4,u5 into slots 3,4,5 of the matching field of st2.
//   5. Re-impose the relations B[1][4] = -2*B[0][3] and
//      B[1][5] = -0.5*B[0][4] (labelled "divergence-free" by the authors).
LOCAL void Subcell_limiting_2nd_degreeP2(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter)
{
         enum { MAX_STEN = 20 };  /* capacity of the per-stencil buffers */

         Locstate st, st2, tmpst;
         int      num_CVs, i, k;
         int      nbcv_indx[MAX_STEN], nbcv_side[MAX_STEN];
         TRI      *tris[MAX_STEN];
         float    *cent, *nbcent[MAX_STEN], diam;
         float    uxave[8], nbuxave[MAX_STEN][8];
         float    uyave[8], nbuyave[MAX_STEN][8];
         double   ***Ax, ***Ay, *c_num_x, *c_num_y;
         float    coefx[MAX_STEN][2], coefy[MAX_STEN][2], rside[3];
         int      is_bad_stenx[MAX_STEN], is_bad_steny[MAX_STEN];
         float    u3, u4, u5, u4_0, u4_1;
         float    arrya[MAX_STEN], arryb[MAX_STEN], w[MAX_STEN];
         int      debug = NO;   /* set to YES locally to trace one triangle */
         float    sqrt_area, area, tmp_sqrt_area;

         // The output state lives in midsoln, so a NULL midsoln cannot be
         // served.  Previously this case was half-handled (input chosen
         // from tri->st) and then dereferenced NULL a few lines later.
         if(NULL == midsoln)
         {
             printf("ERROR: Subcell_limiting_2nd_degreeP2(), midsoln is NULL\n");
             clean_up(ERROR);
         }

         // Input state for this stage.
         if(rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];

         // Output state; inherits params and state type from the input.
         st2 = midsoln[tri->id].st[0];
         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         cent = fg_centroid(tri);
         diam = fg_diam(tri);
         area = fg_area(tri);
         sqrt_area = sqrt(fg_area(tri));

         num_CVs = get_tri_neighbr_CV_stencil_from_set_HR_sten(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);

         // All per-stencil arrays above hold MAX_STEN entries; refuse to
         // continue rather than index past them.
         // NOTE(review): the stencil builder has already filled
         // nbcv_indx/nbcv_side/tris by now, so its own bound should be
         // checked as well.
         if(num_CVs > MAX_STEN)
         {
             printf("ERROR: Subcell_limiting_2nd_degreeP2(), tri[%d] stencil has %d CVs, max %d\n",
                    tri->id, num_CVs, (int)MAX_STEN);
             clean_up(ERROR);
         }

         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         if(alltri_HR_sten_2[tri->id].HR_sten_set_2nd == NO)
         {
             // First visit: build the geometry-only matrices once and cache
             // them.  They are never freed here.
             tri_array(&Ax,num_CVs,2,2,sizeof(double));
             tri_array(&Ay,num_CVs,2,2,sizeof(double));
             vector(&c_num_x,num_CVs,sizeof(double));
             vector(&c_num_y,num_CVs,sizeof(double));
             for(i = 0; i < num_CVs; i++)
             {
                 // Rows are the centroid offsets of CV i and CV i+1 (cyclic)
                 // scaled by 1/area; Ax doubles the x column, Ay the y column.
                 Ax[i][0][0] = 2.0*(nbcent[i][0]-cent[0])/area;
                 Ax[i][0][1] =     (nbcent[i][1]-cent[1])/area;
                 Ax[i][1][0] = 2.0*(nbcent[(i+1)%num_CVs][0]-cent[0])/area;
                 Ax[i][1][1] =     (nbcent[(i+1)%num_CVs][1]-cent[1])/area;
                 c_num_x[i] = cond_num2(Ax,i);
                 Ay[i][0][0] =     (nbcent[i][0]-cent[0])/area;
                 Ay[i][0][1] = 2.0*(nbcent[i][1]-cent[1])/area;
                 Ay[i][1][0] =     (nbcent[(i+1)%num_CVs][0]-cent[0])/area;
                 Ay[i][1][1] = 2.0*(nbcent[(i+1)%num_CVs][1]-cent[1])/area;
                 c_num_y[i] = cond_num2(Ay,i);
             }
             alltri_HR_sten_2[tri->id].HR_sten_set_2nd = YES;
             alltri_HR_sten_2[tri->id].Ax = Ax;
             alltri_HR_sten_2[tri->id].Ay = Ay;
             alltri_HR_sten_2[tri->id].c_num_x = c_num_x;
             alltri_HR_sten_2[tri->id].c_num_y = c_num_y;
         }
         else
         {
             Ax = alltri_HR_sten_2[tri->id].Ax;
             Ay = alltri_HR_sten_2[tri->id].Ay;
             c_num_x = alltri_HR_sten_2[tri->id].c_num_x;
             c_num_y = alltri_HR_sten_2[tri->id].c_num_y;
         }

         if(debug == YES)
         {
             printf("Sten, number of CVs[%d] tri diam %g, \n", num_CVs, diam);
             for(i = 0; i < num_CVs; i++)
             {
                 printf("%d's CV[%d] in tri[%d], cent[%g %g]\n",
                      i, nbcv_indx[i], tris[i]->id, nbcent[i][0], nbcent[i][1]);
             }
         }

         // Derivative averages on the cell itself, scaled by 1/sqrt(area).
         u_x_average(tri,st,uxave);
         u_y_average(tri,st,uyave);

         for(k = 0; k < N_EQN; k++)
             uxave[k] /= sqrt_area;
         for(k = 0; k < N_EQN; k++)
             uyave[k] /= sqrt_area;

         // Same quantities on every stencil CV, scaled by the sqrt(area) of
         // the triangle that owns the CV.
         for(i = 0; i < num_CVs; i++)
         {
             tmp_sqrt_area = sqrt(fg_area(tris[i]));

             if(rk_iter == RK_STEP)
                 tmpst = tris[i]->st;
             else
                 tmpst = midsoln[tris[i]->id].st[rk_iter];
             CV_u_x_average_indx_MHD(tris[i], nbcv_indx[i], tmpst, nbuxave[i]);
             for(k = 0; k < N_EQN; k++)
                 nbuxave[i][k] /= tmp_sqrt_area;
             CV_u_y_average_indx_MHD(tris[i], nbcv_indx[i], tmpst, nbuyave[i]);
             for(k = 0; k < N_EQN; k++)
                 nbuyave[i][k] /= tmp_sqrt_area;
         }

         // Per-component flags; a flagged component gets its second-degree
         // coefficients zeroed below.
         NEW_extrema_detec(uxave,nbuxave,num_CVs,is_bad_stenx);
         NEW_extrema_detec(uyave,nbuyave,num_CVs,is_bad_steny);

         for(k = 0; k < N_EQN; k++)
         {
             // Candidate coefficients from every 2-CV sub-stencil.
             for(i = 0; i < num_CVs; i++)
             {
                 rside[0] = nbuxave[i][k] - uxave[k];
                 rside[1] = nbuxave[(i+1)%num_CVs][k] - uxave[k];
                 comp_coef2(Ax,rside,coefx[i],i);

                 rside[0] = nbuyave[i][k] - uyave[k];
                 rside[1] = nbuyave[(i+1)%num_CVs][k] - uyave[k];
                 comp_coef2(Ay,rside,coefy[i],i);

                 if(debug == YES && k == 0)
                 {
                     printf("sten[%d], uxy,uyy[%g %g]\n", i,
                          coefy[i][0], coefy[i][1]);
                 }
             }

             ///////////// WENO combination, x-derivative systems
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefx[i][0];
                 arryb[i] = coefx[i][1];
             }
             WENO_mod(arrya,arryb,c_num_x,num_CVs,diam,w);
             u3 = 0.0;
             u4_0 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u3 += w[i]*coefx[i][0];
                 u4_0 += w[i]*coefx[i][1];
             }
             if(is_bad_stenx[k] == YES)
             {
                 u3 = 0.0;
                 u4_0 = 0.0;
             }

             ///////////// WENO combination, y-derivative systems
             for(i = 0; i < num_CVs; i++)
             {
                 arrya[i] = coefy[i][0];
                 arryb[i] = coefy[i][1];
             }
             WENO_mod(arrya,arryb,c_num_y,num_CVs,diam,w);
             if(debug == YES && k == 0)
             {
                 for(i = 0; i < num_CVs; i++)
                     printf("weight[%d] = %g\n", i, w[i]);
             }

             u4_1 = 0.0;
             u5 = 0.0;
             for(i = 0; i < num_CVs; i++)
             {
                 u4_1 += w[i]*coefy[i][0];
                 u5 += w[i]*coefy[i][1];
             }
             if(is_bad_steny[k] == YES)
             {
                 u4_1 = 0.0;
                 u5 = 0.0;
             }

             // The mixed coefficient is available from both systems;
             // reconcile the two estimates with nested minmod.
             u4 = minmod((1+0.05)*minmod(u4_0,u4_1), 0.5*(u4_0 + u4_1));
             /////// End WENO

             switch(k)
             {
             case 0:
                 dg_Dens(st2)[3] = u3;
                 dg_Dens(st2)[4] = u4;
                 dg_Dens(st2)[5] = u5;
             break;
             case 1:
                 dg_Mom(st2)[0][3] = u3;
                 dg_Mom(st2)[0][4] = u4;
                 dg_Mom(st2)[0][5] = u5;
             break;
             case 2:
                 dg_Mom(st2)[1][3] = u3;
                 dg_Mom(st2)[1][4] = u4;
                 dg_Mom(st2)[1][5] = u5;
             break;
             case 3:
                 dg_Mom(st2)[2][3] = u3;
                 dg_Mom(st2)[2][4] = u4;
                 dg_Mom(st2)[2][5] = u5;
             break;
             case 4:
                 dg_Energy(st2)[3] = u3;
                 dg_Energy(st2)[4] = u4;
                 dg_Energy(st2)[5] = u5;
             break;
             case 5:
                 dg_B(st2)[0][3] = u3;
                 dg_B(st2)[0][4] = u4;
                 dg_B(st2)[0][5] = u5;
             break;
             case 6:
                 dg_B(st2)[1][3] = u3;
                 dg_B(st2)[1][4] = u4;
                 dg_B(st2)[1][5] = u5;
             break;
             case 7:
                 dg_B(st2)[2][3] = u3;
                 dg_B(st2)[2][4] = u4;
                 dg_B(st2)[2][5] = u5;
             break;
             default:
                 printf("ERROR: Subcell_limiting_2nd_degreeP2(), implement case %d\n", k);
                 clean_up(ERROR);
             break;
             }
         }

         // re-enforce divergence-free
         // Clip |B[0][3]| by 0.5*|B[1][4]| keeping its sign, then slave
         // B[1][4] to it.
         if(dg_B(st2)[0][3] >= 0.0)
             dg_B(st2)[0][3] = min(fabs(dg_B(st2)[0][3]), 0.5*fabs(dg_B(st2)[1][4]));
         else
             dg_B(st2)[0][3] = -min(fabs(dg_B(st2)[0][3]), 0.5*fabs(dg_B(st2)[1][4]));

         dg_B(st2)[1][4] = -2.0*dg_B(st2)[0][3];

         // Same for the pair B[0][4] / B[1][5], with bound 2*|B[1][5]|.
         if(dg_B(st2)[0][4] >= 0.0)
             dg_B(st2)[0][4] = min(fabs(dg_B(st2)[0][4]), 2.0*fabs(dg_B(st2)[1][5]));
         else
             dg_B(st2)[0][4] = -min(fabs(dg_B(st2)[0][4]), 2.0*fabs(dg_B(st2)[1][5]));

         dg_B(st2)[1][5] = -0.5*dg_B(st2)[0][4];
}

// Limit the constant and first-degree coefficients (indices 0,1,2) of
// density, x/y momentum and energy on one triangle, using point values
// sampled in neighboring control volumes ("multi-point" limiting).
//
// tri      : triangle being limited.
// nbtri    : its three edge neighbors (forwarded to the stencil builder).
// midsoln  : per-triangle intermediate states, indexed by tri->id.  The
//            result is WRITTEN to midsoln[tri->id].st[0]; that same state
//            is also read for its degree-2 part.
// store    : store[rk_iter] is passed to con_u_at_CV_pt_from_store() when
//            rk_iter != RK_STEP.
// rk_iter  : Runge-Kutta stage.  When equal to RK_STEP the input state is
//            tri->st, otherwise midsoln[tri->id].st[rk_iter].
//
// Side effects: caches the stencil matrices in alltri_HR_sten_2[tri->id]
// on first use, and sets tri->redo_limiting = YES when N_EQN == 4 and
// unphysical_st_at_quadrature() reports YES for the result.
//
// NOTE(review): uave/Rave/Lave and the last dimension of the point-value
// arrays hold 4 components while the loops run to N_EQN; this assumes
// N_EQN == 4 whenever this routine is called -- confirm.
LOCAL void Subcell_limiting_1st_degreeP2_multi_pt_limit(
         TRI       *tri,
         TRI       *nbtri[3],
         Mid_soln  *midsoln,
         Limiting_store **store,
         int       rk_iter)
{
         Locstate st, st2;
         float    uave[4], nbuaveipt[30][10][4];
                                      // [cv id][#pts][ state of conservative variable]
         float    Rave[4], nbRaveipt[30][10][4];
         float    Lave[4], nbLave[9][8], newcent[30][MAXD];
         int      dim = 2;
         int      num_CVs, i, j, k, ipt;
         int      same_vertex, side_of_cv;
         int      nbcv_indx[9], nbcv_side[9];
         TRI      *tris[9];
         float    *cent, *nbcent[9], midpt[3][3];
         float    rside[3];
         // Allocated below with sizeof(double) and handed to cond_num2() /
         // comp_coef2(), so it must be a double array.
         double   ***A;
         float    coef[9][2];
         float    u0, u1, u2, w[9], arrya[9], arryb[9];
         double   *c_num;
         int      is_bad_sten[9], N_PTS = 4;
         float    avg1, avg2;
         float    eps = 0.1; // 0.005, 0.05
         double   *pcrds[4], qcrds_cv[30][8][3]; //[cv id][#pts][crds]

         // Input state for this stage, and the output state.
         if(rk_iter == RK_STEP)
             st = tri->st;
         else
             st = midsoln[tri->id].st[rk_iter];
         st2 = midsoln[tri->id].st[0];
         cent = fg_centroid(tri);

         num_CVs = get_tri_near_neighbr_CV_stencil(tri,nbtri,0,nbcv_indx,nbcv_side,tris,&num_CVs);
         for(i = 0; i < num_CVs; i++)
             nbcent[i] = tris[i]->CVcent[nbcv_indx[i]];

         /// find crds of points where we evaluate function values.
         for(i = 0; i < num_CVs; i++)
         {
             side_of_cv = nbcv_side[i];
             same_vertex = (nbcv_side[i] == nbcv_indx[i]);

             for(j = 0; j < 3; j++)
                 pcrds[j] = Coords( Point_of_tri(tris[i])[j] );
             // edge midpoints: midpt[j] lies between vertex j and j+1
             for(j = 0; j < 3; j++)
             {
                 for(k = 0; k < dim; k++)
                     midpt[j][k] = (pcrds[(j+1)%3][k] + pcrds[j][k])*0.5;
             }

             for(j = 0; j < dim; j++)
             {
                 /// point 0: a vertex of the owning triangle
                 if(same_vertex)
                     qcrds_cv[i][0][j] = pcrds[side_of_cv][j];
                 else
                     qcrds_cv[i][0][j] = pcrds[(side_of_cv+1)%3][j];

                 /// point 1: mid point on adjacent edge
                 qcrds_cv[i][1][j] = midpt[side_of_cv][j];

                 /// point 2: mid point on the opposite side
                 if(same_vertex)
                     qcrds_cv[i][2][j] = midpt[(side_of_cv+2)%3][j];
                 else
                     qcrds_cv[i][2][j] = midpt[(side_of_cv+1)%3][j];

                 /// point 3: CV centroid
                 qcrds_cv[i][3][j] = nbcent[i][j];

                 /// point 4: mid point of partial cell on adjacent edge.
                 /// Not sampled while N_PTS == 4; kept for N_PTS == 5.
                 qcrds_cv[i][4][j] = 0.5*(qcrds_cv[i][1][j] + qcrds_cv[i][0][j]);
             }
         }

         //// 05172010, point-wise HR
         if(alltri_HR_sten_2[tri->id].HR_sten_set_1st_pt == NO)
         {
             // First visit: build and cache the geometry-only matrices.
             tri_array(&A,num_CVs,2,2,sizeof(double));
             vector(&c_num,num_CVs,sizeof(double));

             // Representative location of CV i = mean of its sample points.
             for(i = 0; i < num_CVs; i++)
             {
                 newcent[i][0] = newcent[i][1] = 0.0;
                 for(ipt = 0; ipt < N_PTS; ipt++)
                 {
                     newcent[i][0] += qcrds_cv[i][ipt][0];
                     newcent[i][1] += qcrds_cv[i][ipt][1];
                 }
                 newcent[i][0] /= N_PTS;
                 newcent[i][1] /= N_PTS;
             }
             // Sub-stencil i pairs CV i with CV i+1 (cyclic).
             for(i = 0; i < num_CVs; i++)
             {
                 A[i][0][0] = (newcent[i][0]-cent[0]);
                 A[i][0][1] = (newcent[i][1]-cent[1]);
                 A[i][1][0] = (newcent[(i+1)%num_CVs][0]-cent[0]);
                 A[i][1][1] = (newcent[(i+1)%num_CVs][1]-cent[1]);

                 c_num[i] = cond_num2(A,i);
             }

             alltri_HR_sten_2[tri->id].HR_sten_set_1st_pt = YES;
             alltri_HR_sten_2[tri->id].c_num_pt = c_num;
             alltri_HR_sten_2[tri->id].A_pt = A;
         }
         else
         {
             A = alltri_HR_sten_2[tri->id].A_pt;
             c_num = alltri_HR_sten_2[tri->id].c_num_pt;
         }

         uave[0] = Dens(st);
         uave[1] = Mom(st)[0];
         uave[2] = Mom(st)[1];
         uave[3] = Energy(st);

         R_degree2_term_average(tri,st2,Rave);

         // Conserved variables at every sample point of every stencil CV.
         if(rk_iter == RK_STEP)
         {
             for(i = 0; i < num_CVs; i++)
             {
                 for(ipt = 0; ipt < N_PTS; ipt++)
                     con_u_at_CV_pt(tris[i], nbcv_indx[i], qcrds_cv[i][ipt], tris[i]->CVcent[nbcv_indx[i]], nbuaveipt[i][ipt]);
             }
         }
         else
         {
             for(i = 0; i < num_CVs; i++)
             {
                 for(ipt = 0; ipt < N_PTS; ipt++)
                     con_u_at_CV_pt_from_store(tris[i], nbcv_indx[i], qcrds_cv[i][ipt], tris[i]->CVcent[nbcv_indx[i]],
                         store[rk_iter], nbuaveipt[i][ipt]);
             }
         }

         // Degree-2-and-above part of st2 at the same points.
         for(i = 0; i < num_CVs; i++)
         {
             for(ipt = 0; ipt < N_PTS; ipt++)
                 R_degree2_above_term_pt(tris[i], st2, NULL, qcrds_cv[i][ipt], cent, nbRaveipt[i][ipt]);
         }

         // Subtract the degree-2 part: on the cell itself, and averaged over
         // the N_PTS sample points on each stencil CV.
         for(k = 0; k < N_EQN; k++)
         {
             Lave[k] = uave[k]-Rave[k];
             for(i = 0; i < num_CVs; i++)
             {
                 nbLave[i][k] = 0.0;
                 for(ipt = 0; ipt < N_PTS; ipt++)
                     nbLave[i][k] += (nbuaveipt[i][ipt][k] - nbRaveipt[i][ipt][k]);
                 nbLave[i][k] /= N_PTS;
             }
         }
         // is_bad_sten is filled in but not consulted below.
         NEW_extrema_detec(Lave,nbLave,num_CVs,is_bad_sten);

         for(k = 0; k < N_EQN; k++)
         {
             // linear part of polynomial, one candidate per sub-stencil
             for(i = 0; i < num_CVs; i++)
             {
                 rside[0] = nbLave[i][k] - Lave[k];
                 rside[1] = nbLave[(i+1)%num_CVs][k] - Lave[k];
                 comp_coef2(A,rside,coef[i],i);
             }

             // Defined fallback: if neither "weno_w" nor "cent_bias" is
             // enabled, keep the mean and use zero slopes.  Previously
             // u0/u1/u2 were stored uninitialized in that case.
             u0 = Lave[k];
             u1 = u2 = 0.0;

             if(debugging("weno_w"))
             {
                 ///////////// WENO-weighted combination of the candidates
                 for(i = 0; i < num_CVs; i++)
                 {
                     arrya[i] = coef[i][0];
                     arryb[i] = coef[i][1];
                 }
                 WENO_mod_1(arrya, arryb, c_num, num_CVs, w);
                 for(i = 0; i < num_CVs; i++)
                 {
                     u1 += w[i]*coef[i][0];
                     u2 += w[i]*coef[i][1];
                 }
             }
             else if(debugging("cent_bias"))
             {
                 // minmod over all candidates, then minmod of the slightly
                 // relaxed result against the plain average.
                 avg1 = 0.0; u1 = coef[0][0];
                 avg2 = 0.0; u2 = coef[0][1];
                 for(i = 0; i < num_CVs; i++)
                 {
                     avg1 += coef[i][0];
                     u1 = minmod(coef[i][0],u1);
                     avg2 += coef[i][1];
                     u2 = minmod(coef[i][1],u2);
                 }
                 avg1 /= num_CVs;
                 avg2 /= num_CVs;
                 u1 = minmod(((1+eps)*u1), avg1);
                 u2 = minmod(((1+eps)*u2), avg2);
             }

             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             default:
                 // components beyond the four above are not stored here
             break;
             }
         }

         // Cell means are copied through unchanged.
         Dens(st2) = Dens(st);
         Mom(st2)[0] = Mom(st)[0];
         Mom(st2)[1] = Mom(st)[1];
         Energy(st2) = Energy(st);

         if(N_EQN == 4 && YES == unphysical_st_at_quadrature(tri, st2))
             tri->redo_limiting = YES;
}

/*
*                       CV_u_x_average_indx_MHD():
*
*       Fills uxave[0..7] for the control volume selected by indx, using the
*       DG coefficients of st and row indx of tri->CVmass_matrix.  For every
*       conserved component c the value stored is
*
*           c[1]*row[0] + 2.0*c[3]*row[1] + c[4]*row[2]
*
*       (presumably the x-derivative of the quadratic polynomial integrated
*       against the row's moments -- TODO confirm against the code that
*       builds CVmass_matrix).
*
*       Output layout: [0] density, [1..3] momentum, [4] energy, [5..7] B.
*       All eight slots are always written, so uxave must hold at least
*       eight entries; only the first N_EQN of them are then divided by
*       row[0].
*/
LOCAL void CV_u_x_average_indx_MHD(
         TRI        *tri,
         int        indx,
         Locstate   st,
         float      *uxave)
{
         double  *row = tri->CVmass_matrix[indx];
         int     comp, eq;

#define CV_DX_MOMENT(c) ((c)[1]*row[0] + 2.0*(c)[3]*row[1] + (c)[4]*row[2])

         uxave[0] = CV_DX_MOMENT(dg_Dens(st));

         for(comp = 0; comp < 3; comp++)
             uxave[comp+1] = CV_DX_MOMENT(dg_Mom(st)[comp]);

         uxave[4] = CV_DX_MOMENT(dg_Energy(st));

         for(comp = 0; comp < 3; comp++)
             uxave[comp+5] = CV_DX_MOMENT(dg_B(st)[comp]);

#undef CV_DX_MOMENT

         // Normalize by the leading entry of the row (only the first N_EQN slots).
         for(eq = 0; eq < N_EQN; eq++)
             uxave[eq] /= row[0];
}

/*
*                       CV_u_y_average_indx_MHD():
*
*       Fills uyave[0..7] for the control volume selected by indx, using the
*       DG coefficients of st and row indx of tri->CVmass_matrix.  For every
*       conserved component c the value stored is
*
*           c[2]*row[0] + c[4]*row[1] + 2.0*c[5]*row[2]
*
*       (presumably the y-derivative of the quadratic polynomial integrated
*       against the row's moments -- TODO confirm against the code that
*       builds CVmass_matrix).
*
*       Output layout: [0] density, [1..3] momentum, [4] energy, [5..7] B.
*       All eight slots are always written, so uyave must hold at least
*       eight entries; only the first N_EQN of them are then divided by
*       row[0].
*/
LOCAL void CV_u_y_average_indx_MHD(
         TRI       *tri,
         int       indx,
         Locstate  st,
         float     *uyave)
{
         double  *row = tri->CVmass_matrix[indx];
         int     comp, eq;

#define CV_DY_MOMENT(c) ((c)[2]*row[0] + (c)[4]*row[1] + 2.0*(c)[5]*row[2])

         uyave[0] = CV_DY_MOMENT(dg_Dens(st));

         for(comp = 0; comp < 3; comp++)
             uyave[comp+1] = CV_DY_MOMENT(dg_Mom(st)[comp]);

         uyave[4] = CV_DY_MOMENT(dg_Energy(st));

         for(comp = 0; comp < 3; comp++)
             uyave[comp+5] = CV_DY_MOMENT(dg_B(st)[comp]);

#undef CV_DY_MOMENT

         // Normalize by the leading entry of the row (only the first N_EQN slots).
         for(eq = 0; eq < N_EQN; eq++)
             uyave[eq] /= row[0];
}

/*
*                       CV_u_x_average_indx():
*
*       Copies coefficient 1 of the control-volume solution stored on the
*       triangle (tri->cv_soln) for control volume cv_indx into uxave:
*       [0] density, [1..2] momentum components 0 and 1, [3] energy.
*/
LOCAL void CV_u_x_average_indx(
         TRI       *tri,
         int       cv_indx,
         float     *uxave)
{
         int d;

         uxave[0] = tri->cv_soln->cv_dg_rho[cv_indx][1];
         for(d = 0; d < 2; d++)
             uxave[d+1] = tri->cv_soln->cv_dg_m[d][cv_indx][1];
         uxave[3] = tri->cv_soln->cv_dg_e[cv_indx][1];
}

/*
*                       CV_u_y_average_indx():
*
*       Copies coefficient 2 of the control-volume solution stored on the
*       triangle (tri->cv_soln) for control volume cv_indx into uyave:
*       [0] density, [1..2] momentum components 0 and 1, [3] energy.
*/
LOCAL void CV_u_y_average_indx(
         TRI       *tri,
         int       cv_indx,
         float     *uyave)
{
         int d;

         uyave[0] = tri->cv_soln->cv_dg_rho[cv_indx][2];
         for(d = 0; d < 2; d++)
             uyave[d+1] = tri->cv_soln->cv_dg_m[d][cv_indx][2];
         uyave[3] = tri->cv_soln->cv_dg_e[cv_indx][2];
}

/*
*                       CV_u_x_average_indx_from_store():
*
*       Same output layout as CV_u_x_average_indx(), but coefficient 1 is
*       read from the Limiting_store entry indexed by tri->id rather than
*       from the triangle itself:
*       [0] density, [1..2] momentum components 0 and 1, [3] energy.
*/
LOCAL void CV_u_x_average_indx_from_store(
         TRI       *tri,
         int       cv_indx,
         Limiting_store *store,
         float     *uxave)
{
         Limiting_store *rec = &store[tri->id];
         int            d;

         uxave[0] = rec->cv_dg_rho[cv_indx][1];
         for(d = 0; d < 2; d++)
             uxave[d+1] = rec->cv_dg_m[d][cv_indx][1];
         uxave[3] = rec->cv_dg_e[cv_indx][1];
}

/*
*                       CV_u_y_average_indx_from_store():
*
*       Same output layout as CV_u_y_average_indx(), but coefficient 2 is
*       read from the Limiting_store entry indexed by tri->id rather than
*       from the triangle itself:
*       [0] density, [1..2] momentum components 0 and 1, [3] energy.
*/
LOCAL void CV_u_y_average_indx_from_store(
         TRI       *tri,
         int       cv_indx,
         Limiting_store *store,
         float     *uyave)
{
         Limiting_store *rec = &store[tri->id];
         int            d;

         uyave[0] = rec->cv_dg_rho[cv_indx][2];
         for(d = 0; d < 2; d++)
             uyave[d+1] = rec->cv_dg_m[d][cv_indx][2];
         uyave[3] = rec->cv_dg_e[cv_indx][2];
}

/*
*                       R_degree2_above_term_pt():
*
*       Evaluates, at the point crds, the part of the DG polynomial of st
*       made of the coefficients with index 3 .. MAX_N_COEF-1 (the terms of
*       degree two and above), using vh_val(crds,cent,n) as the value of
*       basis function n.
*
*       con_u[0..N_EQN-1] is zeroed first.  con_u[0] receives the density
*       contribution.  Unless N_EQN == 1, con_u[1], con_u[2] and con_u[3]
*       receive the contributions of momentum components 0 and 1 and of the
*       energy.
*
*       tri and Lmass_matrix are not used by this routine.
*/
LIB_LOCAL void R_degree2_above_term_pt(
         TRI       *tri,
         Locstate  st,
         double    **Lmass_matrix,
         float     *crds,
         float     *cent,
         float     *con_u)
{
         float      basis;
         int        eq, n;

         for(eq = 0; eq < N_EQN; eq++)
             con_u[eq] = 0.0;

         // Density is accumulated for every equation set.
         n = 3;
         while(n < MAX_N_COEF)
         {
             basis = vh_val(crds,cent,n);
             con_u[0] += dg_Dens(st)[n]*basis;
             n++;
         }

         // Momentum and energy only exist when more than one equation is solved.
         if(N_EQN != 1)
         {
             for(n = 3; n < MAX_N_COEF; n++)
             {
                 basis = vh_val(crds,cent,n);
                 con_u[1] += dg_Mom(st)[0][n]*basis;
                 con_u[2] += dg_Mom(st)[1][n]*basis;
                 con_u[3] += dg_Energy(st)[n]*basis;
             }
         }
}

/*
*                       limiting_2nd_degree():
*
*       Limits the second-degree DG coefficients (indices 3, 4, 5) of every
*       conserved component on triangle tri and writes the result into
*       midsoln[tri->id].st[0] (the original note read: "Reconstructed
*       coeffs. are stored in RK_STEP[0]").
*
*       tri      triangle being limited.
*       nbtri    its tri_n neighbor triangles; consecutive entries (i and
*                (i+1)%tri_n) are paired to form each candidate stencil.
*       tri_n    number of entries in nbtri.
*       midsoln  per-triangle intermediate solutions, indexed by tri->id.
*       rk_iter  selects which state is limited: tri->st when midsoln is
*                NULL or rk_iter == RK_STEP, otherwise
*                midsoln[...].st[rk_iter].
*
*       Outline:
*         1. u_x_average()/u_y_average() of the cell and of each neighbor,
*            each divided by sqrt(area) of its own triangle.
*         2. NEW_extrema_detec() flags, per equation, stencils whose
*            second-degree terms are to be zeroed.
*         3. The 2x2 stencil matrices Ax/Ay and their condition numbers are
*            built once per triangle and cached in alltri_HR_sten_2[tri->id].
*         4. Per equation, candidate coefficients from comp_coef2() are
*            blended with the weights returned by WENO_mod().
*         5. The B-field coefficients are adjusted so that
*            2*Bx[3] + By[4] == 0 and Bx[4] + 2*By[5] == 0.
*
*       NOTE(review): the local work arrays are dimensioned for at most 20
*       neighbors; tri_n is not checked against that bound here -- confirm
*       that callers guarantee tri_n <= 20.
*/
EXPORT void limiting_2nd_degree(
     TRI       *tri,            
         TRI       *nbtri[],    
         int       tri_n,       
         Mid_soln  *midsoln,    
         int       rk_iter)     
{                               
         Locstate st, nbst[20], st2;
         float    uxave[8], nbuxave[20][8]; 
         float    uyave[8], nbuyave[20][8]; 
         int      i, dim = 2, k;
         double   *cent, *nbcent[20];
         float    rside[3], rside2[3], least_soln1[3], least_soln2[3];
         float    coefx[20][2], coefy[20][2];
         float    u3, u4, u5, u4_0, u4_1, avg3, avg4, avg5, arrya[20], arryb[20], w[20];
         // double   **Lmass_matrix = tri->Lmass_matrix;
         float    eps = 0.0;
         float    dirx[3], diry[3], theta[3]; // dirx, diry: cos of the angle
         float    ud[3][2]; // soln uxx, uxy, uyy computed using one line 
         int      idirx, idiry;
         static double **mat = NULL, **mat_tran, **AA, **inv;
         float    diam, sqr_diam;
         int      is_bad_stenx[20], is_bad_steny[20];
         float    mid[3][2], Ax_edge[3][2], Ay_edge[3][2];
         double   ***Ax = NULL, ***Ay, *c_num_x, *c_num_y, sqrt_area, area, tmp_sqrt_area;

         // printf("ERROR: limiting_2nd_degree(), revise for MHD\n");
         // clean_up(ERROR);

         // One-time allocation of 3x3 scratch matrices.
         // NOTE(review): none of mat, mat_tran, AA, inv is referenced again
         // in this function; the allocation looks vestigial.
         if(mat == NULL)
         {
             matrix(&(AA), 3, 3, sizeof(double));
             matrix(&(inv), 3, 3, sizeof(double));
             matrix(&(mat_tran), 3, 3, sizeof(double));
             matrix(&(mat), 3, 3, sizeof(double));

             // tri_array(&Ax,20,2,2,sizeof(double));
             // tri_array(&Ay,20,2,2,sizeof(double));
             // vector(&c_num_x,20,sizeof(double));
             // vector(&c_num_y,20,sizeof(double));
         }

         // Pick the input states: the triangles' own states, or the
         // intermediate states of stage rk_iter.
         if(NULL == midsoln)
         {
             st = tri->st;
             for(i = 0; i < tri_n; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             if(rk_iter == RK_STEP)
             {
                 st = tri->st;
                 for(i = 0; i < tri_n; i++)
                     nbst[i] = nbtri[i]->st;
             }
             else
             {
                 st = midsoln[tri->id].st[rk_iter];
                 for(i = 0; i < tri_n; i++)
                     nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
             }
         }

         // Output state.
         // NOTE(review): midsoln is dereferenced here unconditionally even
         // though the branch above allows midsoln == NULL -- confirm that no
         // caller actually passes NULL.
         st2 = midsoln[tri->id].st[0];
         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         // TMP, no need, for debugging only
         // assign(st2, st, sizest);
         // END TMP

         diam = fg_diam(tri);
         sqr_diam = sqr(diam);
         area = fg_area(tri);
         sqrt_area = sqrt(fg_area(tri));

         cent = fg_centroid(tri);
         for(i = 0; i < tri_n; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         /****
         for(i=0; i < MAX_N_COEF; i++)
         {
             dg_B(st2)[0][i] =dg_B(st)[0][i];
             dg_B(st2)[1][i] =dg_B(st)[1][i];
             Mag(st2)[0] = Mag(st)[0];
             Mag(st2)[1] = Mag(st)[1];
         }
         ****/

         /*
         for(i = 0; i < 3; i++)
         {
             mid[i][0] = (Coords(Point_of_tri(tri)[i])[0] + Coords(Point_of_tri(tri)[(i+1)%3])[0])*0.5;
             mid[i][1] = (Coords(Point_of_tri(tri)[i])[1] + Coords(Point_of_tri(tri)[(i+1)%3])[1])*0.5;
             Ax_edge[i][0] = 2.0*(mid[i][0] - cent[0]);
             Ax_edge[i][1] = mid[i][1] - cent[1];
             Ay_edge[i][0] = (mid[i][0] - cent[0]);
             Ay_edge[i][1] = 2.0*(mid[i][1] - cent[1]);
         }
         */

         // x- and y-derivative averages on the cell itself, scaled by
         // 1/sqrt(area) of this triangle.
         u_x_average(tri,st,uxave);
         u_y_average(tri,st,uyave);

         for(k = 0; k < N_EQN; k++)
             uxave[k] /= sqrt_area; 
         for(k = 0; k < N_EQN; k++)
             uyave[k] /= sqrt_area; 

         // Same quantities on each neighbor, each scaled by the neighbor's
         // own 1/sqrt(area).
         for(i = 0; i < tri_n; i++)
         {
             tmp_sqrt_area = sqrt(fg_area(nbtri[i]));

             u_x_average(nbtri[i],nbst[i],nbuxave[i]);
             for(k = 0; k < N_EQN; k++)
                 nbuxave[i][k] /= tmp_sqrt_area; 
             u_y_average(nbtri[i],nbst[i],nbuyave[i]);
             for(k = 0; k < N_EQN; k++)
                 nbuyave[i][k] /= tmp_sqrt_area; 
         }

         // Shu_V_smooth_dect(tri,midsoln,rk_iter,uxave,nbuxave,is_bad_stenx);
         // Shu_V_smooth_dect(tri,midsoln,rk_iter,uyave,nbuyave,is_bad_steny);
         // extrema_detec(uxave,nbuxave,is_bad_stenx);
         // extrema_detec(uyave,nbuyave,is_bad_steny);
         // Per-equation flags; an equation flagged YES gets its
         // second-degree coefficients zeroed further below.
         NEW_extrema_detec(uxave,nbuxave,tri_n,is_bad_stenx);
         NEW_extrema_detec(uyave,nbuyave,tri_n,is_bad_steny);
         /**
         for(k = 0; k < N_EQN; k++)
         {
             is_bad_stenx[k] = is_bad_steny[k] = NO;
         }
         **/

         // Build the stencil matrices once per triangle and cache them in
         // alltri_HR_sten_2; later calls reuse the cached pointers.
         // NOTE(review): the cache is sized by the tri_n of the first call;
         // presumably tri_n never changes for a given tri->id -- verify.
         if(alltri_HR_sten_2[tri->id].HR_sten_set_2nd == NO)
         {
             tri_array(&Ax,tri_n,2,2,sizeof(double));
             tri_array(&Ay,tri_n,2,2,sizeof(double));
             vector(&c_num_x,tri_n,sizeof(double));
             vector(&c_num_y,tri_n,sizeof(double));

             for(i = 0; i < tri_n; i++)
             {
                 // Rows are the centroid offsets of neighbor i and of the
                 // next neighbor (cyclically), divided by the cell area.
                 Ax[i][0][0] = 2.0*(nbcent[i][0]-cent[0])/area;
                 Ax[i][0][1] = (nbcent[i][1]-cent[1])/area;
                 Ax[i][1][0] = 2.0*(nbcent[(i+1)%tri_n][0]-cent[0])/area;
                 Ax[i][1][1] = (nbcent[(i+1)%tri_n][1]-cent[1])/area;
                 c_num_x[i] = cond_num2(Ax,i); // c_num_x[i] = 1.0;
                 Ay[i][0][0] = (nbcent[i][0]-cent[0])/area;
                 Ay[i][0][1] = 2.0*(nbcent[i][1]-cent[1])/area;
                 Ay[i][1][0] = (nbcent[(i+1)%tri_n][0]-cent[0])/area;
                 Ay[i][1][1] = 2.0*(nbcent[(i+1)%tri_n][1]-cent[1])/area;
                 c_num_y[i] = cond_num2(Ay,i); // c_num_y[i] = 1.0;
             }
             alltri_HR_sten_2[tri->id].HR_sten_set_2nd = YES;
             alltri_HR_sten_2[tri->id].Ax = Ax;
             alltri_HR_sten_2[tri->id].Ay = Ay;
             alltri_HR_sten_2[tri->id].c_num_x = c_num_x;
             alltri_HR_sten_2[tri->id].c_num_y = c_num_y;
         }
         else
         {
             Ax = alltri_HR_sten_2[tri->id].Ax;
             Ay = alltri_HR_sten_2[tri->id].Ay;
             c_num_x = alltri_HR_sten_2[tri->id].c_num_x;
             c_num_y = alltri_HR_sten_2[tri->id].c_num_y;
         }

         for(k = 0; k < N_EQN; k++)
         {
             // if(k == 5 || k == 6) continue; // mag[0] and mag[1] do not need limiting.
             // d_x u polynomial
             // d_y u polynomial
             // tri, nb0, nb1 // tri, nb1, nb2 // tri, nb2, nb0
             // One candidate pair of coefficients per stencil, from the
             // differences between neighbor and cell averages.
             for(i = 0; i < tri_n; i++)
             {
                 rside[0] = nbuxave[i][k] - uxave[k];
                 rside[1] = nbuxave[(i+1)%tri_n][k] - uxave[k];
                 // comp_coef(Ax[i],rside,coefx[i]);
                 comp_coef2(Ax,rside,coefx[i],i);

                 rside[0] = nbuyave[i][k] - uyave[k];
                 rside[1] = nbuyave[(i+1)%tri_n][k] - uyave[k];
                 // comp_coef(Ay[i],rside,coefy[i]);
                 comp_coef2(Ay,rside,coefy[i],i);
             }

             ///////////// WENO
             for(i = 0; i < tri_n; i++)
             {
                 arrya[i] = coefx[i][0];
                 arryb[i] = coefx[i][1];
             }

             /****
             printf("111 after first tri_n=%d\n",tri_n);
             for(i = 0; i < tri_n; i++)
             {
                 printf("coefx[%d][0]=%g\n",i,coefx[i][0]);
                 printf("coefx[%d][1]=%g\n",i,coefx[i][1]);
             }
             printf("111 before WENO tri_n=%d\n",tri_n);
             ****/
             WENO_mod(arrya,arryb,c_num_x,tri_n,diam,w);

             /****
             printf("222 after WENO tri_n=%d\n",tri_n);
             for(i = 0; i < tri_n; i++)
             {
                 printf("coefx[%d][0]=%g\n",i,coefx[i][0]);
                 printf("coefx[%d][1]=%g\n",i,coefx[i][1]);
             }
             ****/


             // Weighted combination of the x-derivative candidates:
             // u3 goes to coefficient 3, u4_0 is one estimate of coefficient 4.
             u3 = 0.0, u4_0 = 0.0;
             for(i = 0; i < tri_n; i++)
             {
                 u3 += w[i]*coefx[i][0];
                 u4_0 += w[i]*coefx[i][1];
             }

             // WENO_mod_cand1_P2(arrya,c_num_x,3,sqr_diam,w);
             // u3 = w[0]*coefx[0][0] + w[1]*coefx[1][0] + w[2]*coefx[2][0];
             // WENO_mod_cand1_P2(arryb,c_num_x,3,sqr_diam,w);
             // u4_0 = w[0]*coefx[0][1] + w[1]*coefx[1][1] + w[2]*coefx[2][1];

             if(is_bad_stenx[k] == YES)
             {
                 u3 = 0.0; u4_0 = 0.0;
             }

             for(i = 0; i < tri_n; i++)
             {
                 arrya[i] = coefy[i][0];
                 arryb[i] = coefy[i][1];
             }

             WENO_mod(arrya,arryb,c_num_y,tri_n,diam,w);
             // WENO_mod_cand1_P2(arrya,c_num_y,3,sqr_diam,w);
             // u4_1 = w[0]*coefy[0][0] + w[1]*coefy[1][0] + w[2]*coefy[2][0];
             // WENO_mod_cand1_P2(arryb,c_num_y,3,sqr_diam,w);
             // u5 =   w[0]*coefy[0][1] + w[1]*coefy[1][1] + w[2]*coefy[2][1];
             // Weighted combination of the y-derivative candidates:
             // u4_1 is the second estimate of coefficient 4, u5 goes to coefficient 5.
             u4_1 = 0.0; u5 = 0.0;
             for(i = 0; i < tri_n; i++)
             {
                 u4_1 += w[i]*coefy[i][0];
                 u5 += w[i]*coefy[i][1];
             }

             if(is_bad_steny[k] == YES)
             {
                 u4_1 = 0.0; u5 = 0.0;
             }

             // Reconcile the two estimates of coefficient 4 through minmod
             // of (1.05 * minmod of the two) and their mean.
             u4 = minmod((1+0.05)*minmod(u4_0,u4_1), 0.5*(u4_0 + u4_1));
             /////// End WENO

             /////// limit by edge center values
             /**
             limit_by_edge_cent_val(coefx,Ax_edge,rside);
             u3 = rside[0]; u4_0 = rside[1];
             limit_by_edge_cent_val(coefy,Ay_edge,rside);
             u4_1 = rside[0]; u5 = rside[1];
             u4 = minmod2((1+0.01)*minmod2(u4_0,u4_1), 0.5*(u4_0 + u4_1));
             **/
             /////// END of limit by edge center values

             // Store into the output state; equation index k maps to
             // 0 density, 1-3 momentum, 4 energy, 5-7 magnetic field.
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[3] = u3;
                 dg_Dens(st2)[4] = u4;
                 dg_Dens(st2)[5] = u5;
             break;
             case 1:
                 dg_Mom(st2)[0][3] = u3;
                 dg_Mom(st2)[0][4] = u4;
                 dg_Mom(st2)[0][5] = u5;
             break;
             case 2:
                 dg_Mom(st2)[1][3] = u3;
                 dg_Mom(st2)[1][4] = u4;
                 dg_Mom(st2)[1][5] = u5;
             break;
             case 3:
                 dg_Mom(st2)[2][3] = u3;
                 dg_Mom(st2)[2][4] = u4;
                 dg_Mom(st2)[2][5] = u5;
             break;
             case 4:
                 dg_Energy(st2)[3] = u3;
                 dg_Energy(st2)[4] = u4;
                 dg_Energy(st2)[5] = u5;
             break;
             case 5:
                 dg_B(st2)[0][3] = u3;
                 dg_B(st2)[0][4] = u4;
                 dg_B(st2)[0][5] = u5;
             break;
             case 6:
                 dg_B(st2)[1][3] = u3;
                 dg_B(st2)[1][4] = u4;
                 dg_B(st2)[1][5] = u5;
             break;
             case 7:
                 dg_B(st2)[2][3] = u3;
                 dg_B(st2)[2][4] = u4;
                 dg_B(st2)[2][5] = u5;
             break;
             }
         }

         // re-enforce divergence-free
         // Bx[3] keeps its sign but its magnitude is capped by 0.5*|By[4]|;
         // By[4] is then reset so that 2*Bx[3] + By[4] == 0.
         // NOTE(review): this block runs regardless of N_EQN -- confirm that
         // the function is only used for the 8-equation (MHD) system.
         if(dg_B(st2)[0][3] >= 0.0)
             dg_B(st2)[0][3] = min(fabs(dg_B(st2)[0][3]), 0.5*fabs(dg_B(st2)[1][4]));
         else
             dg_B(st2)[0][3] = -min(fabs(dg_B(st2)[0][3]), 0.5*fabs(dg_B(st2)[1][4]));

         dg_B(st2)[1][4] = -2.0*dg_B(st2)[0][3];

         // Likewise Bx[4] is capped by 2*|By[5]| and By[5] is reset so that
         // Bx[4] + 2*By[5] == 0.
         if(dg_B(st2)[0][4] >= 0.0)
             dg_B(st2)[0][4] = min(fabs(dg_B(st2)[0][4]), 2.0*fabs(dg_B(st2)[1][5]));
         else
             dg_B(st2)[0][4] = -min(fabs(dg_B(st2)[0][4]), 2.0*fabs(dg_B(st2)[1][5]));

         dg_B(st2)[1][5] = -0.5*dg_B(st2)[0][4];
         // printf("2nd degree 2.0Bx_3 = %13.12g, By_4 = %13.12g, Bx_4 = %13.12g, 2.0By_5 = %13.12g\n",
         //         2.0*dg_B(st2)[0][3], dg_B(st2)[1][4], dg_B(st2)[0][4], 2.0*dg_B(st2)[1][5]);

         // TMP
         debug_flag = NO;
}

/*
*                       limiting_1st_degree():
*
*       Limits the first-degree DG coefficients (indices 1, 2) of every
*       conserved component on triangle tri, resets coefficient 0, and
*       writes the result into midsoln[tri->id].st[0].  The second-degree
*       coefficients already present in that output state are read (through
*       R_degree2_term_average() and R_degree2_term_average_Liu()) and
*       their averages are subtracted from the cell averages before the
*       linear part is reconstructed.
*
*       tri      triangle being limited.
*       nbtri    its tri_n neighbor triangles; consecutive entries (i and
*                (i+1)%tri_n) are paired to form each candidate stencil.
*       tri_n    number of entries in nbtri.
*       midsoln  per-triangle intermediate solutions, indexed by tri->id.
*       rk_iter  selects which state is limited: tri->st when midsoln is
*                NULL or rk_iter == RK_STEP, otherwise
*                midsoln[...].st[rk_iter].
*
*       NOTE(review): the local work arrays are dimensioned for at most 20
*       neighbors; tri_n is not checked against that bound here -- confirm
*       that callers guarantee tri_n <= 20.
*/
EXPORT void limiting_1st_degree(
         TRI       *tri,
         TRI       *nbtri[],
         int       tri_n,
         Mid_soln  *midsoln,
         int       rk_iter)
{
         Locstate st, nbst[20], st2, nbst2[20];
         float    uave[8], nbuave[20][8];
         float    Rave[8], nbRave[20][8];
         float    Lave[8], nbLave[20][8];
         int      i, dim = 2, indx, k;
         double    *cent, *nbcent[20], sqrt_area;
         float    rside[3], rside2[3], least_soln1[3];
         float    coef[20][2];
         float    u0, u1, u2, avg1, avg2, w[20], arrya[20], arryb[20];
         // double **Lmass_matrix = tri->Lmass_matrix;
         float    eps = 0.5, diam; // 0.005, 0.05
         float    dir[3], theta[3];
         int      idir, is_bad_sten[20];
         static double **mat = NULL, **mat_tran, **AA, **inv;
         float    mid[3][2], A_edge[3][2], sv_coef[6];
         static double   *c_num, ***A;

         // One-time allocation on the first call.
         // NOTE(review): A and c_num allocated here are overwritten below
         // (either by a fresh per-triangle allocation or by the cached
         // pointers) before being used, so this size-20 allocation appears
         // to be dropped; mat, mat_tran, AA and inv are not referenced
         // again in this function.
         if(mat == NULL)
         {
             matrix(&(AA), 3, 3, sizeof(double));
             matrix(&(inv), 3, 3, sizeof(double));
             matrix(&(mat_tran), 3, 3, sizeof(double));
             matrix(&(mat), 3, 3, sizeof(double));

             tri_array(&A,20,2,2,sizeof(double));
             vector(&c_num,20,sizeof(double));
         }

         // Pick the input states: the triangles' own states, or the
         // intermediate states of stage rk_iter.
         if(NULL == midsoln)
         {
             st = tri->st;
             for(i = 0; i < tri_n; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             if(rk_iter == RK_STEP)
             {
                 st = tri->st;
                 for(i = 0; i < tri_n; i++)
                     nbst[i] = nbtri[i]->st;
             }
             else
             {
                 st = midsoln[tri->id].st[rk_iter];
                 for(i = 0; i < tri_n; i++)
                     nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
             }
         }

         // Output state.
         // NOTE(review): midsoln is dereferenced here unconditionally even
         // though the branch above allows midsoln == NULL -- confirm that no
         // caller actually passes NULL.
         st2 = midsoln[tri->id].st[0];
         diam = fg_diam(tri);
         cent = fg_centroid(tri);
         sqrt_area = sqrt(fg_area(tri));

         for(i = 0; i < tri_n; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         // Build the stencil matrices once per triangle and cache them in
         // alltri_HR_sten_2; later calls reuse the cached pointers.
         if(alltri_HR_sten_2[tri->id].HR_sten_set_1st == NO)
         {
             tri_array(&A,tri_n,2,2,sizeof(double));
             vector(&c_num,tri_n,sizeof(double));

             for(i = 0; i < tri_n; i++)
             {
                 // Rows are the centroid offsets of neighbor i and of the
                 // next neighbor (cyclically), divided by sqrt(area).
                 A[i][0][0] = (nbcent[i][0]-cent[0])/sqrt_area;
                 A[i][0][1] = (nbcent[i][1]-cent[1])/sqrt_area;
                 A[i][1][0] = (nbcent[(i+1)%tri_n][0]-cent[0])/sqrt_area;
                 A[i][1][1] = (nbcent[(i+1)%tri_n][1]-cent[1])/sqrt_area;
                 // c_num[i] = cond_num(A[i]); // c_num[i] =1.0;
                 c_num[i] = cond_num2(A,i); // c_num[i] =1.0;
             }
             alltri_HR_sten_2[tri->id].HR_sten_set_1st = YES;
             alltri_HR_sten_2[tri->id].c_num = c_num;
             alltri_HR_sten_2[tri->id].A = A;
         }
         else
         {
             A = alltri_HR_sten_2[tri->id].A;
             c_num = alltri_HR_sten_2[tri->id].c_num;
         }

         // Cell averages of the input state:
         // 0 density, 1-3 momentum, 4 energy, 5-7 magnetic field.
         uave[0] = Dens(st);
         for(i = 0; i < 3; i++)
             uave[i+1] = Mom(st)[i];
         uave[4] = Energy(st);
         uave[5] = Mag(st)[0];
         uave[6] = Mag(st)[1];
         uave[7] = Mag(st)[2];

         // Average over this cell of the degree-2 terms held in the output state.
         R_degree2_term_average(tri,st2,Rave);

         // tri_n neighbor tris
         for(i = 0; i < tri_n; i++)
         {
             nbuave[i][0] = Dens(nbst[i]);
             nbuave[i][4] = Energy(nbst[i]);
             for(k = 0; k < 3; k++)
                 nbuave[i][k+1] = Mom(nbst[i])[k];
             for(k = 0; k < 3; k++)
                 nbuave[i][k+5] = Mag(nbst[i])[k];

             // comp_mass_matrix_1st_row(MAX_N_COEF,nbtri[i],dim,fg_centroid(tri),mass_1st_row);
             // mass_1st_row is a file-scope buffer filled for neighbor i
             // relative to this cell's centroid and sqrt(area).
             comp_Mag_mass_matrix_1st_row(MAX_N_COEF,nbtri[i],dim,fg_centroid(tri),sqrt_area,mass_1st_row);

             // Note: st2 (this cell's output state), not the neighbor's
             // state, is passed together with the neighbor triangle.
             R_degree2_term_average_Liu(nbtri[i],st2,mass_1st_row,nbRave[i]);
         }

         // Remove the degree-2 contribution from the averages; what remains
         // is used to rebuild the constant and linear part.
         for(k = 0; k < N_EQN; k++)
         {
             Lave[k] = uave[k]-Rave[k];
             for(i = 0; i < tri_n; i++)
                 nbLave[i][k] = nbuave[i][k]-nbRave[i][k];
         }

         // Per-equation flags; an equation flagged YES gets zero slopes below.
         NEW_extrema_detec(Lave,nbLave,tri_n,is_bad_sten);

         /*****
         for(i = 0; i < 3; i++)
         {
             mid[i][0] = (Coords(Point_of_tri(tri)[i])[0] + Coords(Point_of_tri(tri)[(i+1)%3])[0])*0.5;
             mid[i][1] = (Coords(Point_of_tri(tri)[i])[1] + Coords(Point_of_tri(tri)[(i+1)%3])[1])*0.5;
             A_edge[i][0] = mid[i][0] - cent[0];
             A_edge[i][1] = mid[i][1] - cent[1];
         }

         for(i = 0; i < 3; i++)
         {
             A[i][0][0] = (nbcent[i][0]-cent[0]);
             A[i][0][1] = (nbcent[i][1]-cent[1]);
             A[i][1][0] = (nbcent[(i+1)%3][0]-cent[0]);
             A[i][1][1] = (nbcent[(i+1)%3][1]-cent[1]);
             c_num[i] = cond_num(A[i]); // c_num[i] =1.0;
         }
         unify_weight(c_num, 3, w); // For center biased limiting
         *****/

         for(k = 0; k < N_EQN; k++)
         {
             // if(k == 5 || k == 6) continue; // mag[0] and mag[1] do not need limiting.
             // linear part of polynomial
             // tri, nb0, nb1
             // tri, nb1, nb2
             // tri, nb2, nb0
             // One candidate slope pair per stencil.
             for(i = 0; i < tri_n; i++)
             {
                 rside[0] = nbLave[i][k] - Lave[k];
                 rside[1] = nbLave[(i+1)%tri_n][k] - Lave[k];
                 // comp_coef(A[i],rside,coef[i]);
                 comp_coef2(A,rside,coef[i],i);
             }

             ///////////// WENO
             for(i = 0; i < tri_n; i++)
             {
                 arrya[i] = coef[i][0];
                 arryb[i] = coef[i][1];
             }
             WENO_mod_1(arrya, arryb, c_num, tri_n, w);
             // WENO_mod_1_sqr_weight(arrya, arryb, c_num, num_CVs, w);
             // Blend the candidates with the weights w returned above.
             u1 = u2 = 0.0;
             for(i = 0; i < tri_n; i++)
             {
                 u1 += w[i]*coef[i][0];
                 u2 += w[i]*coef[i][1];
             }
             if(is_bad_sten[k] == YES)
             {
                 u1 = u2 = 0.0;
             }
             u0 = Lave[k];
             /////////////////////// END WENO

             ////////// Zero moment //////////////
             u0 = Lave[k];

             // Store into the output state; equation index k maps to
             // 0 density, 1-3 momentum, 4 energy, 5-7 magnetic field.
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[0] = u0;
                 dg_Dens(st2)[1] = u1;
                 dg_Dens(st2)[2] = u2;
             break;
             case 1:
                 dg_Mom(st2)[0][0] = u0;
                 dg_Mom(st2)[0][1] = u1;
                 dg_Mom(st2)[0][2] = u2;
             break;
             case 2:
                 dg_Mom(st2)[1][0] = u0;
                 dg_Mom(st2)[1][1] = u1;
                 dg_Mom(st2)[1][2] = u2;
             break;
             case 3:
                 dg_Mom(st2)[2][0] = u0;
                 dg_Mom(st2)[2][1] = u1;
                 dg_Mom(st2)[2][2] = u2;
             break;
             case 4:
                 dg_Energy(st2)[0] = u0;
                 dg_Energy(st2)[1] = u1;
                 dg_Energy(st2)[2] = u2;
             break;
             case 5:
                 dg_B(st2)[0][0] = u0;
                 dg_B(st2)[0][1] = u1;
                 dg_B(st2)[0][2] = u2;
             break;
             case 6:
                 dg_B(st2)[1][0] = u0;
                 dg_B(st2)[1][1] = u1;
                 dg_B(st2)[1][2] = u2;
             break;
             case 7:
                 dg_B(st2)[2][0] = u0;
                 dg_B(st2)[2][1] = u1;
                 dg_B(st2)[2][2] = u2;
             break;
             }
         }

         /****
         for(i = 0; i < 2; i++)
         {
             for(k = 0; k < MAX_N_COEF; k++)
                 dg_B(st2)[i][k] = dg_B(st)[i][k];
             Mag(st2)[i] = Mag(st)[i];
         }
         ****/

         // The cell averages themselves are carried over unchanged.
         Dens(st2) = Dens(st);
         Energy(st2) = Energy(st);
         for(i = 0; i < 3; i++)
             Mom(st2)[i] = Mom(st)[i];
         for(i = 0; i < 3; i++)
             Mag(st2)[i] = Mag(st)[i];

         ///// Re-enforce divergence-free property
         // Bx[1] keeps its sign but its magnitude is capped by |By[2]|;
         // By[2] is then reset so that Bx[1] + By[2] == 0.
         // NOTE(review): this block runs regardless of N_EQN -- confirm that
         // the function is only used for the 8-equation (MHD) system.
         if(dg_B(st2)[0][1] >= 0.0)
             dg_B(st2)[0][1] = min(fabs(dg_B(st2)[0][1]), fabs(dg_B(st2)[1][2]));
         else
             dg_B(st2)[0][1] = -min(fabs(dg_B(st2)[0][1]), fabs(dg_B(st2)[1][2]));
         dg_B(st2)[1][2] = -dg_B(st2)[0][1];

         // printf("1st degree Bx_1 = %13.12g, By_2 = %13.12g\n\n", dg_B(st2)[0][1], dg_B(st2)[1][2]);

         /***
         if((N_EQN == 4 || N_EQN == 8) && YES == unphysical_st_at_quadrature(tri, st2))
             tri->redo_limiting = YES;
         else
             tri->redo_limiting = NO;
         ***/
         /**
         if(overshoot_state(st2,tri))
         {
             build_1st_order_poly(tri,nbtri,midsoln,uave,nbuave,rk_iter);
         }
         **/

         // TMP
         debug_flag = NO;
}

/*
*               accuracy_Mag_field_on_dual_cell_L1_DG():
*
*       Prints the L1 error and the peak pointwise error of Bx and By on
*       the dual cells (polygons) of every surface in fr->mesh, measured
*       against mag_vort_sol() evaluated at fr->time.
*
*       Only closed polygons with no vertex triangle of BC_type SUBDOMAIN
*       are visited.  Each polygon is split into n_sides sub-triangles
*       (polygon centroid, vertex i, vertex i+1).  On each sub-triangle the
*       linear field stored in polyg_sub_reg_dgB(polyg)[i] is compared with
*       the reference solution at the 13 points returned by
*       tri_quadrature_13_pts(), and the absolute errors are combined with
*       the weights w1..w4 and the sub-triangle area.
*
*       Totals are summed and peaks maximized across processors before
*       printing.  The dual cell id that is printed is the one on which
*       this processor found its own largest Bx error; it is -1 when this
*       processor visited no dual cell (it was previously read
*       uninitialized in that case).
*/
EXPORT void accuracy_Mag_field_on_dual_cell_L1_DG(
        Front      *fr)
{
        SURFACE      **surf;
        int          i, tmpi, outside, N_sides;
        int          dual_id = -1;   /* stays -1 if no dual cell is visited */
        POLYGON      *polyg;
        TRI          **tris;
        double       pcrds[3][3], qcrds[14][2];
        double       econ_u[12], con_u[12];
        double       Bxansarray[13],  Byansarray[13], Bx_ans, By_ans;
        double w1 =-0.149570044467670, w2 = 0.053347235608839,
               w3 = 0.175615257433204,  w4 = 0.077113760890257;
        double total_Bx = 0.0, total_By = 0.0, area, Bx_peak, By_peak;
        double **piece_cent, sqrt_area;

        debug_print("Dual_cell","Entered accuracy_Mag_field_on_dual_cell_L1_DG()\n");

        Bx_peak = By_peak = -HUGE_VAL;

        /* The sub-triangles are planar: z is always zero. */
        for(i = 0; i < 3; i++)
            pcrds[i][2] = 0.0;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (polyg = first_polyg(*surf); !at_end_of_polyg_list(polyg,*surf);
                 polyg = polyg->next )
            {
                if(polyg->closed == NO) continue;

                /* Skip dual cells that touch a SUBDOMAIN triangle. */
                tris = tri_at_polyg_vert(polyg);
                outside = NO;
                for(i = 0; i < polyg->n_sides; i++)
                {
                    if(tris[i]->BC_type == SUBDOMAIN)
                    {
                        outside = YES;
                        break;
                    }
                }
                if(outside == YES) continue;

                N_sides = polyg->n_sides;

                pcrds[0][0] = polyg_centroid(polyg)[0];
                pcrds[0][1] = polyg_centroid(polyg)[1];

                piece_cent = polyg->_piece_cent;

                for(i = 0; i < N_sides; i++)
                {
                    pcrds[1][0] = Coords(Point_of_polyg(polyg)[i])[0];
                    pcrds[1][1] = Coords(Point_of_polyg(polyg)[i])[1];

                    pcrds[2][0] = Coords(Point_of_polyg(polyg)[(i+1)%N_sides])[0];
                    pcrds[2][1] = Coords(Point_of_polyg(polyg)[(i+1)%N_sides])[1];

                    tri_quadrature_13_pts(pcrds[0], pcrds[1], pcrds[2], qcrds);

                    /* Area comes from the stored sqrt(area) of the piece. */
                    sqrt_area = polyg->_piece_sqrt_area[i];
                    area = sqr(sqrt_area);

                    for(tmpi = 0; tmpi < 13; tmpi++)
                    {
                        mag_vort_sol(qcrds[tmpi],fr->time,econ_u);

                        /* Linear field of the piece, expanded about its
                         * centroid with offsets scaled by 1/sqrt(area). */
                        con_u[5] = polyg_sub_reg_dgB(polyg)[i][0][0] + 
                                   polyg_sub_reg_dgB(polyg)[i][0][1]*(qcrds[tmpi][0]- piece_cent[i][0])/sqrt_area + 
                                   polyg_sub_reg_dgB(polyg)[i][0][2]*(qcrds[tmpi][1]- piece_cent[i][1])/sqrt_area;
                        con_u[6] = polyg_sub_reg_dgB(polyg)[i][1][0] + 
                                   polyg_sub_reg_dgB(polyg)[i][1][1]*(qcrds[tmpi][0]- piece_cent[i][0])/sqrt_area + 
                                   polyg_sub_reg_dgB(polyg)[i][1][2]*(qcrds[tmpi][1]- piece_cent[i][1])/sqrt_area;

                        Bxansarray[tmpi] = fabs(econ_u[5]-con_u[5]);
                        Byansarray[tmpi] = fabs(econ_u[6]-con_u[6]);

                        if(Bxansarray[tmpi] > Bx_peak)
                        {
                            Bx_peak = Bxansarray[tmpi];
                            dual_id = polyg->id;
                        }
                        if(Byansarray[tmpi] > By_peak)
                            By_peak = Byansarray[tmpi];
                    }

                    Bx_ans = (w1*Bxansarray[0] +
                             w2*(Bxansarray[1] + Bxansarray[2] + Bxansarray[3]) +
                             w3*(Bxansarray[4] + Bxansarray[5] + Bxansarray[6]) +
                             w4*(Bxansarray[7] + Bxansarray[8] + Bxansarray[9] +
                                 Bxansarray[10] + Bxansarray[11] + Bxansarray[12]));
                    By_ans = (w1*Byansarray[0] +
                             w2*(Byansarray[1] + Byansarray[2] + Byansarray[3]) +
                             w3*(Byansarray[4] + Byansarray[5] + Byansarray[6]) +
                             w4*(Byansarray[7] + Byansarray[8] + Byansarray[9] +
                                 Byansarray[10] + Byansarray[11] + Byansarray[12]));
                    total_Bx += fabs(Bx_ans)*area;
                    total_By += fabs(By_ans)*area;

                }///END::: for(i = 0; i < N_sides; i++)
            }/// END::: for (polyg = first_polyg(*surf); ... 
        } /// END:::: for(surf = fr->mesh->surfaces; surf && *surf; surf++)

        pp_global_sum(&total_Bx, 1);
        pp_global_sum(&total_By, 1);
        pp_global_max(&Bx_peak, 1);
        pp_global_max(&By_peak, 1);

        printf("\n\nIN accuracy_Mag_field_on_dual_cell_L1_DG(): time = %e\n", fr->time);
        printf("On duall cells, total_Bx = %24.20g, total_By = %24.20g\n",
                   total_Bx, total_By);
        printf("On duall cells, Bx_peak = %24.20g, By_peak = %24.20g on dual cell %d\n",
                   Bx_peak, By_peak, dual_id);

        debug_print("Dual_cell","Left accuracy_Mag_field_on_dual_cell_L1_DG()\n");
}

/*
*       accuracy_Mag_field_on_dual_cell_L1():
*
*       Diagnostic routine.  Prints the accumulated L1 error and the peak
*       pointwise error of the magnetic field (Bx, By) reconstructed on the
*       dual cells, measured against mag_vort_sol() evaluated at fr->time.
*
*       Only polygons with closed != NO are visited, and a polygon is skipped
*       when any of the tris returned by tri_at_polyg_vert() has
*       BC_type == SUBDOMAIN.  Every remaining polygon is cut into n_sides
*       sub-triangles (centroid, vertex i, vertex i+1); on each sub-triangle
*       the error is sampled at the 13 points from tri_quadrature_13_pts()
*       and combined with the weights w1..w4, then scaled by the sub-triangle
*       area.
*
*       The reconstructed field is the combination of the nodal values
*       Nodal_B_of_polyg() (entries i, i+1 and N_sides) with the three linear
*       functions stored in polyg->conformal_basis_tri[i].  The quadratic
*       Bubble_B_of_polyg() terms are not included in this version.
*
*       Input:   fr - front whose mesh surfaces carry the dual cells.
*       Output:  none; results go to stdout after pp_global_sum() /
*                pp_global_max() have been applied to the totals and peaks.
*
*       NOTE(review): the dual-cell id that is printed is the id recorded on
*       the calling processor, while Bx_peak has been passed through
*       pp_global_max(); presumably the two can disagree in parallel runs -
*       confirm before relying on the printed id.
*/
EXPORT void accuracy_Mag_field_on_dual_cell_L1(
        Front      *fr)
{
        SURFACE      **surf;
        int          debug = NO;    /* set to YES by hand to dump point values */
        int          i, tmpi, outside, N_sides;
        int          dual_id = -1;  /* cell holding the local Bx peak, -1 if none */
        POLYGON      *polyg;
        TRI          **tris;
        double       pcrds[3][3], qcrds[14][2];
        double       econ_u[12], con_u[12];
        double       ***conformal_basis_tri;
        double       val[3];
        double       Bxansarray[13],  Byansarray[13], Bx_ans, By_ans;
        double       Bx_err, By_err;
        double w1 =-0.149570044467670, w2 = 0.053347235608839,
               w3 = 0.175615257433204,  w4 = 0.077113760890257;
        double total_Bx = 0.0, total_By = 0.0, area, Bx_peak, By_peak;

        debug_print("Dual_cell","Entered accuracy_Mag_field_on_dual_cell_L1()\n");

        /* Any sampled error compares greater than this starting value. */
        Bx_peak = By_peak = -HUGE_VAL;

        /* The sub-triangles are planar: the third coordinate stays zero. */
        for(i = 0; i < 3; i++)
            pcrds[i][2] = 0.0;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (polyg = first_polyg(*surf); !at_end_of_polyg_list(polyg,*surf);
                 polyg = polyg->next )
            {
                if(polyg->closed == NO) continue;

                /* Skip cells that touch a tri flagged as SUBDOMAIN. */
                tris = tri_at_polyg_vert(polyg);
                outside = NO;
                for(i = 0; i < polyg->n_sides; i++)
                {
                    if(tris[i]->BC_type == SUBDOMAIN)
                    {
                        outside = YES;
                        break;
                    }
                }
                if(outside == YES) continue;

                N_sides = polyg->n_sides;

                conformal_basis_tri = polyg->conformal_basis_tri;

                /* Vertex 0 of every sub-triangle is the cell centroid. */
                pcrds[0][0] = polyg_centroid(polyg)[0];
                pcrds[0][1] = polyg_centroid(polyg)[1];
                for(i = 0; i < polyg->n_sides; i++)
                {
                    pcrds[1][0] = Coords(Point_of_polyg(polyg)[i])[0];
                    pcrds[1][1] = Coords(Point_of_polyg(polyg)[i])[1];

                    pcrds[2][0] = Coords(Point_of_polyg(polyg)[(i+1)%N_sides])[0];
                    pcrds[2][1] = Coords(Point_of_polyg(polyg)[(i+1)%N_sides])[1];

                    tri_quadrature_13_pts(pcrds[0], pcrds[1], pcrds[2], qcrds);
                    for(tmpi = 0; tmpi < 13; tmpi++)
                    {
                        /* Reference solution at the quadrature point. */
                        mag_vort_sol(qcrds[tmpi],fr->time,econ_u);

                        /* The three linear basis functions of sub-triangle i;
                         * val[0] multiplies the entry stored at index N_sides,
                         * val[1] and val[2] the entries at i and i+1. */
                        val[0] = conformal_basis_tri[i][0][0] +
                                 qcrds[tmpi][0]*conformal_basis_tri[i][0][1] +
                                 qcrds[tmpi][1]*conformal_basis_tri[i][0][2];
                        val[1] = conformal_basis_tri[i][1][0] +
                                 qcrds[tmpi][0]*conformal_basis_tri[i][1][1] +
                                 qcrds[tmpi][1]*conformal_basis_tri[i][1][2];
                        val[2] = conformal_basis_tri[i][2][0] +
                                 qcrds[tmpi][0]*conformal_basis_tri[i][2][1] +
                                 qcrds[tmpi][1]*conformal_basis_tri[i][2][2];

                        con_u[5] = Nodal_B_of_polyg(polyg)[0][i]*val[1] +
                                   Nodal_B_of_polyg(polyg)[0][(i+1)%N_sides]*val[2] +
                                   Nodal_B_of_polyg(polyg)[0][N_sides]*val[0];

                        con_u[6] = Nodal_B_of_polyg(polyg)[1][i]*val[1] +
                                   Nodal_B_of_polyg(polyg)[1][(i+1)%N_sides]*val[2] +
                                   Nodal_B_of_polyg(polyg)[1][N_sides]*val[0];

                        Bx_err = fabs(econ_u[5]-con_u[5]);
                        By_err = fabs(econ_u[6]-con_u[6]);

                        Bxansarray[tmpi] = Bx_err;
                        Byansarray[tmpi] = By_err;

                        if(debug == YES)
                        {
                            printf("sub-region[%d], exact Bx, reconstruct Bx: %e, %e, diff %e\n", 
                                i, econ_u[5], con_u[5], Bx_err);
                        }

                        if(Bx_err > Bx_peak)
                        {
                            Bx_peak = Bx_err;
                            dual_id = polyg->id;
                        }
                        if(By_err > By_peak)
                            By_peak = By_err;
                    }

                    /* Weighted 13-point sum, scaled by the sub-triangle area. */
                    area = triangle_area_3d(pcrds[0], pcrds[1], pcrds[2]);
                    Bx_ans = (w1*Bxansarray[0] +
                             w2*(Bxansarray[1] + Bxansarray[2] + Bxansarray[3]) +
                             w3*(Bxansarray[4] + Bxansarray[5] + Bxansarray[6]) +
                             w4*(Bxansarray[7] + Bxansarray[8] + Bxansarray[9] +
                                 Bxansarray[10] + Bxansarray[11] + Bxansarray[12]));
                    By_ans = (w1*Byansarray[0] +
                             w2*(Byansarray[1] + Byansarray[2] + Byansarray[3]) +
                             w3*(Byansarray[4] + Byansarray[5] + Byansarray[6]) +
                             w4*(Byansarray[7] + Byansarray[8] + Byansarray[9] +
                                 Byansarray[10] + Byansarray[11] + Byansarray[12]));
                    total_Bx += fabs(Bx_ans)*area;
                    total_By += fabs(By_ans)*area;
                }//// END:::: for(i = 0; i < polyg->n_sides; i++)
            }
        }

        pp_global_sum(&total_Bx, 1);
        pp_global_sum(&total_By, 1);
        pp_global_max(&Bx_peak, 1);
        pp_global_max(&By_peak, 1);

        printf("\n\nIN accuracy_Mag_field_on_dual_cell_L1(): time = %e\n", fr->time);
        printf("On duall cells, total_Bx = %24.20g, total_By = %24.20g\n",
                   total_Bx, total_By);
        printf("On duall cells, Bx_peak = %24.20g, By_peak = %24.20g on dual cell %d\n",
                   Bx_peak, By_peak, dual_id);

        debug_print("Dual_cell","Left accuracy_Mag_field_on_dual_cell_L1()\n");
}

/*
*       Init_Construct_Mag_field_on_dual_cell():
*
*       Numbers every dual cell (polygon) of the mesh consecutively, then
*       builds the magnetic field on the dual cells for the state selected
*       by RK_STEP (the mid-solution arguments of the construction routines
*       are passed as NULL).
*
*       MAX_N_COEF == 1 : Construct_Mag_field_on_dual_cell_P1_no_div_free_constraint()
*       MAX_N_COEF == 3 : check_b_match_on_dual_edges(), then
*                         Construct_Mag_field_on_dual_cell_P1_DG(), the
*                         parallel dual-cell buffer update (MPI builds only,
*                         skipped under the "blast_MHD" debug key) and
*                         Construct_Mag_field_on_tri_mesh_P1_DG().
*       otherwise       : prints an error and calls clean_up(ERROR).
*
*       The file-level stencil array dual_cell_sten_2 is allocated here when
*       it is NULL and is always released and reset to NULL before the
*       buffer updates at the end.
*
*       Input:   wv - not referenced in this routine.
*                fr - front whose mesh surfaces carry the dual cells.
*/
EXPORT void Init_Construct_Mag_field_on_dual_cell(
        Wave       *wv,
        Front      *fr)
{
        SURFACE           **s = fr->mesh->surfaces;
        POLYGON           *cell;
        int               n_cells = 0;
        int               k;

        debug_print("Dual_cell","Entered Init_Construct_Mag_field_on_dual_cell()\n");

        /* Hand out consecutive ids over all surfaces; the final count is
         * the total number of dual cells. */
        while(s && *s)
        {
            cell = first_polyg(*s);
            while(!at_end_of_polyg_list(cell,*s))
            {
                cell->id = n_cells++;
                cell = cell->next;
            }
            s++;
        }

        /* One stencil record per dual cell, all marked as "not set". */
        if(dual_cell_sten_2 == NULL)
        {
            vector(&dual_cell_sten_2,n_cells,sizeof(Dual_cell_sten));
            for(k = 0; k < n_cells; k++)
                dual_cell_sten_2[k].Bsten_set = NO;
        }

        if(MAX_N_COEF != 1 && MAX_N_COEF != 3)
        {
            printf("ERROR: Init_Construct_Mag_field_on_dual_cell(), implement\n");
            clean_up(ERROR);
        }
        else if(MAX_N_COEF == 1)
        {
            Construct_Mag_field_on_dual_cell_P1_no_div_free_constraint(fr,NULL,
                   NULL,RK_STEP,n_cells);
        }
        else
        {
            check_b_match_on_dual_edges(fr, NULL, RK_STEP);

            Construct_Mag_field_on_dual_cell_P1_DG(fr,NULL,
                     NULL,RK_STEP,n_cells);
#if defined(__MPI__)
            /* 01-28-2015: the blast_MHD problem uses Dirichlet BC, so it
             * does not need the parallel communication (call added
             * 01/26/2014). */
            if(!debugging("blast_MHD"))
                parallel_update_dual_cell_B_buffer(fr, NULL, RK_STEP);
#endif // if defined(__MPI__)
            Construct_Mag_field_on_tri_mesh_P1_DG(NULL,fr, NULL, NULL, RK_STEP, 0.0);
        }

        /* Release the per-cell stencil storage that was filled in, then the
         * array itself. */
        for(k = 0; k < n_cells; k++)
        {
            if(dual_cell_sten_2[k].Bsten_set != YES)
                continue;
            free_these(2, dual_cell_sten_2[k].MB_A, dual_cell_sten_2[k].MB_B);
        }

        free(dual_cell_sten_2);
        dual_cell_sten_2 = NULL;

        if(debugging("mag_acc_vert") || debugging("rotor") ||
           debugging("decay_alfven") || debugging("Orszag_T") ||
           debugging("field_loop"))
        {
#if defined(__MPI__)
            update_db_Mach_buffer(fr,NULL,RK_STEP,NULL);

            if(debugging("decay_alfven"))
            {
                /* The exact BC on tris for decay_alfven
                 * (impose_decay_alfven_BC) is currently disabled. */
            }
#else
            if(!debugging("mag_acc_vert"))
            {
                /* Both remaining cases refresh the buffer; the first one
                 * was added 01-28-2015 to test the periodic BC. */
                if(debugging("decay_alfven") || debugging("field_loop"))
                    update_buffer(fr,NULL,RK_STEP,NULL);
                else
                    update_buffer(fr,NULL,RK_STEP,NULL);
            }
            else
                impose_mag_acc_vert_BC(fr,NULL,RK_STEP,0.0);
#endif // if defined(__MPI__)
        }

        /* Accuracy report for the magnetic vortex test. */
        if(debugging("mag_acc_vert"))
        {
            printf("\nAfter Init_Construct_Mag_field_on_dual_cell()\n\n");
            accurate_mag_vort_L1(fr);
            printf("\n\n");
        }

        debug_print("Dual_cell","Left Init_Construct_Mag_field_on_dual_cell()\n");
}

EXPORT void Construct_Mag_field_on_both_grids(
        Front              *fr,
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step, 
        int                total_cell,
        double             time,
        int                limiter_on)
{
        int        i, tmpi, j;
        SURFACE    **surf;
        POLYGON    *polyg, *nbpolyg;
        int        debug = NO, N_dual_pairs, N_fix_iteration = 0;
        double     len;
        DUAL_CELL_PAIR    *dual_pairs;
        bool       parallel_zero_B_dual = NO;

        debug_print("Dual_cell","Entered Construct_Mag_field_on_both_grids()\n");

        // printf("Entered Construct_Mag_field_on_dual_cell(), rk_step %d\n", rk_step); fflush(stdout); 

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (polyg = first_polyg(*surf); !at_end_of_polyg_list(polyg,*surf);
                 polyg = polyg->next )
            {
                if(polyg->closed != YES) continue;

                ///// Start: consistency check
                len = 0.0;
                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < polyg->n_sides; i++)
                    {
                        len += polyg_side_dgB(polyg)[i][0]*
                                 polyg_length_side(polyg)[i]; 
                    }
                }
                else
                {
                    for(i = 0; i < polyg->n_sides; i++)
                    {
                        len += dual_cell_midsoln[polyg->id].edge_dgBn[rk_step][i][0]*
                                 polyg_length_side(polyg)[i]; 
                    }
                }

                if(fabs(len) > 1.0e-11)
                {
                    printf("ERROR: Construct_Mag_field_on_both_grids()\n");
                    printf("dual cell [%d] not divergence-free, error %e\n",
                            polyg->id, len);
                    print_polyg_crds(polyg);
                    clean_up(ERROR);
                }
                ///// End: consistency check

                //// TMP
                /***
                if(polyg->id == 200 || polyg->id == 240)
                {
                    printf("\n\n--- Construct_Mag_field_on_both_grids(%d), rk_step %d\n\n", polyg->id, rk_step);
                    for(i = 0; i < polyg->n_sides; i++)
                    { 
                        nbpolyg = Polyg_on_side(polyg, i);
                        for(tmpi=0; nbpolyg->n_sides; tmpi++)
                        {
                            if(Polyg_on_side(nbpolyg, tmpi) == polyg)
                                break;
                        }
                        if(rk_step == RK_STEP)
                        {
                            printf("     dual-cell[%d], side[%d] Bn[%14.12g, %14.12g]\n",
                                      polyg->id, i, polyg_side_dgB(polyg)[i][0],
                                      polyg_side_dgB(polyg)[i][1]);
                            printf("crsp dual-cell[%d], side[%d] Bn[%14.12g, %14.12g]\n\n",
                                      nbpolyg->id, tmpi, polyg_side_dgB(nbpolyg)[tmpi][0],
                                      polyg_side_dgB(nbpolyg)[tmpi][1]);
                        }
                        else
                        {
                            printf("     dual-cell[%d], side[%d] Bn[%14.12g, %14.12g]\n",
                                      polyg->id, i, dual_cell_midsoln[polyg->id].edge_dgBn[rk_step][i][0],
                                      dual_cell_midsoln[polyg->id].edge_dgBn[rk_step][i][1]);
                            printf("crsp dual-cell[%d], side[%d] Bn[%14.12g, %14.12g]\n\n",
                                      nbpolyg->id, tmpi, dual_cell_midsoln[nbpolyg->id].edge_dgBn[rk_step][tmpi][0],
                                      dual_cell_midsoln[nbpolyg->id].edge_dgBn[rk_step][tmpi][1]);
                        }
                        // break;
                    }
                } /// END::: TMP
                ***/
            }
        }

        if(NULL == dual_cell_sten_2)
        {
            vector(&dual_cell_sten_2,total_cell,sizeof(Dual_cell_sten));
            for(i = 0; i < total_cell; i++)
                dual_cell_sten_2[i].Bsten_set = NO;
        }

        if(MAX_N_COEF == 1)
        {
            // printf("ERROR: Construct_Mag_field_on_both_grids(), implement\n");
            // printf("MAX_N_COEF = 1\n");
            Construct_Mag_field_on_dual_cell_P1_no_div_free_constraint(fr,midsoln,
                 dual_cell_midsoln,rk_step,total_cell);

            // clean_up(ERROR);
        }

        if(MAX_N_COEF == 3)
        {
            Construct_Mag_field_on_dual_cell_P1_DG(fr,midsoln,
                    dual_cell_midsoln,rk_step,total_cell);
            if(YES == limiter_on)
            {
                do{
                    /// TMP
                    /***
                    if(fr->step >= 107579)
                    {
                        printf("\n\n------ Num of fix_iteration (%d)---\n", N_fix_iteration);
                        TRI   *tri;
                        POLYGON           *tmp_polyg[3];
                        int           tmpii;
                        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
                        {
                            for(tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf); tri = tri->next)
                            {
                                if(tri->id == 788)
                                {
                                    printf("\n\n********************************\n");
                                    printf("\n\ntri %d before Positivity_press_dens_ () iteration %d.\n", 
                                         tri->id,N_fix_iteration);
                                    printf("tri neighbors %d, %d, %d\n", Tri_on_side(tri,0)->id,
                                          Tri_on_side(tri,1)->id, Tri_on_side(tri,2)->id);
                                      
                                    for(tmpii = 0; tmpii < 3; tmpii++)
                                        tmp_polyg[tmpii] = fg_polyg_at_tri_vert(tri)[tmpii];
                                    printf("Check Bn from polygons (%d, %d, %d)\n\n",
                                           tmp_polyg[0]->id, tmp_polyg[1]->id, tmp_polyg[2]->id);
                                    match_Bn_at_comm_dual_cell_edges(tmp_polyg[0], tmp_polyg[1], 
                                                                 dual_cell_midsoln, rk_step);
                                    match_Bn_at_comm_dual_cell_edges(tmp_polyg[1], tmp_polyg[2], 
                                                                 dual_cell_midsoln, rk_step);
                                    match_Bn_at_comm_dual_cell_edges(tmp_polyg[2], tmp_polyg[0], 
                                                                 dual_cell_midsoln, rk_step);
                                }
                            }
                        }
                    }
                    ***/
                    /// END: TMP
 
                    dual_pairs = Positivity_press_dens_at_tri_quadrature(fr,midsoln,
                        dual_cell_midsoln,rk_step,total_cell, &N_dual_pairs);

                /// TMP
                /***
                if(fr->step >= 107579)
                {
                    printf("\n\n++++ after Positivity_press_dens_at_tri_quadrature \n");
                    for(i = 0; i < N_dual_pairs; i++)
                    {
                        printf("***(%d)identified trouble dual cell %d, tris %p, %p\n",i, 
                             dual_pairs[i].dual_cells[0]->id, dual_pairs[i].tri[0], dual_pairs[i].tri[1]); 
                        if(dual_pairs[i].tri[0] != NULL) printf("tri[0] = %d\n", dual_pairs[i].tri[0]->id);
                        if(dual_pairs[i].tri[1] != NULL) printf("tri[1] = %d\n", dual_pairs[i].tri[1]->id);
                    }
                }
                ***/
                /// END::: TMP

                /// Also include all dual cells centered at vertices of tris in the dual_pairs. 
                /// This is because zero_moments() of states on a tri could affect edge B state of 
                /// dual cells centered at vertices of the tri. Then this modification of B state of 
                /// dual cells could make a tri with center on vertices of the dual cell have non-zero
                /// divergence.  
                    N_dual_pairs = add_dual_cells_on_vert_tri_in_dual_pairs(fr,dual_pairs,N_dual_pairs);

#if defined(__MPI__)
                    if(YES == check_dual_cell_on_subdomain(fr,dual_pairs,N_dual_pairs))
                    {
                        parallel_zero_B_dual = YES;
                    }
#endif // if defined(__MPI__)

                /// TMP
                /***
                if(fr->step >= 107579)
                {
                    printf("\n\n------ Num of fix_iteration (%d)---, parallel_zero = %d, N_dual_pairs = %d\n", 
                                N_fix_iteration, parallel_zero_B_dual, N_dual_pairs);
                    for(i = 0; i < N_dual_pairs; i++)
                    {
                        printf("***(%d)identified trouble dual cell %d, tris %p, %p\n",i, 
                             dual_pairs[i].dual_cells[0]->id, dual_pairs[i].tri[0], dual_pairs[i].tri[1]); 
                        if(dual_pairs[i].tri[0] != NULL) printf("tri[0] = %d\n", dual_pairs[i].tri[0]->id);
                        if(dual_pairs[i].tri[1] != NULL) printf("tri[1] = %d\n", dual_pairs[i].tri[1]->id);
                        fflush(stdout);
                    }
                }
                ***/
                /// END::: TMP

                    if(N_dual_pairs != 0)
                    {
                        // Dual_cell_edge_B_reconstruction_P1(fr,midsoln,dual_cell_midsoln,rk_step,time);
                        for(i = 0; i < N_dual_pairs; i++)
                        {
                            if(dual_pairs[i].tri[0] != NULL)
                            {
                                if(rk_step == RK_STEP)
                                {
                                    zero_moments(dual_pairs[i].tri[0]->st);
                                    zero_moments(dual_pairs[i].tri[1]->st);
                                }
                                else
                                {
                                    zero_moments(midsoln[dual_pairs[i].tri[0]->id].st[rk_step]);
                                    zero_moments(midsoln[dual_pairs[i].tri[1]->id].st[rk_step]);
                                }  
                            }
                        }

                        for(i = 0; i < N_dual_pairs; i++)
                        {
                            /***
                            double *cent, eps = 1.0e-10;
                            cent = polyg_centroid(dual_pairs[i].dual_cells[0]);
                            if(fabs(cent[0] - 0.51) < eps &&
                               fabs(cent[1] - 0.37) < eps)
                            {           
                                printf("In Construct_Mag_field_on_both_grids(),"
                                       " dual cell %d , N_fix_iteration = %d\n",
                                         polyg->id, N_fix_iteration);
                                print_polyg_crds(dual_pairs[i].dual_cells[0]); //print_tri_crds
                            }
                            ***/

                            single_Dual_cell_edge_B_reconstruction_P1(dual_pairs[i].dual_cells[0],
                               midsoln,dual_cell_midsoln,rk_step, NO); 
                                               // (fr->step >= 107579)?YES:NO
                            // TMP
                            /***
                            if(fr->step >= 107579)
                            {
                                printf("\n\n------ Num of fix_iteration (%d)---after single_Dual_cell_edge_B_recon(%d)\n", 
                                     N_fix_iteration, i);
                                TRI   *tri;
                                POLYGON           *tmp_polyg[3];
                                int               tmpii;
                                for(surf = fr->mesh->surfaces; surf && *surf; surf++)
                                {
                                    for(tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf); tri = tri->next)
                                    {
                                        if(tri->id == 788)
                                        {
                                            printf("\n********************************\n");
                                            printf("\n\ntri %d after single_Dual_cell_edge_B_recon iteration %d for dual %d \n",
                                             tri->id,N_fix_iteration, dual_pairs[i].dual_cells[0]->id);
                                            for(tmpii = 0; tmpii < 3; tmpii++)
                                            tmp_polyg[tmpii] = fg_polyg_at_tri_vert(tri)[tmpii];
                                            printf("Check Bn from polygons (%d, %d, %d)\n\n",
                                           tmp_polyg[0]->id, tmp_polyg[1]->id, tmp_polyg[2]->id);
                                            match_Bn_at_comm_dual_cell_edges(tmp_polyg[0], tmp_polyg[1],
                                                                 dual_cell_midsoln, rk_step);
                                            match_Bn_at_comm_dual_cell_edges(tmp_polyg[1], tmp_polyg[2],
                                                                 dual_cell_midsoln, rk_step);
                                            match_Bn_at_comm_dual_cell_edges(tmp_polyg[2], tmp_polyg[0],
                                                                 dual_cell_midsoln, rk_step);
                                        }
                                    }
                                }
                            }
                            ***/
                            // END::: TMP
                        } /// END::: for(i = 0; i < N_dual_pairs; i++)
                    }

                    Modify_B_of_unphysical_state_at_tri_quadrature(fr,midsoln,
                        dual_cell_midsoln,rk_step,total_cell, dual_pairs, N_dual_pairs);
           
                    // TMP
                    /***
                    if(fr->step >= 107579)
                    {
                        printf("\n\n------ Num of fix_iteration (%d)---\n", N_fix_iteration);
                        TRI   *tri;
                        POLYGON           *tmp_polyg[3];
                        int               tmpii;
                        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
                        {
                            for(tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf); tri = tri->next)
                            {
                                if(tri->id == 788)
                                {
                                    printf("\n\n********************************\n");
                                    printf("\n\ntri %d after ----Modify_B_of_unphysical_state_at_tri_q iteration %d.\n",
                                     tri->id,N_fix_iteration);
                                    for(tmpii = 0; tmpii < 3; tmpii++)
                                        tmp_polyg[tmpii] = fg_polyg_at_tri_vert(tri)[tmpii];
                                    printf("Check Bn from polygons (%d, %d, %d)\n\n",
                                           tmp_polyg[0]->id, tmp_polyg[1]->id, tmp_polyg[2]->id);
                                    match_Bn_at_comm_dual_cell_edges(tmp_polyg[0], tmp_polyg[1],
                                                                 dual_cell_midsoln, rk_step);
                                    match_Bn_at_comm_dual_cell_edges(tmp_polyg[1], tmp_polyg[2],
                                                                 dual_cell_midsoln, rk_step);
                                    match_Bn_at_comm_dual_cell_edges(tmp_polyg[2], tmp_polyg[0],
                                                                 dual_cell_midsoln, rk_step);
                                }
                            }
                        }
                    }
                    ***/
                    // END::: TMP

                    N_fix_iteration++;
                    free(dual_pairs);
                }while( N_dual_pairs != 0 && N_fix_iteration < 4);

#if defined(__MPI__)
/*
 *   In parallel, a tri that needs its negative pressure fixed may lie on the
 *   subdomain boundary. This causes a dual cell in the buffer to be modified.
 *   However, this modification may not be seen by the processor that actually
 *   computes the dual cell, which causes a consistency problem.
 */
                /* Check whether a dual cell is on the subdomain boundary. It is likely
                 *  that a dual cell in the subdomain buffer is modified while the
                 *  corresponding dual cell inside the subdomain is not.
                 */
                if(YES == pp_max_status(parallel_zero_B_dual))
                {
                    Zero_out_edge_B_variance_on_dual_cell(fr,midsoln,dual_cell_midsoln,rk_step);
                    /*Maybe use: Construct_Mag_field_on_dual_cell_P1_DG() here*/
                    Construct_Mag_field_on_dual_cell_P1_DG(fr,midsoln,
                        dual_cell_midsoln,rk_step,total_cell);
                    // Build_B_on_dual_cell_second_time(fr,midsoln,dual_cell_midsoln,rk_step);
                }
#endif // if defined(__MPI__)

                if(N_dual_pairs != 0)
                {
                    // printf("WARNING: Construct_Mag_field_on_both_grids(), found negative pressure\n");
                    // printf("N_dual_pairs = %d\n", N_dual_pairs);

                    dual_pairs = Positivity_press_dens_at_tri_quadrature(fr,midsoln,
                        dual_cell_midsoln,rk_step,total_cell, &N_dual_pairs);
                    for(i = 0; i < N_dual_pairs; i++)
                    {
                        // printf("trouble dual cell %d\n", dual_pairs[i].dual_cells[0]->id);
                    }
                    free(dual_pairs);
                    // clean_up(ERROR);
                }

                //// TMP, for debugging
                /***
                if(fr->step >= 107579)
                {
                     TRI   *tri;
                     POLYGON           *tmp_polyg[3];
                     int               tmpii;
                     for(surf = fr->mesh->surfaces; surf && *surf; surf++)
                     {
                         for(tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf); tri = tri->next)
                         {
                             if(tri->id == 788)
                             {
                                 printf("\n\n********************************\n");
                                 printf("\n\ntri %d after fixing negative pressure() before comm.\n", tri->id);
                                 for(tmpii = 0; tmpii < 3; tmpii++)
                                     tmp_polyg[tmpii] = fg_polyg_at_tri_vert(tri)[tmpii];
                                 printf("Check Bn from polygons (%d, %d, %d)\n\n", 
                                        tmp_polyg[0]->id, tmp_polyg[1]->id, tmp_polyg[2]->id);
                                 match_Bn_at_comm_dual_cell_edges(tmp_polyg[0], tmp_polyg[1], dual_cell_midsoln, rk_step);
                                 match_Bn_at_comm_dual_cell_edges(tmp_polyg[1], tmp_polyg[2], dual_cell_midsoln, rk_step);
                                 match_Bn_at_comm_dual_cell_edges(tmp_polyg[2], tmp_polyg[0], dual_cell_midsoln, rk_step);
                             }
                         }
                     }
                }
                ***/
                //// END::: TMP, for debugging
            } // if(YES == limiter_on)

#if defined(__MPI__)
            /*???? This parallel_update_dual_cell_edge_B_buffer() should come before construction. Moreover, B on the dual cell
             * should be communicated for the buffer zone after reconstruction.
             */
            parallel_update_dual_cell_B_buffer(fr, dual_cell_midsoln, rk_step); // 02/16/2014
#else
            /**
            if(debugging("reg_T_per_BC"))
            {
                printf("ERROR: Construct_Mag_field_on_both_grids(),"
                       " implement comm for periodic BC on dual cell\n");
                clean_up(ERROR);
            }
            **/
#endif // if defined(__MPI__)

            Construct_Mag_field_on_tri_mesh_P1_DG(NULL,fr,midsoln,
                    dual_cell_midsoln,rk_step, time);
            // Construct_Mag_field_on_dual_cell_P1(fr,midsoln,
            //         dual_cell_midsoln,rk_step,total_cell);
            // Construct_Mag_field_on_dual_cell_P1_no_div_free_constraint(fr,midsoln,
            //      dual_cell_midsoln,rk_step,total_cell);

            // printf("ERROR: Construct_Mag_field_on_dual_cell(), test MAX_N_COEF = 3 case\n");
            // clean_up(ERROR);
        } // END::: if(MAX_N_COEF == 3)
      
        if(MAX_N_COEF != 1 && MAX_N_COEF != 3)
        {
            printf("ERROR: Construct_Mag_field_on_both_grids(), implement\n");
            clean_up(ERROR);
        }

        // printf("Left Construct_Mag_field_on_dual_cell()\n");
        debug_print("Dual_cell","Left Construct_Mag_field_on_both_grids()\n");
}

/*
*                       B_at_dual_cell_cent():
*
*       Arithmetic mean of the magnetic field at the centroid of a dual cell.
*
*       For each of the polyg->n_sides triangles listed by
*       tri_at_polyg_vert(polyg), the dg_B expansion of that triangle
*       (MAX_N_COEF coefficients per component) is evaluated at
*       polyg_centroid(polyg).  The basis values come from
*       vh_val_loc_div_free_basis(), which is handed the evaluation point,
*       the triangle centroid and the square root of the triangle area.
*       The per-triangle values are summed and divided by polyg->n_sides.
*
*       polyg     dual cell whose centroid is the evaluation point
*       fr        not used
*       midsoln   intermediate states indexed by tri->id; read only when
*                 rk_step != RK_STEP
*       rk_step   RK_STEP selects tri->st, any other value selects
*                 midsoln[tri->id].st[rk_step]
*       B_cent    output; B_cent[0] and B_cent[1] receive the two averaged
*                 components
*/
LOCAL void B_at_dual_cell_cent(
	POLYGON       *polyg,
        Front         *fr,
        Mid_soln      *midsoln,
        int           rk_step,
        double        *B_cent)
{
        double        *eval_pt = polyg_centroid(polyg);
        double        sum_B0 = 0.0, sum_B1 = 0.0;
        int           it, ic;

        for(it = 0; it < polyg->n_sides; it++)
        {
            TRI       *tri = tri_at_polyg_vert(polyg)[it];
            Locstate  st;
            double    *tri_cent, sqrt_area, basis_val;

            /* Pick the state that belongs to the requested stage. */
            if(rk_step == RK_STEP)
                st = tri->st;
            else
                st = midsoln[tri->id].st[rk_step];

            tri_cent = fg_centroid(tri);
            sqrt_area = sqrt(fg_area(tri));

            /* Both components share the same basis value for coefficient ic. */
            for(ic = 0; ic < MAX_N_COEF; ic++)
            {
                basis_val = vh_val_loc_div_free_basis(eval_pt,tri_cent,sqrt_area,ic);
                sum_B0 += dg_B(st)[0][ic]*basis_val;
                sum_B1 += dg_B(st)[1][ic]*basis_val;
            }
        }

        B_cent[0] = sum_B0/polyg->n_sides;
        B_cent[1] = sum_B1/polyg->n_sides;
}


/*
*                       Bn_at_dual_cell_edges():
*
*       Copies the N_COEF_EDGE coefficients stored for each of the
*       polyg->n_sides sides of the dual cell polyg into Bn[side][coef].
*
*       When rk_step == RK_STEP the source is polyg_side_dgB(polyg); for any
*       other stage it is dual_cell_midsoln[polyg->id].edge_dgBn[rk_step].
*
*       polyg              dual cell whose side data is read
*       midsoln            not used
*       dual_cell_midsoln  per-dual-cell intermediate storage, indexed by
*                          polyg->id; read only when rk_step != RK_STEP
*       rk_step            stage selector, see above
*       Bn                 output; the caller must supply at least
*                          polyg->n_sides rows
*/
EXPORT void Bn_at_dual_cell_edges(
	POLYGON            *polyg,
	Mid_soln           *midsoln,
	Dual_cell_Mid_soln *dual_cell_midsoln,
	int                rk_step,
	double             Bn[][N_COEF_EDGE])
{
        const int  from_polyg = (rk_step == RK_STEP);
        int        side, coef;

        for(side = 0; side < polyg->n_sides; side++)
        {
            for(coef = 0; coef < N_COEF_EDGE; coef++)
            {
                if(from_polyg)
                    Bn[side][coef] = polyg_side_dgB(polyg)[side][coef];
                else
                    Bn[side][coef] =
                        dual_cell_midsoln[polyg->id].edge_dgBn[rk_step][side][coef];
            }
        }
}


/*
*       Construct_Mag_field_on_dual_cell_P1_no_div_free_constraint():
*
*       For every closed dual cell (polygon) on the surfaces of fr->mesh,
*       solves a square linear system for nodal magnetic field values: one
*       (Bx,By) pair per polygon vertex plus one pair at the centroid.
*
*       Unknowns (N = 2*(n_sides+1)), with By_offset = n_sides+1:
*           XX[i],            i < n_sides    Bx at polygon vertex i
*           XX[n_sides]                      Bx at the centroid
*           XX[By_offset + i]                the same for By
*
*       Equations (M = 2*n_sides + 2), with nor = (t[1], -t[0]) and
*       t = polyg_side_vector(polyg)[i] (whether t is normalised is not
*       visible here):
*           row i            0.5*nor.(B(i+1) - B(i)) = 0
*                            The right hand side is hard wired to zero (test
*                            for constant normal Bn); Bn[i][1] is not used.
*           row n_sides+i    0.5*nor.(B(i) + B(i+1)) = Bn[i][0]
*           row 2*n_sides    Bx(centroid) = B_cent[0]
*           row 2*n_sides+1  By(centroid) = B_cent[1]
*
*       Bn comes from Bn_at_dual_cell_edges(), B_cent from
*       B_at_dual_cell_cent().  The matrix depends only on the polygon; it is
*       assembled the first time a polygon is seen and kept in
*       dual_cell_sten_2[polyg->id].MB_B (MB_A is set to NULL).  Later calls
*       only rebuild the right hand side.
*
*       NOTE(review): the cached matrix is passed to solve_by_gj() on every
*       call.  This relies on solve_by_gj() leaving its matrix argument
*       unchanged -- confirm against its implementation.
*
*       The solution is stored in Nodal_B_of_polyg(polyg) when
*       rk_step == RK_STEP, otherwise in
*       dual_cell_midsoln[polyg->id].Nodal_B[rk_step].
*
*       fr                 front whose mesh surfaces are traversed
*       midsoln            forwarded to the helper routines
*       dual_cell_midsoln  per-dual-cell intermediate storage
*       rk_step            stage selector, see above
*       total_cell         not used
*
*       A polygon with more sides than the local buffers can hold is reported
*       as an error through clean_up(ERROR).
*/
EXPORT void Construct_Mag_field_on_dual_cell_P1_no_div_free_constraint(
        Front              *fr,
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step,
        int                total_cell)
{
        SURFACE    **surf;
        POLYGON    *polyg;
        int        i, j, k, n_sides, next, By_offset;
        int        M, N;
        double     **tmpB, rsideB[30];
        double     t[3], nor[3], Bn[20][N_COEF_EDGE];
        double     XX[400];
        double     B_cent[3];
        int        debug = NO;   /* set to YES by hand to print the traces */

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (polyg = first_polyg(*surf); !at_end_of_polyg_list(polyg,*surf);
                 polyg = polyg->next )
            {
               if(NO == polyg->closed) continue;

               n_sides = polyg->n_sides;
               // By_offset: offset for where eqns for Y-component of B starts.
               By_offset = n_sides+1;
               // M: avg and slope eqn per edge + the two centroid eqns
               M = 2*n_sides + 2;
               // N: D.O.F., nodal values of X- and Y- components of B
               N = 2*(n_sides+1);

               /* Bn is filled per side and rsideB per equation: refuse
                * polygons that do not fit instead of writing past the
                * end of the local arrays. */
               if(n_sides > (int)(sizeof(Bn)/sizeof(Bn[0])) ||
                  M > (int)(sizeof(rsideB)/sizeof(rsideB[0])))
               {
                   printf("ERROR: Construct_Mag_field_on_dual_cell_P1_no_div_free_constraint(),"
                          " dual cell %d has %d sides, too many for local buffers\n",
                          (int)polyg->id, n_sides);
                   clean_up(ERROR);
                   continue; /* only reached if clean_up() returns */
               }

               Bn_at_dual_cell_edges(polyg,midsoln,dual_cell_midsoln,rk_step,Bn);
               B_at_dual_cell_cent(polyg,fr,midsoln,rk_step,B_cent);

               if(dual_cell_sten_2[polyg->id].Bsten_set == NO)
               {
                   /* First visit: assemble and cache the matrix. */
                   dual_cell_sten_2[polyg->id].Bsten_set = YES;
                   matrix(&tmpB,M,N,sizeof(double));

                   dual_cell_sten_2[polyg->id].MB_A = NULL;
                   dual_cell_sten_2[polyg->id].MB_B = tmpB;

                   for(i = 0; i < M; i++)
                   {
                       for(j = 0; j < N; j++)
                           tmpB[i][j] = 0.0;
                   }

                   for(i = 0; i < n_sides; i++)
                   {
                       next = (i+1)%n_sides;

                       for(k = 0; k < 2; k++)
                           t[k] = polyg_side_vector(polyg)[i][k];
                       nor[0] = t[1]; nor[1] = -t[0];

                       /// Slope of the normal trace along side i.
                       tmpB[i][i] = -0.5*nor[0];
                       tmpB[i][next] = 0.5*nor[0];
                       tmpB[i][i+By_offset] = -0.5*nor[1];
                       tmpB[i][next+By_offset] = 0.5*nor[1];

                       /// Average of the normal trace along side i; must be
                       /// identical to Bn evolved on the dual cell edge.
                       tmpB[i+n_sides][i] = 0.5*nor[0];
                       tmpB[i+n_sides][next] = 0.5*nor[0];
                       tmpB[i+n_sides][i+By_offset] = 0.5*nor[1];
                       tmpB[i+n_sides][next+By_offset] = 0.5*nor[1];
                   }

                   /// Match the centroid nodal value with the B value at
                   /// this location obtained from the triangles.
                   tmpB[2*n_sides][n_sides] = 1.0;
                   tmpB[2*n_sides+1][n_sides+By_offset] = 1.0;
               }
               else
               {
                   tmpB = dual_cell_sten_2[polyg->id].MB_B;
               }

               /* The right hand side changes on every call. */
               for(i = 0; i < n_sides; i++)
               {
                   rsideB[i] = 0.0;  // test for constant normal Bn case.
                   rsideB[i+n_sides] = Bn[i][0];
               }
               rsideB[2*n_sides] = B_cent[0];
               rsideB[2*n_sides+1] = B_cent[1];

               solve_by_gj(tmpB, M, rsideB, XX);

               if(rk_step == RK_STEP)
               {
                   for(i = 0; i < n_sides+1; i++)
                   {
                       Nodal_B_of_polyg(polyg)[0][i] = XX[i];
                       Nodal_B_of_polyg(polyg)[1][i] = XX[i+By_offset];
                   }
               }
               else
               {
                   for(i = 0; i < n_sides+1; i++)
                   {
                       dual_cell_midsoln[polyg->id].Nodal_B[rk_step][0][i] = XX[i];
                       dual_cell_midsoln[polyg->id].Nodal_B[rk_step][1][i] = XX[i+By_offset];
                   }
               }

               if(debug==YES)
               {
                   trace_of_dual_cell_P1(polyg,rk_step, dual_cell_midsoln, midsoln);
                   printf("centroid B from tri %14.13g, %14.13g\n", B_cent[0], B_cent[1]);
                   /* Report the centroid value from the storage that was
                    * actually written for this stage. */
                   if(rk_step == RK_STEP)
                       printf("Computed centroid B %14.13g, %14.13g\n",
                                Nodal_B_of_polyg(polyg)[0][n_sides],
                                Nodal_B_of_polyg(polyg)[1][n_sides]);
                   else
                       printf("Computed centroid B %14.13g, %14.13g\n",
                                dual_cell_midsoln[polyg->id].Nodal_B[rk_step][0][n_sides],
                                dual_cell_midsoln[polyg->id].Nodal_B[rk_step][1][n_sides]);
               }
            } /// END::: for (polyg = first_polyg(*surf);...)
        }
}

/*
*                       trace_of_dual_cell_P1():
*
*       Diagnostic printout.  Using the matrix cached in
*       dual_cell_sten_2[polyg->id].MB_B and the nodal B values of the
*       requested stage, recomputes for every side i of polyg
*           - row n_sides+i applied to the nodal values ("trace avg"),
*             printed next to the given Bn[i][0];
*           - row i applied to the nodal values ("trace slope"), printed next
*             to Bn[i][1], or next to 0.0 when N_COEF_EDGE == 1.
*       Only the four nodal columns of the two end points of a side enter
*       each product.  The averages are printed first, then a blank line,
*       then the slopes and another blank line.
*
*       polyg              dual cell to inspect
*       rk_step            RK_STEP reads Nodal_B_of_polyg(polyg); any other
*                          value reads dual_cell_midsoln[polyg->id].Nodal_B[rk_step]
*       dual_cell_midsoln  per-dual-cell intermediate storage
*       midsoln            forwarded to Bn_at_dual_cell_edges()
*
*       Calls clean_up(ERROR) when the stencil of polyg has not been set or
*       when polyg has more sides than the local buffers can hold.
*/
LOCAL void trace_of_dual_cell_P1(
	POLYGON    *polyg,
        int        rk_step,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        Mid_soln           *midsoln)
{
        double     **tmpB, *row;
        double     Bn[20][N_COEF_EDGE];
        double     XX[50], trace, given;
        int        i, next, By_offset, N_sides;

        N_sides = polyg->n_sides;
        By_offset = N_sides+1;

        /* Bn holds one row per side, XX the 2*(N_sides+1) nodal values. */
        if(N_sides > (int)(sizeof(Bn)/sizeof(Bn[0])) ||
           2*By_offset > (int)(sizeof(XX)/sizeof(XX[0])))
        {
            printf("ERROR: trace_of_dual_cell_P1(), polygon has %d sides,"
                   " too many for local buffers\n", N_sides);
            clean_up(ERROR);
            return; /* only reached if clean_up() returns */
        }

        if(dual_cell_sten_2[polyg->id].Bsten_set != YES)
        {
            printf("ERROR: trace_of_dual_cell_P1()\n");
            clean_up(ERROR);
            return; /* never go on with an unset matrix pointer */
        }
        tmpB = dual_cell_sten_2[polyg->id].MB_B;

        Bn_at_dual_cell_edges(polyg,midsoln,dual_cell_midsoln,rk_step,Bn);

        /* Gather the nodal values: Bx block first, By block at By_offset. */
        if(rk_step == RK_STEP)
        {
            for(i = 0; i < N_sides+1; i++)
            {
                XX[i] = Nodal_B_of_polyg(polyg)[0][i];
                XX[i+By_offset] = Nodal_B_of_polyg(polyg)[1][i];
            }
        }
        else
        {
            for(i = 0; i < N_sides+1; i++)
            {
                XX[i] = dual_cell_midsoln[polyg->id].Nodal_B[rk_step][0][i];
                XX[i+By_offset] = dual_cell_midsoln[polyg->id].Nodal_B[rk_step][1][i];
            }
        }

        /* Side averages: contribution from nodal values only. */
        for(i = 0; i < N_sides; i++)
        {
            next = (i+1)%N_sides;
            row = tmpB[i+N_sides];

            trace = row[i]*XX[i] +
                    row[next]*XX[next] +
                    row[i+By_offset]*XX[i+By_offset] +
                    row[next+By_offset]*XX[next+By_offset];

            printf("side[%d], given avg Bn = %e, trace avg = %e\n",
                      i, Bn[i][0], trace);
        }

        printf("\n");

        /* Side slopes: contribution from nodal values by exact match. */
        for(i = 0; i < N_sides; i++)
        {
            next = (i+1)%N_sides;
            row = tmpB[i];

            trace = row[i]*XX[i] +
                    row[next]*XX[next] +
                    row[i+By_offset]*XX[i+By_offset] +
                    row[next+By_offset]*XX[next+By_offset];

            /* With a single edge coefficient there is no stored slope. */
            given = (N_COEF_EDGE != 1) ? Bn[i][1] : 0.0;
            printf("side[%d], given slope = %e, trace slope = %e\n",
                      i, given, trace);
        }

        printf("\n");
}

/*
*               trace_of_dual_cell_P1_with_quad_bubbles():
*
*       Diagnostic printout for a reconstruction that carries, besides the
*       nodal values, six extra coefficients read from Bubble_B (three for
*       each B component).  With the matrix cached in
*       dual_cell_sten_2[polyg->id].MB_B the routine prints
*           - for sides 0 .. n_sides-2: row n_sides+i applied to the nodal
*             and to the six extra unknowns ("trace avg") next to Bn[i][0];
*           - for the last side: the same quantity, but with coefficients
*             built here from the side geometry rather than read from the
*             matrix;
*           - for every side: row i applied to the nodal values
*             ("trace slope") next to Bn[i][1], or next to 0.0 when
*             N_COEF_EDGE == 1.
*
*       Unknown layout in XX, with By_offset = n_sides+1:
*           XX[0 .. n_sides]               Bx nodal values
*           XX[By_offset .. ]              By nodal values
*           XX[2*By_offset + 3*c + k]      Bubble_B[c][k], c = 0,1, k = 0,1,2
*
*       The last-side weights (x_bar^2 + dx^2/3), (x_bar*y_bar + dx*dy/3)
*       and (y_bar^2 + dy^2/3) equal the averages of x^2, x*y and y^2 along
*       a segment with mid point (x_bar,y_bar) and half extent (dx,dy).
*
*       NOTE(review): presumably the matrix row of the last side holds a
*       different equation in the reconstruction that fills MB_B, which is
*       why it is rebuilt here -- confirm.  The cached matrix must have at
*       least 2*By_offset+6 columns; the matrix assembled by
*       Construct_Mag_field_on_dual_cell_P1_no_div_free_constraint() has only
*       2*By_offset -- verify which reconstruction this is paired with.
*
*       polyg              dual cell to inspect
*       rk_step            RK_STEP reads the polygon storage, any other value
*                          reads dual_cell_midsoln[polyg->id] for that stage
*       dual_cell_midsoln  per-dual-cell intermediate storage
*       midsoln            forwarded to Bn_at_dual_cell_edges()
*
*       Calls clean_up(ERROR) when the stencil of polyg has not been set or
*       when the number of sides does not fit the local buffers.
*/
LOCAL void trace_of_dual_cell_P1_with_quad_bubbles(
	POLYGON    *polyg,
        int        rk_step,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        Mid_soln           *midsoln)
{
        double     **tmpB, *row;
        double     Bn[20][N_COEF_EDGE];
        double     XX[50], bub[6];
        double     t[3], nor[3], half_nx, half_ny;
        double     x0, y0, x1, y1, dx, dy, x_bar, y_bar, one_3rd;
        double     trace, given;
        POINT      *p0, *p1;
        int        i, k, c, next, last, By_offset, N_sides;

        one_3rd = 1.0/3.0;
        N_sides = polyg->n_sides;
        By_offset = N_sides+1;
        last = N_sides-1;

        /* The last side is addressed as N_sides-1, Bn holds one row per
         * side and XX needs 2*By_offset+6 entries. */
        if(N_sides < 1 ||
           N_sides > (int)(sizeof(Bn)/sizeof(Bn[0])) ||
           2*By_offset+6 > (int)(sizeof(XX)/sizeof(XX[0])))
        {
            printf("ERROR: trace_of_dual_cell_P1_with_quad_bubbles(), polygon has %d sides,"
                   " not supported by local buffers\n", N_sides);
            clean_up(ERROR);
            return; /* only reached if clean_up() returns */
        }

        if(dual_cell_sten_2[polyg->id].Bsten_set != YES)
        {
            printf("ERROR: trace_of_dual_cell_P1_with_quad_bubbles()\n");
            clean_up(ERROR);
            return; /* never go on with an unset matrix pointer */
        }
        tmpB = dual_cell_sten_2[polyg->id].MB_B;

        Bn_at_dual_cell_edges(polyg,midsoln,dual_cell_midsoln,rk_step,Bn);

        /* Gather nodal values and the six extra coefficients. */
        if(rk_step == RK_STEP)
        {
            for(i = 0; i < N_sides+1; i++)
            {
                XX[i] = Nodal_B_of_polyg(polyg)[0][i];
                XX[i+By_offset] = Nodal_B_of_polyg(polyg)[1][i];
            }
            for(c = 0; c < 2; c++)
            {
                for(k = 0; k < 3; k++)
                    XX[2*By_offset+3*c+k] = Bubble_B_of_polyg(polyg)[c][k];
            }
        }
        else
        {
            for(i = 0; i < N_sides+1; i++)
            {
                XX[i] = dual_cell_midsoln[polyg->id].Nodal_B[rk_step][0][i];
                XX[i+By_offset] = dual_cell_midsoln[polyg->id].Nodal_B[rk_step][1][i];
            }
            for(c = 0; c < 2; c++)
            {
                for(k = 0; k < 3; k++)
                    XX[2*By_offset+3*c+k] =
                        dual_cell_midsoln[polyg->id].Bubble_B[rk_step][c][k];
            }
        }

        /* Side averages for all sides but the last, straight from the matrix. */
        for(i = 0; i < last; i++)
        {
            next = (i+1)%N_sides;
            row = tmpB[i+N_sides];

            // contribution from nodal values
            trace = row[i]*XX[i] +
                    row[next]*XX[next] +
                    row[i+By_offset]*XX[i+By_offset] +
                    row[next+By_offset]*XX[next+By_offset];

            // contribution from bubble functions
            trace += row[2*By_offset]*XX[2*By_offset] +
                     row[2*By_offset+1]*XX[2*By_offset+1] +
                     row[2*By_offset+2]*XX[2*By_offset+2] +
                     row[2*By_offset+3]*XX[2*By_offset+3] +
                     row[2*By_offset+4]*XX[2*By_offset+4] +
                     row[2*By_offset+5]*XX[2*By_offset+5];

            printf("side[%d], given avg Bn = %e, trace avg = %e\n",
                      i, Bn[i][0], trace);
        }

        /* Last side (vertex N_sides-1 back to vertex 0): coefficients are
         * built from the side geometry. */
        p0 = Point_of_polyg(polyg)[last];
        p1 = Point_of_polyg(polyg)[0];
        x0 = Coords(p0)[0];
        y0 = Coords(p0)[1];
        x1 = Coords(p1)[0];
        y1 = Coords(p1)[1];

        dx = 0.5*(x1 - x0);
        dy = 0.5*(y1 - y0);
        x_bar = 0.5*(x1 + x0);
        y_bar = 0.5*(y1 + y0);

        for(k = 0; k < 2; k++)
            t[k] = polyg_side_vector(polyg)[last][k];
        nor[0] = t[1]; nor[1] = -t[0];

        // contribution from nodal based linear functions for traces
        half_nx = 0.5*nor[0];
        half_ny = 0.5*nor[1];
        trace = half_nx*XX[last] +
                half_nx*XX[0] +
                half_ny*XX[last+By_offset] +
                half_ny*XX[By_offset];

        // contribution from bubble functions for traces
        bub[0] = (sqr(x_bar) + one_3rd*sqr(dx))*nor[0];
        bub[1] = (x_bar*y_bar + one_3rd*dx*dy )*nor[0];
        bub[2] = (sqr(y_bar) + one_3rd*sqr(dy))*nor[0];
        bub[3] = (sqr(x_bar) + one_3rd*sqr(dx))*nor[1];
        bub[4] = (x_bar*y_bar + one_3rd*dx*dy )*nor[1];
        bub[5] = (sqr(y_bar) + one_3rd*sqr(dy))*nor[1];

        trace += bub[0]*XX[2*By_offset] +
                 bub[1]*XX[2*By_offset+1] +
                 bub[2]*XX[2*By_offset+2] +
                 bub[3]*XX[2*By_offset+3] +
                 bub[4]*XX[2*By_offset+4] +
                 bub[5]*XX[2*By_offset+5];

        printf("side[%d], given avg Bn = %e, trace avg = %e\n",
                      last, Bn[last][0], trace);
        printf("\n");

        /* Side slopes: contribution from nodal values by exact match. */
        for(i = 0; i < N_sides; i++)
        {
            next = (i+1)%N_sides;
            row = tmpB[i];

            trace = row[i]*XX[i] +
                    row[next]*XX[next] +
                    row[i+By_offset]*XX[i+By_offset] +
                    row[next+By_offset]*XX[next+By_offset];

            /* With a single edge coefficient there is no stored slope. */
            given = (N_COEF_EDGE != 1) ? Bn[i][1] : 0.0;
            printf("side[%d], given slope = %e, trace slope = %e\n",
                      i, given, trace);
        }

        printf("\n");
}

/*
*               Construct_Mag_field_on_dual_cell_P2():
*
*       Work-in-progress set-up for a P2 reconstruction of the magnetic
*       field on dual cells.  For every closed polygon on every surface of
*       fr->mesh this routine
*
*         1. calls Bn_at_dual_cell_edges() to fill Bn for the polygon, and
*         2. the first time a polygon is visited (Bsten_set == NO), allocates
*            the least-squares matrix MB_A and the constraint matrix MB_B,
*            zeroes them and caches them in dual_cell_sten_2[polyg->id].
*
*       No equations are assembled and no system is solved here yet.
*
*       fr                 front whose mesh surfaces are traversed
*       midsoln            forwarded unchanged to Bn_at_dual_cell_edges()
*       dual_cell_midsoln  forwarded unchanged to Bn_at_dual_cell_edges()
*       rk_step            forwarded unchanged to Bn_at_dual_cell_edges()
*       total_cell         not used by this routine
*/
EXPORT void Construct_Mag_field_on_dual_cell_P2(
        Front              *fr,
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step,
        int                total_cell)
{
        SURFACE    **surf;
        POLYGON    *polyg;
        double     **tmpA, **tmpB;
        double     Bn[20][N_COEF_EDGE];
        int        i, j;
        int        M, N, P;
        int        rowsA, rowsB;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (polyg = first_polyg(*surf); !at_end_of_polyg_list(polyg,*surf);
                 polyg = polyg->next )
            {
                if(NO == polyg->closed) continue;

                Bn_at_dual_cell_edges(polyg,midsoln,dual_cell_midsoln,rk_step,Bn);

                /// M: number of least square eqns
                /// N: number of unknowns
                /// P: number of constraint eqns, which are satisfied exactly.

                // M: one eqn per edge + 2 eqns for B at the dual cell centroid.
                //    (The alternative 2*n_sides + 2, which adds one 2nd-moment
                //    eqn per edge, is not active.)
                M = polyg->n_sides + 2;
                // N: nodal values of the X- and Y-components of B at the
                //    vertices and the centroid, plus 6 bubble-function coeffs.
                N = 2*(polyg->n_sides+1) + 6;
                // P: divergence-free condition of P1 on each triangular
                //    sub-region of the dual cell + (n_sides-1) average values
                //    of Bn evolved on dual cell edges + 2 divergence-free
                //    conditions of the quadratic bubble functions.
                P = polyg->n_sides + (polyg->n_sides-1) + 2;

                if(dual_cell_sten_2[polyg->id].Bsten_set == NO)
                {
                    dual_cell_sten_2[polyg->id].Bsten_set = YES;

                    // tmpB has extra lines to store one "un-needed"
                    // constraint eqn.  It is un-needed because of imposing
                    // the divergence-free condition on all triangular
                    // sub-regions of the dual cell.
                    rowsA = 2*M;
                    rowsB = P+2;
                    matrix(&tmpA,rowsA,N,sizeof(double));
                    matrix(&tmpB,rowsB,N,sizeof(double));
                    dual_cell_sten_2[polyg->id].MB_A = tmpA;
                    dual_cell_sten_2[polyg->id].MB_B = tmpB;

                    // Clear every allocated row, not just the first M (resp.
                    // P+1), so that the cached matrices never expose unset
                    // storage to whoever fills or reads them later.
                    for(i = 0; i < rowsA; i++)
                    {
                        for(j = 0; j < N; j++)
                            tmpA[i][j] = 0.0;
                    }
                    for(i = 0; i < rowsB; i++)
                    {
                        for(j = 0; j < N; j++)
                            tmpB[i][j] = 0.0;
                    }
                } /// END::: if(dual_cell_sten_2[polyg->id].Bsten_set == NO)

            } /// END::: for (polyg = first_polyg(*surf); ...)
        }/// END::: for(surf = fr->mesh->surfaces; surf && *surf; surf++)

}

/*
*                       tmp_construct_rect():
*
*       Debugging harness for the constrained least-squares reconstruction
*       of the magnetic field on a dual cell.  It builds the unit square
*       [0,1]x[0,1] as a 4-sided polygon, prescribes edge averages of Bn
*       (Bbar), zero edge slopes/2nd moments and a centroid value B_cent,
*       assembles
*
*           minimize || rsideA - tmpA * x ||   subject to   tmpB * x = rsideB
*
*       and solves it with LAPACK dgglse.  All intermediate data and the
*       solution are printed to stdout.
*
*       Unknown vector x (N = 2*(n_sides+1) + 2 entries):
*           [0 .. n_sides-1]                   Bx at the polygon vertices
*           [n_sides]                          Bx at the centroid
*           [By_offset .. By_offset+n_sides-1] By at the polygon vertices
*           [By_offset+n_sides]                By at the centroid
*           [2*By_offset]                      coeff. of bubble (y^2, 0)^T
*           [2*By_offset+1]                    coeff. of bubble (0, x^2)^T
*       The full quadratic bubble space (a_3 x^2 + a_4 xy + a_5 y^2 for Bx,
*       b_3 x^2 + b_4 xy + b_5 y^2 for By) is NOT assembled here.
*
*       Returns the polygon that was constructed.
*
*       NOTE(review): tmpA, tmpB and polyg->conformal_basis_tri are allocated
*       here and not released; presumably acceptable for a one-shot debug
*       routine -- confirm before calling this repeatedly.
*/
LOCAL POLYGON *tmp_construct_rect()
{
        double    coords[4][3] = {{0.0,0.0,0.0},
                                  {1.0,0.0,0.0},
                                  {1.0,1.0,0.0},
                                  {0.0,1.0,0.0}
                                 };
        double    center[3] = {0.5, 0.5, 0.0};
        POINT     *pt[30];
        POLYGON   *polyg;
        double    x_crds[20], y_crds[20], *cent;
        int       i, j, k, By_offset, tmpk, N_sides;
        POINT     *p0, *p1;
        double    phi0[20][3], phi1[20][3], phi2[20][3];
        double    **tmpA, **tmpB, rsideB[30], rsideA[30];
        double    Bbar[4] = {-0.5, 1.0, 0.5, -1.0};
        double    B_1stm[4] = {0.0, 0.0, 0.0, 0.0};
        double    B_2ndm[4] = {0.0, 0.0, 0.0, 0.0};
        double    t[3], nor[3], Bn[20][N_COEF_EDGE];
        int       M, N, P, LDA, LDB, l, LWORK, INFO;
        double    AA[1800], BB[1800], CC[800], DD[800], XX[800], work[8000];
        double    B_cent[3];
        int       debug = YES;
        double    val[45];
        double    ***conformal_basis_tri;
        double    dx, dy, x_bar, y_bar, one_3rd, two_3rd;

        B_cent[0] = 1.0; B_cent[1] = 0.5;

        one_3rd = 1.0/3.0; two_3rd = 2.0/3.0;

        for(i = 0; i < 4; i++)
        {
            pt[i] = Point(coords[i]);
        }

        polyg = i_make_polygon(pt, 4, center, YES);
        tri_array(&(polyg->conformal_basis_tri),
                4, C0_MAX_N_COEF, C0_MAX_N_COEF, sizeof(double));
        comp_C0_conformal_basis_polygon(polyg);

        // Only store as many edge moments as Bn has room for; writing
        // Bn[i][1] or Bn[i][2] when N_COEF_EDGE is smaller would run past
        // the end of row i.
        for(i = 0; i < 4; i++)
        {
            Bn[i][0] = Bbar[i];
            if(N_COEF_EDGE > 1)
                Bn[i][1] = B_1stm[i];
            if(N_COEF_EDGE > 2)
                Bn[i][2] = B_2ndm[i];
        }

        //// TMP: print the nodal basis of two reference triangles
        {
            x_crds[0] = 0.0; x_crds[1] = 1.0; x_crds[2] = 0.0;
            y_crds[0] = 0.0; y_crds[1] = 0.0; y_crds[2] = 1.0;
            C0_conformal_basis_tri(x_crds,y_crds,phi0[0],phi1[0],phi2[0]);

            printf("\n\nTRI 0 +++++++++++++++++++++++++++++\n");
            printf("node 0(%g, %g)\n", x_crds[0], y_crds[0]);
            printf("node 1(%g, %g)\n", x_crds[1], y_crds[1]);
            printf("node 2(%g, %g)\n", x_crds[2], y_crds[2]);

            print_general_vector("nodal_ceof 0", phi0[0], 3,"\n");
            print_general_vector("nodal_ceof 1", phi1[0], 3,"\n");
            print_general_vector("nodal_ceof 2", phi2[0], 3,"\n");
            printf("\nEND::::TRI 0 +++++++++++++++++++++++++++++\n");

            x_crds[0] = 1.0; x_crds[1] = 1.0; x_crds[2] = 0.0;
            y_crds[0] = 0.0; y_crds[1] = 1.0; y_crds[2] = 1.0;
            C0_conformal_basis_tri(x_crds,y_crds,phi0[1],phi1[1],phi2[1]);

            printf("\n\nTRI 1 +++++++++++++++++++++++++++++\n");
            printf("node 0(%g, %g)\n", x_crds[0], y_crds[0]);
            printf("node 1(%g, %g)\n", x_crds[1], y_crds[1]);
            printf("node 2(%g, %g)\n", x_crds[2], y_crds[2]);

            print_general_vector("nodal_ceof 0", phi0[1], 3,"\n");
            print_general_vector("nodal_ceof 1", phi1[1], 3,"\n");
            print_general_vector("nodal_ceof 2", phi2[1], 3,"\n");
            printf("\nEND::::TRI 1 +++++++++++++++++++++++++++++\n");
        }
        printf("\n\n-------------Begin of rectangle-----------\n\n");
        //// END::: TMP

        // Node 0 of every triangular sub-region is the polygon centroid.
        cent = polyg_centroid(polyg);
        x_crds[0] = cent[0];
        y_crds[0] = cent[1];

        // By_offset: offset for where eqns for Y-component of B starts.
        By_offset = polyg->n_sides+1;
        N_sides = polyg->n_sides;

        /// M: number of least square eqns:
        ///    n_sides slopes of Bn on edges + n_sides 2nd moments of Bn on
        ///    edges resulting from bubble functions + 2 components of B at
        ///    the dual cell centroid.
        M = 2*polyg->n_sides + 2;
        /// N: number of unknowns: nodal values of the X- and Y-components
        ///    of B plus 2 bubble-function coefficients.
        N = 2*(polyg->n_sides+1) + 2;
        /// P: number of constraint eqns, which are satisfied exactly:
        ///    (n_sides-1) edge averages of Bn + (n_sides-1) divergence-free
        ///    conditions of P1 on triangular sub-regions (temporary choice).
        P = (polyg->n_sides-1) + (polyg->n_sides-1);

        // Both matrices are allocated larger than the M x N and P x N
        // blocks that are actually assembled and handed to the solver.
        matrix(&tmpA,2*M+2,N+6,sizeof(double));
        matrix(&tmpB,P+2,N+6,sizeof(double));

        for(i = 0; i < M; i++)
        {
            for(j = 0; j < N; j++)
                tmpA[i][j] = 0.0;
        }
        for(i = 0; i < P+1; i++)
        {
            for(j = 0; j < N; j++)
                tmpB[i][j] = 0.0;
        }

        conformal_basis_tri = polyg->conformal_basis_tri;

        for(i = 0; i < polyg->n_sides; i++)
        {
            // Edge i runs from vertex i to vertex (i+1)%N_sides.
            p0 = Point_of_polyg(polyg)[i];
            p1 = Point_of_polyg(polyg)[(i+1)%N_sides];
            x_crds[1] = Coords(p0)[0];
            y_crds[1] = Coords(p0)[1];

            x_crds[2] = Coords(p1)[0];
            y_crds[2] = Coords(p1)[1];

            // Edge parametrisation: (x,y) = (x_bar,y_bar) + s*(dx,dy),
            // s in [-1,1].
            dx = 0.5*(x_crds[2] - x_crds[1]);
            dy = 0.5*(y_crds[2] - y_crds[1]);
            x_bar = 0.5*(x_crds[2] + x_crds[1]);
            y_bar = 0.5*(y_crds[2] + y_crds[1]);

            printf("\nside[%d], xbar = %g, ybar = %g, dx = %g, dy = %g\n", i,
                       x_bar, y_bar, dx, dy);

            // Nodal basis of sub-region i: phi0 belongs to the centroid,
            // phi1 to vertex i, phi2 to vertex i+1; each is stored as
            // (const, x-coeff, y-coeff).
            for(tmpk = 0; tmpk < C0_MAX_N_COEF; tmpk++)
            {
                phi0[i][tmpk] = conformal_basis_tri[i][0][tmpk];
                phi1[i][tmpk] = conformal_basis_tri[i][1][tmpk];
                phi2[i][tmpk] = conformal_basis_tri[i][2][tmpk];
            }

            if(debug == YES)
            {
                printf("************* sub-region %d nodal basis:\n", i);
                printf("node 0(%g, %g)\n", x_crds[0], y_crds[0]);
                printf("node 1(%g, %g)\n", x_crds[1], y_crds[1]);
                printf("node 2(%g, %g)\n", x_crds[2], y_crds[2]);

                print_general_vector("nodal_ceof 0", phi0[i], 3,"\n");
                print_general_vector("nodal_ceof 1", phi1[i], 3,"\n");
                print_general_vector("nodal_ceof 2", phi2[i], 3,"\n");

                for(k = 0; k < 3; k++)
                    val[k] = phi0[i][0] + x_crds[k]*phi0[i][1] + y_crds[k]*phi0[i][2];
                printf("nodal values for basis 0: %g, %g, %g\n", val[0], val[1], val[2]);

                for(k = 0; k < 3; k++)
                    val[k] = phi1[i][0] + x_crds[k]*phi1[i][1] + y_crds[k]*phi1[i][2];
                printf("nodal values for basis 1: %g, %g, %g\n", val[0], val[1], val[2]);

                for(k = 0; k < 3; k++)
                    val[k] = phi2[i][0] + x_crds[k]*phi2[i][1] + y_crds[k]*phi2[i][2];
                printf("nodal values for basis 2: %g, %g, %g\n", val[0], val[1], val[2]);
                printf("\n\n");
            }

            /// The trace of normal component of magnetic field
            /// supported on dual cell must be identical to
            /// the Bn evolved on the dual cell edge in the integral sense.
            /// Definition of trace of local basis functions are given in Pg 433.
            /// of An Intro. to the Finite Element Method, 3rd Ed, J.N. Reddy

            // Edge normal: the side vector rotated by -90 degrees.
            for(k = 0; k < 2; k++)
                t[k] = polyg_side_vector(polyg)[i][k];
            nor[0] = t[1]; nor[1] = -t[0];

            if(debug == YES)
            {
                printf("edge normal = [%g, %g]\n\n", nor[0], nor[1]);
            }

            // The last side contributes no constraint rows.  DO NOT NEED to
            // impose conservation of Bn on the last edge BECAUSE of the
            // divergence-free conditions.
            if(i != (N_sides-1))
            {
                /// Constraint row (i+N_sides-1): divergence-free condition
                /// for the P1 nodal basis functions on triangular
                /// sub-region i of the dual cell.
                tmpB[i+N_sides-1][i] = phi1[i][1];                         // Bx on node i
                tmpB[i+N_sides-1][(i+1)%N_sides] = phi2[i][1];             // Bx on node i+1
                tmpB[i+N_sides-1][N_sides] = phi0[i][1];                   // Bx on centroid
                tmpB[i+N_sides-1][i+By_offset] = phi1[i][2];               // By on node i
                tmpB[i+N_sides-1][(i+1)%N_sides+By_offset] = phi2[i][2];   // By on node i+1
                tmpB[i+N_sides-1][N_sides+By_offset] = phi0[i][2];         // By on centroid

                rsideB[i+N_sides-1] = 0.0;

                /// Constraint row i: the average of the trace must be equal
                /// to the avg of Bn evolved on the dual cell edge.
                tmpB[i][i] = 0.5*nor[0];                         // Bx on node i
                tmpB[i][(i+1)%N_sides] = 0.5*nor[0];             // Bx on node i+1
                tmpB[i][i+By_offset] = 0.5*nor[1];               // By on node i
                tmpB[i][(i+1)%N_sides+By_offset] = 0.5*nor[1];   // By on node i+1

                // Contribution from bubble functions (y^2, x^2)^T for
                // traces: edge average of y^2 is y_bar^2 + dy^2/3, and
                // likewise for x^2.
                tmpB[i][2*By_offset] = (sqr(y_bar) + one_3rd*sqr(dy))*nor[0];
                tmpB[i][2*By_offset+1] = (sqr(x_bar) + one_3rd*sqr(dx))*nor[1];

                rsideB[i] = Bn[i][0];
            } // END::: if(i != (N_sides-1))

            //// least square part (1), row i:
            //// The slope of the trace is equal to slope of Bn
            //// evolved on the dual cell edge in the least square sense.
            /// Contributions of nodal basis functions to trace functions
            tmpA[i][i] = -0.5*nor[0];
            tmpA[i][(i+1)%N_sides] = 0.5*nor[0];
            tmpA[i][i+By_offset] = -0.5*nor[1];
            tmpA[i][(i+1)%N_sides+By_offset] = 0.5*nor[1];

            // contribution from bubble functions (y^2, x^2)^T for the
            // slope of the trace
            tmpA[i][2*By_offset] = 2.0*nor[0]*y_bar*dy;
            tmpA[i][2*By_offset+1] = 2.0*nor[1]*x_bar*dx;

            // Without a stored 1st moment the given slope is taken as zero,
            // the same value the diagnostic printout in this file uses when
            // N_COEF_EDGE == 1.
            rsideA[i] = (N_COEF_EDGE > 1) ? Bn[i][1] : 0.0;

            //// least square part (2), row i+N_sides:
            //// The 2nd moment of the trace resulting from bubble functions
            //// (y^2, x^2)^T = 0.0 in the least square sense.
            tmpA[i+N_sides][2*By_offset] = two_3rd*(nor[0]*sqr(dy));
            tmpA[i+N_sides][2*By_offset+1] = two_3rd*(nor[1]*sqr(dx));

            rsideA[i+N_sides] = 0.0;
        } //// END:::: for(i = 0; i < polyg->n_sides; i++)

        /// Least square part (3), rows 2*N_sides and 2*N_sides+1:
        /// match B value at centroid of dual cell with the prescribed
        /// B_cent in least square sense.
        // Nodal based part for X-component of B at centroid.
        tmpA[2*N_sides][N_sides] = 1.0;
        // Nodal based part for Y-component of B at centroid.
        tmpA[2*N_sides+1][N_sides+By_offset] = 1.0;

        /// contributions of the bubble functions (y^2, x^2)^T to nodal
        /// values of B at centroid.
        tmpA[2*N_sides][2*By_offset] = sqr(cent[1]);
        tmpA[2*N_sides+1][2*By_offset+1] = sqr(cent[0]);

        rsideA[2*N_sides] = B_cent[0];
        rsideA[2*N_sides+1] = B_cent[1];

        print_matrix("tmpB", P, N, tmpB, "%20.18g ");
        print_matrix("tmpA", M, N, tmpA, "%20.18g ");

        print_general_vector("RHS_B", rsideB, P,"\n");
        print_general_vector("RHS_A", rsideA, M,"\n");

        LDA = M; LDB = P; LWORK = M+N+P;

        // Pack both matrices column by column, as the Fortran solver expects.
        l = 0;
        for(j = 0; j < N; j++)
        {
            for(i = 0; i < M; i++)
                AA[l++] = tmpA[i][j];
        }
        l = 0;
        for(j = 0; j < N; j++)
        {
            for(i = 0; i < P; i++)
                BB[l++] = tmpB[i][j];
        }
        for(i = 0; i < M; i++)  // right side for the least square part
            CC[i] = rsideA[i];
        for(i = 0; i < P; i++)  // right side for the constrained part
            DD[i] = rsideB[i];

        // The solver only writes XX[0..N-1], but the report below also
        // prints XX[2*By_offset+2 .. 2*By_offset+5] (the slots of the full
        // 6-coefficient bubble layout).  Clear them so those entries print
        // as zero instead of reading unset stack memory.
        for(i = 0; i < N+6; i++)
            XX[i] = 0.0;

        FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                           BB, &LDB, CC, DD, XX, ///// double array B, int LDB, double array C, D, X,
                           work, &LWORK, &INFO);

        if(INFO != 0)
        {
            printf("WARNING: tmp_construct_rect()\n");
            printf("dgglse returns %d\n", INFO);
        }

        printf("\n\n &&&&&& Solution Nodal B values:\n");
        for(i = 0; i < polyg->n_sides+1; i++)
        {
            printf("Node %d: B = %e, %e\n", i, XX[i], XX[i+By_offset]);
        }
        printf("\n");
        // NOTE: with the two-bubble formulation assembled above, only the
        // first two values printed on the "a3, a4, a5" line are solved for
        // (the coefficients of (y^2,0)^T and (0,x^2)^T); the rest are zero.
        printf("coef. a3, a4, a5 of bubble functions: %e %e %e\n",
                     XX[2*By_offset], XX[2*By_offset+1], XX[2*By_offset+2]);
        printf("coef. b3, b4, b5 of bubble functions: %e %e %e\n",
                     XX[2*By_offset+3], XX[2*By_offset+4], XX[2*By_offset+5]);
        printf("2a3 + b4 = %e, a4+2b5 = %e\n",
               2.0*XX[2*By_offset] + XX[2*By_offset+4],
               XX[2*By_offset+1] + 2.0*XX[2*By_offset+5]);
        printf("\n");

        // The function is declared to return the polygon; hand it back
        // instead of falling off the end of a non-void function.
        return polyg;
}

/// On each dual cell, a C0 conformal P1 FE approximation is constructed.
/// Global nodes are labeled in the following order: vertices in counter-clockwise order, then the centroid.
/// D.O.F are labeled in the following order: first X-component of B at vertices; 
///                                           then Y-component of B at vertices 
/// First few Legendre polynomials are:
/// P_0(x) = 1,
/// P_1(x) = x,
/// P_2(x) = 0.5*(3x^2-1),
/// P_3(x) = 0.5*(5x^3-3x);     x in[-1, 1].
/// For P1 reconstruction, the D.O.F. are the nodal values of the X- and Y-components of B
/// plus 2 bubble functions: y^2 for the X-component of B and x^2 for the Y-component of B.
EXPORT void Construct_Mag_field_on_dual_cell_P1(
        Front              *fr,
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step, 
        int                total_cell)
{
	SURFACE    **surf;
        POLYGON    *polyg;
        double     x_crds[20], y_crds[20], *cent;
        int        i, j, k, By_offset, tmpk, N_sides;
        POINT      *p0, *p1, *pc;
        double     phi0[20][3], phi1[20][3], phi2[20][3]; 
        double     **tmpA, **tmpB, rsideB[30], rsideA[30];
        double     len, t[3], nor[3], Bn[20][N_COEF_EDGE];
        int        M, N, P, LDA, LDB, l, LWORK, INFO;
        double     AA[1800], BB[1800], CC[800], DD[800], XX[800], work[10000], tmp;
        double     B_cent[3];
        int        debug = NO;
        double     val[45], sqrt_area, *tri_cent, vertB[3]; 
        double     ***conformal_basis_tri, econ_u[10];
        double     dx, dy, x_bar, y_bar, one_3rd, two_3rd;
        TRI        **tris;
        Locstate   sts[20];
        static double **outputB, **outputA = NULL;

        one_3rd = 1.0/3.0; two_3rd = 2.0/3.0;

        // printf("\n\n++++++++++++++++++++++++++++++++++++++++++\n\n");
        // printf("WARNING: In Construct_Mag_field_on_dual_cell_P1(), test rectangle\n");
        // tmp_construct_rect();
        // printf("\n\n++++++++++++++++++++++++++++++++++++++++++\n\n");
        // printf("WARNING: In Construct_Mag_field_on_dual_cell_P1(), exit after tmp_construct_rect()\n");
        // clean_up(0);

        if(NULL == outputA)
        {
           matrix(&outputA,40,40,sizeof(double));
           matrix(&outputB,40,40,sizeof(double));
        }

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (polyg = first_polyg(*surf); !at_end_of_polyg_list(polyg,*surf);
                 polyg = polyg->next )
            {
               if(NO == polyg->closed) continue;

               //// TMP
               if(polyg->id == 447)
               {
                   debug = YES;
                   printf("\n\n-------------------------------------------\n");
                   printf("Dual cell %d, reconstruct B\n\n", polyg->id);
                   // print_polyg_crds(polyg);
                   gview_output_polygon("visual", 0, polyg, "polygon447",NULL);
                   printf("-----------------------------------\n\n");
               }
               else
                   debug = NO;

               Bn_at_dual_cell_edges(polyg,midsoln,dual_cell_midsoln,rk_step,Bn);
               B_at_dual_cell_cent(polyg,fr,midsoln,rk_step,B_cent);

               //// TMP
               if(debug == YES)
               {
                   printf("Bn on dual cell edges:\n");
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       printf("Side %d: ", i);
                       for(j = 0; j < N_COEF_EDGE; j++)
                           printf("%e ", Bn[i][j]);
                       printf("\n");
                   }

                   tris = tri_at_polyg_vert(polyg);
                   if(rk_step == RK_STEP)
                   {
                       for(i = 0; i < polyg->n_sides; i++)
                           sts[i] = tris[i]->st;
                   }
                   else
                   {
                       for(i = 0; i < polyg->n_sides; i++)
                           sts[i] = midsoln[tris[i]->id].st[rk_step];
                   }

                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       tri_cent = fg_centroid(tris[i]);
                       sqrt_area = sqrt(fg_area(tris[i]));

                       for(j = 0; j < MAX_N_COEF; j++)
                           val[j] = vh_val_loc_div_free_basis(tri_cent,tri_cent,sqrt_area,j);

                       vertB[0] = vertB[1] = 0.0;
                       for(j = 0; j < MAX_N_COEF; j++)
                           vertB[0] += dg_B(sts[i])[0][j]*val[j];
                       for(j = 0; j < MAX_N_COEF; j++)
                           vertB[1] += dg_B(sts[i])[1][j]*val[j];
                       printf("B at dual-cell vert[%d]: %e, %e\n", i, vertB[0], vertB[1]);
                   }
                   printf("B at dual cell cent: %e, %e\n\n", B_cent[0], B_cent[1]); 
               } /// END::: if(debug == YES)
               
               cent = polyg_centroid(polyg);
               x_crds[0] = cent[0]; 
               y_crds[0] = cent[1]; 
               // By_offset: offset for where eqns for Y-component of B starts.
               By_offset = polyg->n_sides+1;
               N_sides = polyg->n_sides;

               /// M: number of least square eqn
               /// N: number of unknown;
               /// P: number of constraint eqns, which are satisfied exactly.

               //M: number of slopes of Bn on edges + 
               //          number of 2nd moments of Bn on edges resulting from bubble functions
               //          + mean value of B at dual cell centroid
               // M = polyg->n_sides + 2;
               //M: number of slopes of Bn on edges + 
               //          number of 2nd moments of Bn on edges resulting from bubble functions
               //          + mean value of B at dual cell centroid
               // M = 2*polyg->n_sides + 2;
               M = 2*(polyg->n_sides); // Do not count B at dual cell centroid at this point.
               //N: = D.O.F, which are nodal values of X- and Y- components of B and 6 bubble functions
               N = 2*(polyg->n_sides+1) + 6;  
               // N = 2*(polyg->n_sides+1) + 2;  
               //P: = divergence-free conditions of P1 on each triangular sub-region of the dual cell + 
               //     (n_sides-1) average values of Bn evolved on dual cell edges + 
               //     2 divergence-free conditions of quadratic bubble functions 
               P = polyg->n_sides + (polyg->n_sides-1) + 2;
               // P = polyg->n_sides + (polyg->n_sides-1);

               if(dual_cell_sten_2[polyg->id].Bsten_set == NO)
               {
                   dual_cell_sten_2[polyg->id].Bsten_set = YES;
                   matrix(&tmpA,2*M,N,sizeof(double)); 
                   matrix(&tmpB,P+2,N,sizeof(double)); // have extra line to store one "un-needed" constraint
                                                       // eqn. It is un-needed because of imposing divergence-free
                                                       // condition on all triangular sub-regions of the dual cell
                   dual_cell_sten_2[polyg->id].MB_A = tmpA;
                   dual_cell_sten_2[polyg->id].MB_B = tmpB;

                   for(i = 0; i < M; i++)
                   {
                       for(j = 0; j < N; j++)
                           tmpA[i][j] = 0.0;
                   }
                   for(i = 0; i < P+1; i++)
                   {
                       for(j = 0; j < N; j++)
                           tmpB[i][j] = 0.0;
                   }

                   conformal_basis_tri = polyg->conformal_basis_tri;

                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       p0 = Point_of_polyg(polyg)[i];    
                       p1 = Point_of_polyg(polyg)[(i+1)%N_sides];    
                       x_crds[1] = Coords(p0)[0]; 
                       y_crds[1] = Coords(p0)[1]; 

                       x_crds[2] = Coords(p1)[0]; 
                       y_crds[2] = Coords(p1)[1]; 

                       dx = 0.5*(x_crds[2] - x_crds[1]);
                       dy = 0.5*(y_crds[2] - y_crds[1]);
                       x_bar = 0.5*(x_crds[2] + x_crds[1]);
                       y_bar = 0.5*(y_crds[2] + y_crds[1]);

                       // C0_conformal_basis_tri(x_crds,y_crds,phi0[i],phi1[i],phi2[i]);
                       for(tmpk = 0; tmpk < C0_MAX_N_COEF; tmpk++)
                           phi0[i][tmpk] = conformal_basis_tri[i][0][tmpk];
                       for(tmpk = 0; tmpk < C0_MAX_N_COEF; tmpk++)
                           phi1[i][tmpk] = conformal_basis_tri[i][1][tmpk];
                       for(tmpk = 0; tmpk < C0_MAX_N_COEF; tmpk++)
                           phi2[i][tmpk] = conformal_basis_tri[i][2][tmpk];
                       
                       if(debug == YES)
                       {
                           printf("************* sub-region %d nodal basis:\n", i);
                           print_general_vector("nodal 0", phi0[i], 3,"\n");
                           print_general_vector("nodal 1", phi1[i], 3,"\n");
                           print_general_vector("nodal 2", phi2[i], 3,"\n");
                           val[0] = phi0[i][0] + x_crds[0]*phi0[i][1] + y_crds[0]*phi0[i][2];
                           val[1] = phi0[i][0] + x_crds[1]*phi0[i][1] + y_crds[1]*phi0[i][2];
                           val[2] = phi0[i][0] + x_crds[2]*phi0[i][1] + y_crds[2]*phi0[i][2];
                           printf("nodal values for basis 0: %g, %g, %g\n", val[0], val[1], val[2]);

                           val[0] = phi1[i][0] + x_crds[0]*phi1[i][1] + y_crds[0]*phi1[i][2];
                           val[1] = phi1[i][0] + x_crds[1]*phi1[i][1] + y_crds[1]*phi1[i][2];
                           val[2] = phi1[i][0] + x_crds[2]*phi1[i][1] + y_crds[2]*phi1[i][2];
                           printf("nodal values for basis 1: %g, %g, %g\n", val[0], val[1], val[2]);

                           val[0] = phi2[i][0] + x_crds[0]*phi2[i][1] + y_crds[0]*phi2[i][2];
                           val[1] = phi2[i][0] + x_crds[1]*phi2[i][1] + y_crds[1]*phi2[i][2];
                           val[2] = phi2[i][0] + x_crds[2]*phi2[i][1] + y_crds[2]*phi2[i][2];
                           printf("nodal values for basis 2: %g, %g, %g\n", val[0], val[1], val[2]);
                           printf("\n\n");
                       }

                       if(i != (N_sides-1))
                       {
                           /// Constraint equation for divergence-free for P1 nodal basis functions on 
                           /// each triangular sub-region of the dual cell
                           tmpB[i+N_sides][i] = phi1[i][1];       // coeff. of Bx on node i for ith eqn
                           tmpB[i+N_sides][(i+1)%N_sides] = phi2[i][1];     // coeff. of Bx on node i+1 for ith eqn
                           tmpB[i+N_sides][N_sides] = phi0[i][1];  /// coeff. of Bx on centroid for ith eqn
                           tmpB[i+N_sides][i+By_offset] = phi1[i][2];     /// coeff. of By on node i for ith eqn
                           tmpB[i+N_sides][(i+1)%N_sides+By_offset] = phi2[i][2];   /// coeff. of By on node i+1 for ith eqn
                           tmpB[i+N_sides][N_sides+By_offset] = phi0[i][2]; /// coeff. of By on centroid for ith eqn

                           rsideB[i+N_sides] = 0.0;
                       }

                       /// The trace of normal component of magnetic field 
                       /// supported on dual cell  must be identical to 
                       /// the Bn evolved on the dual cell edge in the integral sense. 
                       /// Definition of trace of local basis functions are given in Pg 433.
                       /// of An Intro. to the Finite Element Method, 3rd Ed, J.N. Reddy

                       /// Constraint part: The average of trace must be equal to avg of Bn 
                       ///                  evolved on the dual cell edge.
                       len = polyg_length_side(polyg)[i];
                       for(k = 0; k < 2; k++)
                           t[k] = polyg_side_vector(polyg)[i][k];
                       nor[0] = t[1]; nor[1] = -t[0];

                       if(debug == YES)
                       {
                           printf("edge normal = [%g, %g]\n\n", nor[0], nor[1]);
                       }

                       // if(i != (N_sides-1))
                       {
                       // DO NOT NEED to impose conservation of Bn on the last edge BECAUSE of
                       // divergence-free conditions. 

                           // coeff. of Bx on node i for (i+n_sides)th eqn
                           tmpB[i][i] = 0.5*nor[0]; 
                           // coeff. of Bx on node (i+1) for (i+n_sides)th eqn
                           tmpB[i][(i+1)%N_sides] = 0.5*nor[0]; 
                           // coeff. of By on node i for (i+n_sides)th eqn
                           tmpB[i][i+By_offset] = 0.5*nor[1]; 
                           // coeff. of By on node (i+1) for (i+n_sides)th eqn
                           tmpB[i][(i+1)%N_sides+By_offset] = 0.5*nor[1];

                           // contribution from bubble functions (y^2, x^2)^T for traces
                           // tmpB[i+N_sides][2*By_offset] = sqr(y_bar)*nor[0] + one_3rd*nor[0]*sqr(dy);
                           // tmpB[i+N_sides][2*By_offset+1] = sqr(x_bar)*nor[1] + one_3rd*nor[1]*sqr(dx);

                           // contribution from bubble functions for traces
                           // Orders of coefficients of bubble function: Bx = a_3 x^2 + a_4 xy + a_5 y^2;
                           // Then By = b_3 x^2 + b_4 xy + b_5 y^2
                           // a_3: 2*By_offset
                           // a_4: 2*By_offset + 1
                           // a_5: 2*By_offset + 2
                           // b_3: 2*By_offset + 3
                           // b_4: 2*By_offset + 4
                           // b_5: 2*By_offset + 5
                           // a_3
                           tmpB[i][2*By_offset]   = (sqr(x_bar) + one_3rd*sqr(dx))*nor[0]; 
                           // a_4
                           tmpB[i][2*By_offset+1] = (x_bar*y_bar + one_3rd*dx*dy )*nor[0]; 
                           // a_5
                           tmpB[i][2*By_offset+2] = (sqr(y_bar) + one_3rd*sqr(dy))*nor[0]; 
                           // b_3
                           tmpB[i][2*By_offset+3] = (sqr(x_bar) + one_3rd*sqr(dx))*nor[1]; 
                           // b_4
                           tmpB[i][2*By_offset+4] = (x_bar*y_bar + one_3rd*dx*dy )*nor[1]; 
                           // b_5
                           tmpB[i][2*By_offset+5] = (sqr(y_bar) + one_3rd*sqr(dy))*nor[1]; 
    
                           rsideB[i] = Bn[i][0];
                       } // END::: if(i != (N_sides-1))

                       //// least square part (1):
                       //// The slope of the trace is equal to slope of Bn
                       //// evolved on the dual cell edge 
                       //// in the least square sense.
                       /// Contributions of nodal basis functions to trace functions 
                       tmpA[i][i] = -0.5*nor[0];
                       tmpA[i][(i+1)%N_sides] = 0.5*nor[0];
                       tmpA[i][i+By_offset] = -0.5*nor[1];
                       tmpA[i][(i+1)%N_sides+By_offset] = 0.5*nor[1];
                       
                       // contribution from bubble functions  (y^2, x^2)^T for the slope of the trace
                       // tmpA[i][2*By_offset] = 2.0*nor[0]*y_bar*dy;
                       // tmpA[i][2*By_offset+1] = 2.0*nor[1]*x_bar*dx;

                       // contribution to slope of trace from a_3 associated term
                       tmpA[i][2*By_offset] = 2.0*nor[0]*x_bar*dx;
                       // contribution to slope of trace from a_4 associated term
                       tmpA[i][2*By_offset+1] = nor[0]*(x_bar*dy + y_bar*dx);
                       // contribution to slope of trace from a_5 associated term
                       tmpA[i][2*By_offset+2] = 2.0*nor[0]*y_bar*dy;

                       // contribution to slope of trace from b_3 associated term
                       tmpA[i][2*By_offset+3] = 2.0*nor[1]*x_bar*dx;
                       // contribution to slope of trace from b_4 associated term
                       tmpA[i][2*By_offset+4] = nor[1]*(x_bar*dy + y_bar*dx);
                       // contribution to slope of trace from b_5 associated term
                       tmpA[i][2*By_offset+5] = 2.0*nor[1]*y_bar*dy;

                       rsideA[i] = Bn[i][1];

                       //// least square part (2):
                       //// The 2nd moment of the trace resulting from bubble functions  (y^2, x^2)^T = 0.0
                       //// in the least square sense. 
                       // tmpA[i+N_sides][2*By_offset] = two_3rd*(nor[0]*sqr(dy));
                       // tmpA[i+N_sides][2*By_offset+1] = two_3rd*(nor[1]*sqr(dx));
                       // contribution to 2nd moment of trace from a_3 associated term
                       tmpA[i+N_sides][2*By_offset] = two_3rd*nor[0]*sqr(dx);
                       // contribution to 2nd moment of trace from a_4 associated term
                       tmpA[i+N_sides][2*By_offset+1] = two_3rd*nor[0]*dx*dy;
                       // contribution to 2nd moment of trace from a_5 associated term
                       tmpA[i+N_sides][2*By_offset+2] = two_3rd*nor[0]*sqr(dy);

                       // contribution to 2nd moment of trace from b_3 associated term
                       tmpA[i+N_sides][2*By_offset+3] = two_3rd*nor[1]*sqr(dx);
                       // contribution to 2nd moment of trace from b_4 associated term
                       tmpA[i+N_sides][2*By_offset+4] = two_3rd*nor[1]*dx*dy;
                       // contribution to 2nd moment of trace from b_5 associated term
                       tmpA[i+N_sides][2*By_offset+5] = two_3rd*nor[1]*sqr(dy);

                       rsideA[i+N_sides] = 0.0;
                   } //// END:::: for(i = 0; i < polyg->n_sides; i++)

                   // Constraint Part again: 
                   // Divergence-free condition for bubble functions.
                   // which are (2*a_3 + b_4)x + (a_4 + 2 b_5)y = 0.0
                   // Positions of these coefficients in the unknown variable vector.
                           // a_3: 2*By_offset
                           // a_4: 2*By_offset + 1
                           // a_5: 2*By_offset + 2
                           // b_3: 2*By_offset + 3
                           // b_4: 2*By_offset + 4
                           // b_5: 2*By_offset + 5
                   tmpB[2*N_sides-1][2*By_offset]   = 2.0; 
                   tmpB[2*N_sides-1][2*By_offset+4]   = 1.0; 
                   tmpB[2*N_sides][2*By_offset+1]   = 1.0; 
                   tmpB[2*N_sides][2*By_offset+5]   = 2.0; 
                   rsideB[2*N_sides-1] = 0.0; // RHS for 2a_3 + b_4 = 0.0
                   rsideB[2*N_sides] = 0.0;   // RHS for a_4 + 2b_5 = 0.0
                   
                   /// This is to match B value at centroid of dual cell 
                   /// with B value at this location from triangles
                   /// in least square sense.
                   // Nodal based part for X-component of B at centroid.
                   tmpA[2*N_sides][N_sides] = 1.0;
                   // Nodal based part for Y-component of B at centroid.
                   tmpA[2*N_sides+1][N_sides+By_offset] = 1.0;
                  
                   /// contributions of the bubble functions (y^2, x^2)^T to nodal values of 
                   /// B at centroid. 
                   // tmpA[2*N_sides][2*By_offset] = sqr(cent[1]);
                   // tmpA[2*N_sides+1][2*By_offset+1] = sqr(cent[0]);

                   /// contributions of the bubble functions to nodal values of 
                   /// X-component of B at centroid. 
                   // a_3
                   tmpA[2*N_sides][2*By_offset] = sqr(cent[0]);
                   // a_4
                   tmpA[2*N_sides][2*By_offset+1] = cent[0]*cent[1];
                   // a_5
                   tmpA[2*N_sides][2*By_offset+2] = sqr(cent[1]);

                   /// contributions of the bubble functions to nodal values of 
                   /// Y-component of B at centroid. 
                   // b_3
                   tmpA[2*N_sides+1][2*By_offset+3] = sqr(cent[0]);
                   // b_4
                   tmpA[2*N_sides+1][2*By_offset+4] = cent[0]*cent[1];
                   // b_5
                   tmpA[2*N_sides+1][2*By_offset+5] = sqr(cent[1]);

                   rsideA[2*N_sides] = B_cent[0];
                   rsideA[2*N_sides+1] = B_cent[1];
               } /// END::: if(dual_cell_sten_2[polyg->id].Bsten_set == NO)
               else
               {
                   tmpA = dual_cell_sten_2[polyg->id].MB_A;
                   tmpB = dual_cell_sten_2[polyg->id].MB_B;

                   printf("ERROR: Construct_Mag_field_on_dual_cell()\n");
                   printf("Need to revise rk != 1 part\n");
                   clean_up(ERROR);

                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       rsideB[i] = 0.0;
                       rsideB[i+N_sides] = Bn[i][0];

                       rsideA[i] = Bn[i][1]; 
                       rsideA[i+N_sides] = 0.0;
                   }
                   rsideB[2*N_sides-1] = 0.0; // RHS for 2a_3 + b_4 = 0.0
                   rsideB[2*N_sides] = 0.0;   // RHS for a_4 + 2b_5 = 0.0

                   rsideA[2*N_sides] = B_cent[0];
                   rsideA[2*N_sides+1] = B_cent[1];
               }

               LDA = M; LDB = P; LWORK = M+N+P;

               l = 0;
               for(j = 0; j < N; j++)
               {
                   for(i = 0; i < M; i++)
                   {
                       AA[l] = tmpA[i][j];
                       l++;
                   }
               }
               l = 0;
               for(j = 0; j < N; j++)
               {
                   for(i = 0; i < P; i++)
                   {
                       BB[l] = tmpB[i][j];
                       l++;
                   }
               }
               for(i = 0; i < M; i++)  // right side for the least square part
                   CC[i] = rsideA[i];
               for(i = 0; i < P; i++)  // right side for the constrained part
                   DD[i] = rsideB[i];
               FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                  BB, &LDB, CC, DD, XX, ///// double array B, int LDB, double array C, D, X,
                                  work, &LWORK, &INFO);
               if(INFO != 0)
               {
                   printf("WARNING: Construct_Mag_field_on_dual_cell()\n");
                   printf("dgglse returns %d\n", INFO);
               }
               /***
               ///// TMP, just get accurate B values at nodal points.
               for(i = 0; i < polyg->n_sides; i++)
               {
                   p0 = Point_of_polyg(polyg)[i];    
                   mag_vort_sol(Coords(p0),0.0,econ_u);
                   XX[i] = econ_u[5]; 
                   XX[i+By_offset] = econ_u[6]; 
               }
               cent = polyg_centroid(polyg);
               mag_vort_sol(cent,0.0,econ_u);
               XX[polyg->n_sides] = econ_u[5];
               XX[polyg->n_sides+By_offset] = econ_u[6];
               ///// END:::TMP, just get accurate B values at nodal points.
               ***/
               if(debug == YES)
               {
                   print_matrix("tmpB", P, N, tmpB, "%20.18g ");
                   print_matrix("tmpA", M, N, tmpA, "%20.18g ");

                   print_general_vector("RHS_B", rsideB, P,"\n");
                   print_general_vector("RHS_A", rsideA, M,"\n");

                   l = 0;
                   for(j = 0; j < N; j++)
                   {
                       for(i = 0; i < M; i++)
                       {
                           outputA[i][j] = AA[l];
                           l++;
                       }
                   }
                   l = 0;
                   for(j = 0; j < N; j++)
                   {
                       for(i = 0; i < P; i++)
                       {
                           outputB[i][j] = BB[l];
                           l++;
                       }
                   }

                   // print_matrix("outputB", P, P, outputB, "%f ");
                   // print_matrix("outputA", min(M,N), N, outputA, "%f ");
               }

               if(rk_step == RK_STEP)
               {
                   for(i = 0; i < polyg->n_sides+1; i++)    
                       Nodal_B_of_polyg(polyg)[0][i] = XX[i];
                   for(i = 0; i < polyg->n_sides+1; i++)
                       Nodal_B_of_polyg(polyg)[1][i] = XX[i+By_offset];

                   Bubble_B_of_polyg(polyg)[0][0] = XX[2*By_offset];
                   Bubble_B_of_polyg(polyg)[0][1] = XX[2*By_offset+1];
                   Bubble_B_of_polyg(polyg)[0][2] = XX[2*By_offset+2];

                   Bubble_B_of_polyg(polyg)[1][0] = XX[2*By_offset+3];
                   Bubble_B_of_polyg(polyg)[1][1] = XX[2*By_offset+4];
                   Bubble_B_of_polyg(polyg)[1][2] = XX[2*By_offset+5];
               }
               else
               {
                   for(i = 0; i < polyg->n_sides+1; i++)    
                       dual_cell_midsoln[polyg->id].Nodal_B[rk_step][0][i] = XX[i];
                   for(i = 0; i < polyg->n_sides+1; i++)    
                       dual_cell_midsoln[polyg->id].Nodal_B[rk_step][1][i] = XX[i+By_offset];

                   dual_cell_midsoln[polyg->id].Bubble_B[rk_step][0][0] = XX[2*By_offset];
                   dual_cell_midsoln[polyg->id].Bubble_B[rk_step][0][1] = XX[2*By_offset+1];
                   dual_cell_midsoln[polyg->id].Bubble_B[rk_step][0][2] = XX[2*By_offset+2];

                   dual_cell_midsoln[polyg->id].Bubble_B[rk_step][1][0] = XX[2*By_offset+3];
                   dual_cell_midsoln[polyg->id].Bubble_B[rk_step][1][1] = XX[2*By_offset+4];
                   dual_cell_midsoln[polyg->id].Bubble_B[rk_step][1][2] = XX[2*By_offset+5];
               }

               if(debug == YES)
               {
                   printf("\n\n &&&&&& Solution Nodal B values:\n");
                   for(i = 0; i < polyg->n_sides+1; i++)
                   {
                       printf("Node %d: B = %e, %e\n", i, XX[i], XX[i+By_offset]);
                   } 
                   printf("\n");
                   printf("coef. a3, a4, a5 of bubble functions: %e %e %e\n", 
                            XX[2*By_offset], XX[2*By_offset+1], XX[2*By_offset+2]);  
                   printf("coef. b3, b4, b5 of bubble functions: %e %e %e\n", 
                            XX[2*By_offset+3], XX[2*By_offset+4], XX[2*By_offset+5]);  
                   printf("2a3 + b4 = %e, a4+2b5 = %e\n",
                      2.0*XX[2*By_offset] + XX[2*By_offset+4], 
                      XX[2*By_offset+1] + 2.0*XX[2*By_offset+5]);
                   printf("\n");

                   /// now check divergence-free condition for each sub-region
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       double tmp_div; 
                       tmp_div = phi1[i][1]*XX[i] + phi2[i][1]*XX[(i+1)%N_sides] + 
                                 phi0[i][1]*XX[N_sides] +
                                 phi1[i][2]*XX[i+By_offset] + phi2[i][2]*XX[(i+1)%N_sides+By_offset] + 
                                 phi0[i][2]*XX[N_sides+By_offset];
                       printf("sub-region %d, P1 divergence error %e\n", i, tmp_div);
                   }
                   printf("\n");

                   trace_of_dual_cell_P1_with_quad_bubbles(polyg,rk_step, dual_cell_midsoln, midsoln);
               }

               // printf("\n\n &&&&&& dual-cell %d Solution values:\n", polyg->id);
               // print_general_vector("XX", XX, 2*By_offset+2, "\n");

            }  /// END::::: for (polyg = first_polyg(*surf); ...)
        } /// END::::: for(surf = fr->mesh->surfaces; surf && *surf; surf++)

        printf("WARNING: EXIT in Construct_Mag_field_on_dual_cell()\n");
        clean_up(0);
}


/*
*       Construct_Mag_field_on_tri_mesh_P1_DG():
*
*       Three stages, in this order:
*         1. Project_dual_Mag_to_tri_skeleton() is called with the dual cell
*            solution for the given RK stage.
*         2. The edge B data of buffer-zone triangles is refreshed.  Which
*            routine does this depends on the build (__MPI__ or not) and on
*            the active debugging() test-case string.
*         3. B_tri_mesh_2nd_reconstruction() converts Bn to Bx, By.
*
*       old_fr is not used by this function.  time is only forwarded to the
*       impose_*_tri_edge_B_buffer() routines.
*/
EXPORT void Construct_Mag_field_on_tri_mesh_P1_DG(
        Front              *old_fr,
        Front              *fr,
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step,
        double             time)
{
        /* Stage 1 */
        Project_dual_Mag_to_tri_skeleton(fr, midsoln, dual_cell_midsoln, rk_step);

        /* Stage 2 */
#if defined(__MPI__)
        parallel_update_tri_edge_B_buffer(fr, midsoln, rk_step);

        /* Only the test cases listed here are accepted in a parallel run; */
        /* any other configuration is reported and the run is aborted.     */
        if(!(debugging("mag_acc_vert") || debugging("decay_alfven") ||
             debugging("rotor")        || debugging("Orszag_T")     ||
             debugging("blast_MHD")    || debugging("field_loop")))
        {
            printf("WARNING: in Construct_Mag_field_on_tri_mesh_P1_DG(),"
                      " before parallel_update_tri_edge_B_buffer()\n");
            fflush(stdout);
            printf("ERROR: Construct_Mag_field_on_tri_mesh_P1_DG(), "
                   "implementing communication for B on edges of triangles in the buffer zone\n");
            clean_up(ERROR);
        }

        /* 01-28-2015: update tri edge B states for tris in global buffer only */
        if(debugging("blast_MHD"))
        {
            impose_blast_MHD_tri_edge_B_buffer(fr,midsoln,rk_step, time);
        }
#else
        /* Each test case is checked on its own, so update_edge_B_buffer() */
        /* runs once per matching case.                                    */
        if(debugging("rotor") || debugging("Orszag_T"))
            update_edge_B_buffer(fr,midsoln,rk_step);

        if(debugging("decay_alfven"))
            update_edge_B_buffer(fr,midsoln,rk_step);

        if(debugging("field_loop"))
            update_edge_B_buffer(fr,midsoln,rk_step);

        if(debugging("mag_acc_vert"))
            impose_mag_acc_vert_tri_edge_B_buffer(fr,midsoln,rk_step, time);

        if(debugging("blast_MHD"))
            impose_blast_MHD_tri_edge_B_buffer(fr,midsoln,rk_step, time);
#endif /* if defined(__MPI__) */

        /* Stage 3: from Bn to Bx, By */
        B_tri_mesh_2nd_reconstruction(fr,midsoln,rk_step,NULL);

        /* Debugging aid, disabled:                                         */
        /* check_global_divgerence_free_on_tri_mesh(fr,midsoln,dual_cell_midsoln,rk_step); */
}

/*
*               check_global_divgerence_free_on_tri_mesh():
*
*       Debugging aid for the edge-normal magnetic field coefficients (Bn)
*       stored on the triangular mesh.  Every tri whose BC_type is not
*       SUBDOMAIN is examined:
*
*       1) For each neighbor that is not SUBDOMAIN, the leading coefficient
*          on the shared side is added to the one the neighbor holds for the
*          same side; a WARNING is printed if the magnitude of that sum
*          exceeds 1.0e-11.
*       2) length*Bn[0]/2 is accumulated over the three sides.  If the
*          magnitude exceeds 1.0e-12, the tri, its side coefficients and the
*          three polygons given by fg_polyg_at_tri_vert() are printed, the
*          same sum is evaluated on each of those polygons, and
*          clean_up(ERROR) is called.
*
*       Coefficients are read from fg_side_dgB()/polyg_side_dgB() when
*       rk_step == RK_STEP, and from slot rk_step of midsoln /
*       dual_cell_midsoln otherwise.
*/
LOCAL void check_global_divgerence_free_on_tri_mesh(
        Front              *fr,
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step)
{
        SURFACE    **surf;
        TRI        *tri, *adj[3];
        POLYGON    *cell;
        int        nb, sd, kc, psd;
        double     own_Bn[MAX_N_POLY_SIDE][N_COEF_EDGE];
        double     adj_Bn[3][3][N_COEF_EDGE];
        double     net_flux;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for(tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                for(nb = 0; nb < 3; nb++)
                    adj[nb] = Tri_on_side(tri,nb);

                /* Bn coefficients on the three sides of this tri. */
                for(sd = 0; sd < 3; sd++)
                {
                    for(kc = 0; kc < N_COEF_EDGE; kc++)
                    {
                        if(rk_step == RK_STEP)
                            own_Bn[sd][kc] = fg_side_dgB(tri)[sd][kc];
                        else
                            own_Bn[sd][kc] = midsoln[tri->id].edge_dgBn[sd][kc][rk_step];
                    }
                }

                /* Same data for every side of each of the three neighbors. */
                for(nb = 0; nb < 3; nb++)
                {
                    for(sd = 0; sd < 3; sd++)
                    {
                        for(kc = 0; kc < N_COEF_EDGE; kc++)
                        {
                            if(rk_step == RK_STEP)
                                adj_Bn[nb][sd][kc] = fg_side_dgB(adj[nb])[sd][kc];
                            else
                                adj_Bn[nb][sd][kc] = midsoln[adj[nb]->id].edge_dgBn[sd][kc][rk_step];
                        }
                    }
                }

                /*
                * Across a shared side the two leading coefficients are
                * expected to cancel (the check is on their sum).
                */
                for(nb = 0; nb < 3; nb++)
                {
                    if(adj[nb]->BC_type == SUBDOMAIN)
                        continue;

                    for(sd = 0; sd < 3; sd++)
                    {
                        if(tri == Tri_on_side(adj[nb], sd) &&
                           fabs(own_Bn[nb][0] + adj_Bn[nb][sd][0]) > 1.0e-11)
                        {
                            printf("WARNING: check global_divgerence_free_on_tri_mesh()\n");
                            printf("tri(%d)'s %dth neighbor(tri %d) do not have same Bn [%14.12g, %14.12g]\n",
                                 tri->id, nb, adj[nb]->id, own_Bn[nb][0], adj_Bn[nb][sd][0]);
                        }
                    }
                }

                /* Length-weighted sum of the leading coefficients of the tri. */
                net_flux = 0.0;
                for(sd = 0; sd < 3; sd++)
                    net_flux += fg_length_side(tri)[sd]*own_Bn[sd][0]/2.0;

                /* Written as !(x > tol) so a NaN sum is treated as in-tolerance. */
                if(!(fabs(net_flux) > 1.0e-12))
                    continue;

                /* Fatal path: dump the tri and the polygons at its vertices. */
                printf("\nWARNING: check global_divgerence_free_on_tri_mesh()\n");
                printf("tri(%d) is not divergence-free, error = %14.12g\n\n", tri->id, net_flux);
                print_tri_crds(tri);

                for(sd = 0; sd < 3; sd++)
                    printf("side[%d], current Bn[%14.12g, %14.12g]\n", sd, own_Bn[sd][0], own_Bn[sd][1]);

                printf("\n------------------\n\n");

                for(sd = 0; sd < 3; sd++)
                {
                    printf("\nvertice (%d) in polygon (%d)\n", 
                       sd, fg_polyg_at_tri_vert(tri)[sd]->id);
                    print_polyg_crds(fg_polyg_at_tri_vert(tri)[sd]);
                }

                /*
                * Repeat the sum on each of the three polygons.  own_Bn is
                * reused as scratch here; its tri values are no longer needed.
                */
                for(sd = 0; sd < 3; sd++)
                {
                    cell = fg_polyg_at_tri_vert(tri)[sd];

                    net_flux = 0.0;
                    for(psd = 0; psd < cell->n_sides; psd++)
                    {
                        for(kc = 0; kc < N_COEF_EDGE; kc++)
                        {
                            if(rk_step == RK_STEP)
                                own_Bn[psd][kc] = polyg_side_dgB(cell)[psd][kc];
                            else
                                own_Bn[psd][kc] = dual_cell_midsoln[cell->id].edge_dgBn[rk_step][psd][kc];
                        }
                        net_flux += polyg_length_side(cell)[psd]*own_Bn[psd][0]/2.0;
                    }

                    if(fabs(net_flux) > 1.0e-12)
                    {
                        printf("------Polygon(%d) is not divergence-free, error = %14.12g\n\n", 
                                  cell->id, net_flux);
                    }
                }

                clean_up(ERROR);
            }
        }
}


LOCAL void Project_dual_Mag_to_tri_skeleton(
        Front              *fr,
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step)
{
	SURFACE           **surf = fr->mesh->surfaces;
        TRI               *tri;
        POLYGON           *polyg[3], *temppolyg;
        int               i, side, j, p_sect, dim = 2, k, tmp_side;
        double            *p_pt, *n_pt, bound;
        double            t[3], nor[3];
        double            x_bar, y_bar, dx, dy;
        double            *crds0, *crds1, crs[3], len, len_to_crs, sqrt_area;
        POINT             **pts;
        double            sub_reg_dgB[2][MAX_N_COEF];
        double            **_piece_cent;
        double            p_tri_Be[2][N_COEF_EDGE], n_tri_Be[2][N_COEF_EDGE];
        double            p_Bn[3][N_COEF_EDGE], n_Bn[3][N_COEF_EDGE];
        double            Bn[3][N_COEF_EDGE], polyg_Bn[MAX_N_POLY_SIDE][N_COEF_EDGE];
        int               debug = NO;
        double            sum = 0.0, tmp, tmp2, tmp_sum_Bn;

        /// TMP
        /***
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for(tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN) 
                {
                    continue;
                }

                for(i = 0; i < 3; i++)
                    polyg[i] = fg_polyg_at_tri_vert(tri)[i];

                match_Bn_at_comm_dual_cell_edges(polyg[0], polyg[1], dual_cell_midsoln, rk_step);
                match_Bn_at_comm_dual_cell_edges(polyg[1], polyg[2], dual_cell_midsoln, rk_step);
                match_Bn_at_comm_dual_cell_edges(polyg[2], polyg[0], dual_cell_midsoln, rk_step);
            }
        }
        ***/
        /// END::: TMP
 
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for(tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN) 
                {
                    continue;
                }

                for(i = 0; i < 3; i++)
                    polyg[i] = fg_polyg_at_tri_vert(tri)[i];

                /**
                if(tri->id == 12774 || tri->id == 1279)
                {
                    printf("\n\n********************************\n");
                    printf("\n\ntri %d in Project_dual_Mag_to_tri_skeleton()\n", tri->id);
                    printf("Takes Bn from polygons (%d, %d, %d)\n\n", polyg[0]->id, polyg[1]->id, polyg[2]->id);
                    printf("neighboring tris(%d, %d, %d)\n", Tri_on_side(tri,0)->id, 
                             Tri_on_side(tri,1)->id, Tri_on_side(tri,2)->id);
                    print_polyg_crds(polyg[0]);
                    print_polyg_crds(polyg[1]);
                    print_polyg_crds(polyg[2]);
                    printf("edge crossing crds %g, %g, %g\n\n", tri->edge_crs[0], tri->edge_crs[1], tri->edge_crs[2]);
                }
                **/
                /***
                if(tri->id == 2269) /// TMP
                {
                    printf("\n\n********************************\n");
                    printf("\n\ntri %d in Project_dual_Mag_to_tri_skeleton()\n", tri->id);
                    printf("Takes Bn from polygons (%d, %d, %d)\n\n", polyg[0]->id, polyg[1]->id, polyg[2]->id);
                    printf("neighboring tris(%d, %d, %d)\n", Tri_on_side(tri,0)->id, 
                             Tri_on_side(tri,1)->id, Tri_on_side(tri,2)->id);
                    // print_polyg_crds(polyg[0]);
                    // print_polyg_crds(polyg[1]);
                    // print_polyg_crds(polyg[2]);
                    printf("edge crossing crds %g, %g, %g\n\n", tri->edge_crs[0], tri->edge_crs[1], tri->edge_crs[2]);
                    printf("tri(2269) neighbors %d, %d, %d\n",
                          Tri_on_side(Tri_on_side(tri,1),0)->id, 
                          Tri_on_side(Tri_on_side(tri,1),1)->id, 
                          Tri_on_side(Tri_on_side(tri,1),2)->id);

                    match_Bn_at_comm_dual_cell_edges(polyg[0], polyg[1], dual_cell_midsoln, rk_step);
                    match_Bn_at_comm_dual_cell_edges(polyg[1], polyg[2], dual_cell_midsoln, rk_step);
                    match_Bn_at_comm_dual_cell_edges(polyg[2], polyg[0], dual_cell_midsoln, rk_step);

                    for(i = 0; i < 3; i++)
                    {
                        Bn_at_dual_cell_edges(polyg[i],NULL,dual_cell_midsoln,rk_step,polyg_Bn);
                        tmp_sum_Bn = net_Bn_flux_on_polygon_edges(polyg[i], polyg_Bn);
                        printf("\n\n--- Project_dual_Mag_to_tri_skeleton(), polygon(%d), net Bn-flux = %14.12g\n",
                                polyg[i]->id, tmp_sum_Bn);
                        for(tmp_side = 0; tmp_side < polyg[i]->n_sides; tmp_side++)
                        {
                            if(rk_step == RK_STEP)
                            {
                                for(k = 0; k < MAX_N_COEF; k++)
                                    sub_reg_dgB[0][k] = polyg_sub_reg_dgB(polyg[i])[tmp_side][0][k];
                                for(k = 0; k < MAX_N_COEF; k++)
                                    sub_reg_dgB[1][k] = polyg_sub_reg_dgB(polyg[i])[tmp_side][1][k];
                            }
                            else
                            {
                                for(k = 0; k < MAX_N_COEF; k++)
                                    sub_reg_dgB[0][k] = dual_cell_midsoln[polyg[i]->id].sub_reg_dgB[rk_step][tmp_side][0][k];
                                for(k = 0; k < MAX_N_COEF; k++)
                                    sub_reg_dgB[1][k] = dual_cell_midsoln[polyg[i]->id].sub_reg_dgB[rk_step][tmp_side][1][k];
                            }
                            printf("---Portion %d, div-error = %14.12g\n", tmp_side, sub_reg_dgB[0][1]+sub_reg_dgB[1][2]);
                        }
                        check_dual_cell_face_B_match_edge(polyg[i], dual_cell_midsoln, rk_step);
                        printf("\n\n");
                    }
                    printf("\n\n");
                    debug = YES;
                }
                else
                    debug = NO;
                ***/
                // END::: if(tri->id == 1279) /// TMP

                for(side = 0; side < 3; side++)
                {
                    //// TMP
                    /****
                    // if((tri->id == 806 && side == 0) || (tri->id == 807 && side == 2))
                    // if((tri->id == 583 && side == 0) || (tri->id == 526 && side == 1))
                    // if((tri->id == 583 && side == 2) || (tri->id == 582 && side == 0))
                    if((tri->id == 583 && side == 1) || (tri->id == 584 && side == 2))
                    {
                        printf("\n\n -----tri %d side %d in Project_dual_Mag_to_tri_skeleton(), bound = %g\n", 
                              tri->id, side, tri->edge_crs[side]);
                        debug = YES;
                    }
                    else
                        debug = NO;
                    ****/
                    //// END::: TMP

                    for(j = 0; j < polyg[side]->n_sides; j++)
                    {
                        if(tri_at_polyg_vert(polyg[side])[j] == tri)
                            break;
                    }
                    p_sect = (j+polyg[side]->n_sides-1)%(polyg[side]->n_sides);
                    pts = Point_of_polyg(polyg[side]);
                    sqrt_area = polyg[side]->_piece_sqrt_area[p_sect];
                    _piece_cent = polyg[side]->_piece_cent;

                    if(YES == debug)
                    {
                        printf("\nthe 1st part of tri[%d]-edge[%d] is in sector %d of dual cell %d\n", 
                                      tri->id, side, p_sect, polyg[side]->id);
                    }

                    if(rk_step == RK_STEP)
                    {
                        for(k = 0; k < MAX_N_COEF; k++)
                            sub_reg_dgB[0][k] = polyg_sub_reg_dgB(polyg[side])[p_sect][0][k];
                        for(k = 0; k < MAX_N_COEF; k++)
                            sub_reg_dgB[1][k] = polyg_sub_reg_dgB(polyg[side])[p_sect][1][k];
                    }
                    else
                    {
                        for(k = 0; k < MAX_N_COEF; k++)
                            sub_reg_dgB[0][k] = dual_cell_midsoln[polyg[side]->id].sub_reg_dgB[rk_step][p_sect][0][k];
                        for(k = 0; k < MAX_N_COEF; k++)
                            sub_reg_dgB[1][k] = dual_cell_midsoln[polyg[side]->id].sub_reg_dgB[rk_step][p_sect][1][k];
                    }
                    
                    /// 1) the portion of the edge that starts at the center of polyg[side]
                    p_pt = Coords(Point_of_tri(tri)[side]);
                    n_pt = Coords(Point_of_tri(tri)[(side+1)%3]);

                    for(i = 0; i < dim; i++)
                        t[i] = fg_side_vector(tri)[side][i];
                    nor[0] = t[1];
                    nor[1] = -t[0];
                    // len = fg_length_side(tri)[side];

                    // crds0 = Coords(pts[p_sect]);
                    // crds1 = Coords(pts[(p_sect+1)%polyg[side]->n_sides]);

                    // cross_segments(crds0[0], crds0[1], crds1[0], crds1[1],
                    //               p_pt[0], p_pt[1], n_pt[0], n_pt[1], crs);

                    // len_to_crs = sqrt(sqr(crs[0]- p_pt[0]) + sqr(crs[1]-p_pt[1]));

                    // bound = 0.5 + 0.5*(len_to_crs/len); // crds of crossing point mapped onto [-1, 1].

                    x_bar = 0.5*(p_pt[0] + n_pt[0]);
                    y_bar = 0.5*(p_pt[1] + n_pt[1]);
                    dx    = 0.5*(n_pt[0] - p_pt[0]);
                    dy    = 0.5*(n_pt[1] - p_pt[1]);

                    // Bx projected on the tri edge
                    p_tri_Be[0][0] = sub_reg_dgB[0][0] +
                                     sub_reg_dgB[0][1]/sqrt_area*(x_bar - _piece_cent[p_sect][0]) +
                                     sub_reg_dgB[0][2]/sqrt_area*(y_bar - _piece_cent[p_sect][1]);
                    p_tri_Be[0][1] = sub_reg_dgB[0][1]/sqrt_area*dx +
                                     sub_reg_dgB[0][2]/sqrt_area*dy;

                    // By projected on the tri edge
                    p_tri_Be[1][0] = sub_reg_dgB[1][0] +
                                     sub_reg_dgB[1][1]/sqrt_area*(x_bar - _piece_cent[p_sect][0]) +
                                     sub_reg_dgB[1][2]/sqrt_area*(y_bar - _piece_cent[p_sect][1]);
                    p_tri_Be[1][1] = sub_reg_dgB[1][1]/sqrt_area*dx +
                                     sub_reg_dgB[1][2]/sqrt_area*dy;

                    /// Bn on the 1st portion of the edge[side]
                    p_Bn[side][0] = p_tri_Be[0][0]*nor[0] + p_tri_Be[1][0]*nor[1];
                    p_Bn[side][1] = p_tri_Be[0][1]*nor[0] + p_tri_Be[1][1]*nor[1];

                    if(YES == debug)
                    {
                        printf("portion 1 of side[%d], Bn[%13.12g, %13.12g]\n",
                                              side, p_Bn[side][0], p_Bn[side][1]);
                    }

                    /// 2) the portion of the edge[side+2] that ends at the center of polyg[side]
                    tmp_side = (side+2)%3;
                    p_sect = (p_sect+1)%(polyg[side]->n_sides);
                    sqrt_area = polyg[side]->_piece_sqrt_area[p_sect];


                    //// TMP
                    /******
                    // if((tri->id == 806 && tmp_side == 0) || (tri->id == 807 && tmp_side == 2))
                    // if((tri->id == 583 && tmp_side == 0) || (tri->id == 526 && tmp_side == 1))
                    // if((tri->id == 583 && tmp_side == 2) || (tri->id == 582 && tmp_side == 0))
                    if((tri->id == 583 && tmp_side == 1) || (tri->id == 584 && tmp_side == 2))
                    {
                        printf("\n----tri %d side %d in Project_dual_Mag_to_tri_skeleton()\n", tri->id, tmp_side);
                        debug = YES;
                    }
                    else
                        debug = NO;

                    if(YES == debug)
                    {
                        printf("\nthe 2nd part of tri[%d]-edge[%d] is in sector %d of dual cell %d\n", 
                               tri->id, tmp_side, p_sect, polyg[side]->id);
                    }
                    ********/
                    //// END::: TMP

                    if(rk_step == RK_STEP)
                    {
                        for(k = 0; k < MAX_N_COEF; k++)
                            sub_reg_dgB[0][k] = polyg_sub_reg_dgB(polyg[side])[p_sect][0][k];
                        for(k = 0; k < MAX_N_COEF; k++)
                            sub_reg_dgB[1][k] = polyg_sub_reg_dgB(polyg[side])[p_sect][1][k];
                    }
                    else
                    {
                        for(k = 0; k < MAX_N_COEF; k++)
                            sub_reg_dgB[0][k] = dual_cell_midsoln[polyg[side]->id].sub_reg_dgB[rk_step][p_sect][0][k];
                        for(k = 0; k < MAX_N_COEF; k++)
                            sub_reg_dgB[1][k] = dual_cell_midsoln[polyg[side]->id].sub_reg_dgB[rk_step][p_sect][1][k];
                    }

                    p_pt = Coords(Point_of_tri(tri)[tmp_side]);
                    n_pt = Coords(Point_of_tri(tri)[(tmp_side+1)%3]);

                    for(i = 0; i < dim; i++)
                        t[i] = fg_side_vector(tri)[tmp_side][i];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    x_bar = 0.5*(p_pt[0] + n_pt[0]);
                    y_bar = 0.5*(p_pt[1] + n_pt[1]);
                    dx    = 0.5*(n_pt[0] - p_pt[0]);
                    dy    = 0.5*(n_pt[1] - p_pt[1]);

                    // Bx projected on the tri edge
                    p_tri_Be[0][0] = sub_reg_dgB[0][0] +
                                     sub_reg_dgB[0][1]/sqrt_area*(x_bar - _piece_cent[p_sect][0]) +
                                     sub_reg_dgB[0][2]/sqrt_area*(y_bar - _piece_cent[p_sect][1]);
                    p_tri_Be[0][1] = sub_reg_dgB[0][1]/sqrt_area*dx +
                                     sub_reg_dgB[0][2]/sqrt_area*dy;

                    // By projected on the tri edge
                    p_tri_Be[1][0] = sub_reg_dgB[1][0] +
                                     sub_reg_dgB[1][1]/sqrt_area*(x_bar - _piece_cent[p_sect][0]) +
                                     sub_reg_dgB[1][2]/sqrt_area*(y_bar - _piece_cent[p_sect][1]);
                    p_tri_Be[1][1] = sub_reg_dgB[1][1]/sqrt_area*dx +
                                     sub_reg_dgB[1][2]/sqrt_area*dy;

                    n_Bn[tmp_side][0] = p_tri_Be[0][0]*nor[0] + p_tri_Be[1][0]*nor[1];
                    n_Bn[tmp_side][1] = p_tri_Be[0][1]*nor[0] + p_tri_Be[1][1]*nor[1];

                    if(YES == debug)
                    {
                        printf("portion 2 of side[%d], Bn[%13.12g, %13.12g]\n",
                                              tmp_side, n_Bn[tmp_side][0], n_Bn[tmp_side][1]);
                    }

                } //// END::: for(side = 0; side < 3; side++)

                /// reconstruct Bn on edges of tri.
                for(side = 0; side < 3; side++)
                {
                    tmp  = sqr(tri->edge_crs[side]);
                    tmp2 = tmp*tri->edge_crs[side];

                    Bn[side][0] = (p_Bn[side][0]*(tri->edge_crs[side]+1.0) + 
                                   p_Bn[side][1]*0.5*(tmp - 1.0) + 
                                   n_Bn[side][0]*(1.0-tri->edge_crs[side]) + 
                                   n_Bn[side][1]*0.5*(1.0 - tmp)
                                  )*0.5; 
                    // Bn[side][1] = minmod(p_Bn[side][1], n_Bn[side][1]); // destroy order of accuracy
                    // Bn[side][1] = 0.5*(p_Bn[side][1] + n_Bn[side][1]); // this preserves order of accuracy
                    Bn[side][1] = 0.5*(1.5*p_Bn[side][0]*(tmp-1.0) + p_Bn[side][1]*(tmp2+1.0) +
                                       1.5*n_Bn[side][0]*(1.0-tmp) + n_Bn[side][1]*(1.0-tmp2)
                                      );
                }

                //// TMP
                /***
                if(tri->id == 583)
                {
                     printf("\ntri %d in Project_dual_Mag_to_tri_skeleton()\n", tri->id);
                     debug = YES;
                }
                else
                    debug = NO;
                if(YES == debug)
                {
                    for(side = 0; side < 3; side++)
                        printf("Bound on edge [%d] = %g\n", side, tri->edge_crs[side]);
                }
                //// END::: TMP
                ***/ 

                ///END::::reconstruct Bn on edges of tri.

                /// tmp, consistency check
                sum = 0.0;
                for(side = 0; side < 3; side++)
                    sum += Bn[side][0]*(fg_length_side(tri)[side])/2.0;
                if(fabs(sum) > 1.0e-12)
                {
                    printf("\n\nERROR: Project_dual_Mag_to_tri_skeleton(),"
                         " sum of Bn = %e on edges of tri[%d] not zero\n", sum, tri->id);
                    print_tri_crds(tri);

                    for(side = 0; side < 3; side++)
                    {
                        printf("side[%d], projected Bn [%14.12g, %14.12g], prev_bn: %13.12g, next_bn: %13.12g (%14.12g)\n", 
                                side, Bn[side][0], Bn[side][1],
                                p_Bn[side][0], n_Bn[side][0], (p_Bn[side][0]+n_Bn[side][0])/2.0);
                    }
                    printf("\n\n********************************\n");
                    printf("\n\ntri %d in Project_dual_Mag_to_tri_skeleton()\n", tri->id);
                    printf("Takes Bn from polygons (%d, %d, %d)\n\n", polyg[0]->id, polyg[1]->id, polyg[2]->id);
                    printf("neighboring tris(%d, %d, %d)\n", Tri_on_side(tri,0)->id, 
                             Tri_on_side(tri,1)->id, Tri_on_side(tri,2)->id);
                    print_polyg_crds(polyg[0]);
                    print_polyg_crds(polyg[1]);
                    print_polyg_crds(polyg[2]);

                    match_Bn_at_comm_dual_cell_edges(polyg[0], polyg[1], dual_cell_midsoln, rk_step);
                    match_Bn_at_comm_dual_cell_edges(polyg[1], polyg[2], dual_cell_midsoln, rk_step);
                    match_Bn_at_comm_dual_cell_edges(polyg[2], polyg[0], dual_cell_midsoln, rk_step);

                    for(i = 0; i < 3; i++)
                    {
                        Bn_at_dual_cell_edges(polyg[i],NULL,dual_cell_midsoln,rk_step,polyg_Bn);
                        tmp_sum_Bn = net_Bn_flux_on_polygon_edges(polyg[i], polyg_Bn);

                        for(tmp_side = 0; tmp_side < polyg[i]->n_sides; tmp_side++)
                        {
                            if(rk_step == RK_STEP)
                            {
                                for(k = 0; k < MAX_N_COEF; k++)
                                    sub_reg_dgB[0][k] = polyg_sub_reg_dgB(polyg[i])[tmp_side][0][k];
                                for(k = 0; k < MAX_N_COEF; k++)
                                    sub_reg_dgB[1][k] = polyg_sub_reg_dgB(polyg[i])[tmp_side][1][k];
                            }
                            else
                            {
                                for(k = 0; k < MAX_N_COEF; k++)
                                    sub_reg_dgB[0][k] = dual_cell_midsoln[polyg[i]->id].sub_reg_dgB[rk_step][tmp_side][0][k];
                                for(k = 0; k < MAX_N_COEF; k++)
                                    sub_reg_dgB[1][k] = dual_cell_midsoln[polyg[i]->id].sub_reg_dgB[rk_step][tmp_side][1][k];
                            }
                            // printf("---Portion %d, div-error = %14.12g\n", tmp_side, sub_reg_dgB[0][1]+sub_reg_dgB[1][2]);
                        }
                        check_dual_cell_face_B_match_edge(polyg[i], dual_cell_midsoln, rk_step);
                        printf("\n\n");
                    }

                    clean_up(ERROR);
                }

                /****
                if(debug == YES)
                {
                    for(side = 0; side < 3; side++)
                    {
                        printf("side[%d], projected Bn [%14.12g, %14.12g], prev_bn: %13.12g, next_bn: %13.12g\n", 
                                side, Bn[side][0], Bn[side][1],
                                p_Bn[side][0], n_Bn[side][0]);
                    }
                    printf("Sum of Bn on tri boundary = %14.12g\n", sum);
                }
                ****/

                if(rk_step == RK_STEP)
                {
                    for(side = 0; side < 3; side++)
                    {
                        for(k = 0; k < N_COEF_EDGE; k++)    
                            fg_side_dgB(tri)[side][k] = Bn[side][k];
                    }
                }
                else
                {
                    for(side = 0; side < 3; side++)
                    {
                        for(k = 0; k < N_COEF_EDGE; k++)    
                            midsoln[tri->id].edge_dgBn[side][k][rk_step] = Bn[side][k];
                    }
                }
            }
        }

        // TMP
        /***
        printf("\n\n-------- End of Project_dual_Mag_to_tri_skeleton()--\n\n"); 
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (temppolyg = first_polyg(*surf); !at_end_of_polyg_list(temppolyg,*surf);
                 temppolyg = temppolyg->next )
            {
                if(temppolyg->id == 169)
                {
                    for(side = 0; side < temppolyg->n_sides; side++)
                    {
                        print_tri_crds(tri_at_polyg_vert(temppolyg)[side]); 
                        print_tri_crds(tri_at_polyg_vert(temppolyg)[(side+1)%temppolyg->n_sides]); 
                        match_Bn_at_comm_tri_edges( tri_at_polyg_vert(temppolyg)[side], 
                          tri_at_polyg_vert(temppolyg)[(side+1)%temppolyg->n_sides],
                          midsoln, rk_step); 
                    }
                }
            }
        }
        ***/
}

/*
*               net_Bn_flux_on_polygon_edges():
*
*       Returns the sum over all sides of polyg of
*       polyg_length_side(polyg)[side] * Bn[side][0], i.e. the side lengths
*       weighted by the leading coefficient of each row of Bn.  Only column 0
*       of Bn is read; Bn must have at least polyg->n_sides rows.
*/
EXPORT double net_Bn_flux_on_polygon_edges(
        POLYGON    *polyg,
        double     Bn[][N_COEF_EDGE])
{
        double     total = 0.0;
        int        edge = 0;

        /* Accumulate in ascending side order. */
        while(edge < polyg->n_sides)
        {
            total += polyg_length_side(polyg)[edge]*Bn[edge][0];
            edge++;
        }

        return total;
}


EXPORT void Construct_Mag_field_on_dual_cell_P1_DG(
        Front              *fr,
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step,
        int                total_cell)
{
        SURFACE    **surf;
        POLYGON    *polyg, *tmpg;
        double     x_crds[20], y_crds[20], *cent;
        int        i, j, k, By_offset, tmpk, N_sides;
        POINT      *p0, *p1, *pc;
        double     phi0[20][3], phi1[20][3], phi2[20][3]; 
        double     **tmpA, **tmpB, rsideB[90], rsideA[90];
        double     len, t[3], nor[3], Bn[20][N_COEF_EDGE];
        int        M, N, P, LDA, LDB, l, LWORK, INFO;
        double     AA[5000], BB[5000], CC[800], DD[800], XX[800], work[25000], tmp,tmpslope;
        double     B_cent[3];
        int        debug = NO, var_offset = 6;
        double     val[45], area[20], sqrt_area[20], sqrt_area1, area2, sqrt_area2, *tri_cent, vertB[3];
        double     ***conformal_basis_tri, econ_u[10];
        double     **DG_basis_tri;
        double     dx, dy, x_bar, y_bar, one_3rd, two_3rd;
        TRI        **tris;
        Locstate   sts[20];
        static double **outputB, **outputA = NULL;
        double     pcrds0[3], pcrds1[3], pcrds2[3], piece_cent[20][3];
        double     qB[20][6][2], qt_crds[20][6][3], tmp_sum_Bn, con_u[10];

        one_3rd = 1.0/3.0; two_3rd = 2.0/3.0;

        // printf("\n\n++++++++++++++++++++++++++++++++++++++++++\n\n");
        // printf("WARNING: In Construct_Mag_field_on_dual_cell_P1_DG(), test rectangle\n");
        // tmp_construct_rect_DG();
        // tmp_construct_rect_DG_closed_config();
        // printf("\n\n++++++++++++++++++++++++++++++++++++++++++\n\n");
        // printf("WARNING: In Construct_Mag_field_on_dual_cell_P1_DG(), exit after tmp_construct_rect_DG()\n");
        // clean_up(0);

        pcrds0[2] = pcrds1[2] = pcrds2[2] = 0.0;

        // if(NULL == outputA)
        // {
        //    matrix(&outputA,50,50,sizeof(double));
        //    matrix(&outputB,50,50,sizeof(double));
        // }
        
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (polyg = first_polyg(*surf); !at_end_of_polyg_list(polyg,*surf);
                 polyg = polyg->next )
            {
               if(NO == polyg->closed) continue;

               //// TMP
               /**
               if(polyg->id == 200 || polyg->id == 240)
                   debug = YES;
               else 
                   debug = NO;
                   printf("Dual cell %d, reconstruct B\n\n", polyg->id); fflush(stdout); 
               **/

               /***
               if(polyg->id == 1196)
               {
                   double tmp_polyg_Bn[MAX_N_POLY_SIDE][N_COEF_EDGE];

                   debug = YES;
                   printf("\n\n-------------------------------------------\n");
                   printf("Dual cell %d, reconstruct B\n\n", polyg->id);
                   // print_polyg_crds(polyg);
                   // gview_output_polygon("visual", 0, polyg, "polygon447",NULL);
                   printf("-----------------------------------\n\n");
                   Bn_at_dual_cell_edges(polyg,NULL,dual_cell_midsoln,rk_step,tmp_polyg_Bn);
                   printf("\n\n--- polygon(%d), net Bn-flux = %14.12g\n",
                                polyg->id, net_Bn_flux_on_polygon_edges(polyg, tmp_polyg_Bn));

                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       tmpg = Polyg_on_side(polyg, i);
                       // if(tmpg->id == 302 || tmpg->id == 331 || tmpg->id == 301)
                       {
                           if(rk_step == RK_STEP)
                               printf("    side[%d] linear Bn = [%14.12g, %14.12g] of dual cell %d\n", i,
                                 polyg_side_dgB(polyg)[i][0], polyg_side_dgB(polyg)[i][1], polyg->id);
                           else
                               printf("    side[%d] linear Bn = [%14.12g, %14.12g] of dual cell %d\n", i,
                                 dual_cell_midsoln[polyg->id].edge_dgBn[rk_step][i][0],
                                 dual_cell_midsoln[polyg->id].edge_dgBn[rk_step][i][1], polyg->id);

                           for(k = 0; k < tmpg->n_sides; k++)
                           {
                               if(polyg == Polyg_on_side(tmpg, k)) break;
                           }
                           if(rk_step == RK_STEP)
                               printf("adj side[%d] linear Bn = [%14.12g, %14.12g], from dual cell %d\n", k,
                                     polyg_side_dgB(tmpg)[k][0], polyg_side_dgB(tmpg)[k][1], tmpg->id);
                           else
                               printf("adj side[%d] linear Bn = [%14.12g, %14.12g], from dual cell %d\n", k,
                                     dual_cell_midsoln[tmpg->id].edge_dgBn[rk_step][k][0],
                                     dual_cell_midsoln[tmpg->id].edge_dgBn[rk_step][k][1], tmpg->id);
                           printf("\n\n");
                       }
                   }
                   printf("-----------------------------------\n\n");
               }
               else
                   debug = NO;
               ***/

               Bn_at_dual_cell_edges(polyg,midsoln,dual_cell_midsoln,rk_step,Bn);

               ///// TMP check
               /*****
               tmp_sum_Bn = net_Bn_flux_on_polygon_edges(polyg, Bn);
               if(fabs(tmp_sum_Bn) > 1.0e-12)
               {
                   printf("\nWARNING: Construct_Mag_field_on_dual_cell_P1_DG()\n");
                   printf("polygon(%d) is not div-free, error = %14.12g\n", polyg->id, tmp_sum_Bn);
               }
               if(polyg->id == 302 || polyg->id == 332 || polyg->id == 331)
               {
                   printf("\n--- Construct_Mag_field_on_dual_cell_P1_DG(), polygon(%d), net Bn-flux = %14.12g\n\n",
                               polyg->id, tmp_sum_Bn);
               }
               ******/
               ///// END::: TMP check

               // Mag_at_tri_edge_quadrature_for_P1(polyg,fr,midsoln,rk_step,qt_crds,qB);

               cent = polyg_centroid(polyg);
               pcrds0[0] = cent[0];
               pcrds0[1] = cent[1];
               // By_offset: offset for where eqns for Y-component of B starts.
               By_offset = polyg->n_sides+1;
               N_sides = polyg->n_sides;

               /// M: number of least square eqn
               /// N: number of unknown;
               /// P: number of constraint eqns, which are satisfied exactly.

               // N: each tri-piece has two linear polynomials for x- and y-component of B(x,y) 
               //    respectively. Each linear polynomial has 3 unknowns.
               N = 6*N_sides; 

               // P: 2*(n inner edges + n outer edges) + n -1 = 5n -1. 
               // 4n (match trace avg and slope exactly) + n divergence-free condition - 1 (avg of trace at inner edge). 
               P = 2*N_sides + 2*N_sides + N_sides - 1;
               // P = 2*N_sides + N_sides;

               // M: choose x- and y-component of B values at quadrature points on edges of the tri-grid.
               // This leads to some linear dependency between constraint and least-square eqns.
               // M = 2*N_sides;  

               /***
               // 01-28-2015. comment out working version  //03/30/2015 try this for decay_alfven wave again
               // M: choose x- and y-component of B values at 3 vertices of the triangular- subregions. 12/05/2012
               M = 2*(3*N_sides);  
               // M: choose x- and y-component of B values at 2 vertices of the triangular- subregions. 12/05/2012
               // M = 2*(2*N_sides);  //old
               // quadrature pts where we do least-square fitting for magnetic field.
               for(i = 0; i < polyg->n_sides; i++)
               {
                   p0 = Point_of_polyg(polyg)[i];
                   p1 = Point_of_polyg(polyg)[(i+1)%N_sides];

                   qt_crds[i][0][0] = pcrds0[0];
                   qt_crds[i][0][1] = pcrds0[1];
                   qt_crds[i][1][0] = Coords(p0)[0];
                   qt_crds[i][1][1] = Coords(p0)[1];
                   qt_crds[i][2][0] = Coords(p1)[0];
                   qt_crds[i][2][1] = Coords(p1)[1];
               }
               quadrature_Mag_at_tri_for_P1(polyg,fr,midsoln,rk_step,qt_crds,qB);
               // 01-28-2015. comment out working version // END:: 03/30/2015 try this for decay_alfven wave again
               ****/

               // 01-28-2015. Use following quadratures: 
               // Take 2 vertices of the partial cell, which
               // are centroids of tris. And 2 midpoints on edges connecting
               // vertices(centroids of tris) and centroid of polygon.
               // M: choose x- and y-component of B values at 4 points of the triangular- subregions. 12/05/2012
               M = 2*(4*N_sides);  
               for(i = 0; i < polyg->n_sides; i++)
               {
                   p0 = Point_of_polyg(polyg)[i];
                   p1 = Point_of_polyg(polyg)[(i+1)%N_sides];

                   qt_crds[i][0][0] = (pcrds0[0]+Coords(p0)[0])*0.5;
                   qt_crds[i][0][1] = (pcrds0[1]+Coords(p0)[1])*0.5;
                   qt_crds[i][1][0] = Coords(p0)[0];
                   qt_crds[i][1][1] = Coords(p0)[1];

                   qt_crds[i][2][0] = (pcrds0[0]+Coords(p1)[0])*0.5;
                   qt_crds[i][2][1] = (pcrds0[1]+Coords(p1)[1])*0.5;
                   qt_crds[i][3][0] = Coords(p1)[0];
                   qt_crds[i][3][1] = Coords(p1)[1];
               }

               // if(YES == debug)
               //     printf("cell %d, n_sides %d, M N P = %d, %d, %d\n", polyg->id, polyg->n_sides,  M,  N, P); 
 
               quadrature_Mag_at_tri_for_P1_ver2(polyg,fr,midsoln,rk_step,qt_crds,qB);
               // END::: quadrature pts where we do least-square fitting for magnetic field.
               // if(YES == debug)
               //     printf("cell %d, after quadrature_Mag_at_tri_for_P1_ver2\n", polyg->id); 

               /**
               // 04/01/2015. implement using 3 vertices and two midpoints of two edges as
               // least squares constraints for reconstructing B on partial cells.
               M = 2*(5*N_sides);
               for(i = 0; i < polyg->n_sides; i++)
               {
                   p0 = Point_of_polyg(polyg)[i];
                   p1 = Point_of_polyg(polyg)[(i+1)%N_sides];

                   qt_crds[i][0][0] = pcrds0[0];
                   qt_crds[i][0][1] = pcrds0[1];
                   qt_crds[i][1][0] = (pcrds0[0]+Coords(p0)[0])*0.5;
                   qt_crds[i][1][1] = (pcrds0[1]+Coords(p0)[1])*0.5;
                   qt_crds[i][2][0] = Coords(p0)[0];
                   qt_crds[i][2][1] = Coords(p0)[1];

                   qt_crds[i][3][0] = (pcrds0[0]+Coords(p1)[0])*0.5;
                   qt_crds[i][3][1] = (pcrds0[1]+Coords(p1)[1])*0.5;
                   qt_crds[i][4][0] = Coords(p1)[0];
                   qt_crds[i][4][1] = Coords(p1)[1];
               }
               quadrature_Mag_at_tri_for_P1_ver3_5pts(polyg,fr,midsoln,rk_step,qt_crds,qB);
               END:: 04/01/2015. **/

               if(dual_cell_sten_2[polyg->id].Bsten_set == NO)
               {
                   dual_cell_sten_2[polyg->id].Bsten_set = YES;
                   matrix(&tmpA, M, N,sizeof(double));
                   // matrix(&tmpB, P+1, N,sizeof(double)); // P = 2*N_sides + 2*N_sides + N_sides - 1;
                   matrix(&tmpB, (2*N_sides + 2*N_sides + N_sides), N,sizeof(double)); 

                   dual_cell_sten_2[polyg->id].MB_A = tmpA;
                   dual_cell_sten_2[polyg->id].MB_B = tmpB;

                   for(i = 0; i < M; i++)
                   {
                       for(j = 0; j < N; j++)
                           tmpA[i][j] = 0.0;
                   }
                   for(i = 0; i < P; i++)
                   {
                       for(j = 0; j < N; j++)
                           tmpB[i][j] = 0.0;
                   }

                   /// 1) divergence-free
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       tmpB[i][i*var_offset+1] = 1.0; // a1
                       tmpB[i][i*var_offset+5] = 1.0; // b2
                       rsideB[i] = 0.0;
                   }

                   /// 2) match traces on outer edges
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       p0 = Point_of_polyg(polyg)[i];
                       p1 = Point_of_polyg(polyg)[(i+1)%N_sides];
                       pcrds1[0] = Coords(p0)[0];
                       pcrds1[1] = Coords(p0)[1];

                       pcrds2[0] = Coords(p1)[0];
                       pcrds2[1] = Coords(p1)[1];

                       area[i] = triangle_area_3d(pcrds0, pcrds1, pcrds2);
                       sqrt_area[i] = sqrt_area1 = sqrt(area[i]);
                       for(k = 0; k < 2; k++)
                           piece_cent[i][k] = one_3rd*(pcrds0[k]+ pcrds1[k] + pcrds2[k]);

                       /// TMP
                       // printf("piece[%d] centroid[%13.12g, %13.12g], qcrds[%13.12g, %13.12g]\n", i,
                       //           piece_cent[i][0], piece_cent[i][1],
                       //           qt_crds[i][0][0], qt_crds[i][0][1]);
                       /// END::: TMP
                       
                       dx = 0.5*(pcrds2[0] - pcrds1[0]);
                       dy = 0.5*(pcrds2[1] - pcrds1[1]);
                       x_bar = 0.5*(pcrds2[0] + pcrds1[0]);
                       y_bar = 0.5*(pcrds2[1] + pcrds1[1]);

                       for(k = 0; k < 2; k++)
                           t[k] = polyg_side_vector(polyg)[i][k];
                       nor[0] = t[1]; nor[1] = -t[0];

                       // average of trace
                       tmpB[i+N_sides][i*var_offset  ] = nor[0];
                       tmpB[i+N_sides][i*var_offset+1] = nor[0]*(x_bar-piece_cent[i][0])/sqrt_area1;
                       tmpB[i+N_sides][i*var_offset+2] = nor[0]*(y_bar-piece_cent[i][1])/sqrt_area1;
                       tmpB[i+N_sides][i*var_offset+3] = nor[1];
                       tmpB[i+N_sides][i*var_offset+4] = nor[1]*(x_bar-piece_cent[i][0])/sqrt_area1;
                       tmpB[i+N_sides][i*var_offset+5] = nor[1]*(y_bar-piece_cent[i][1])/sqrt_area1;
                       rsideB[i+N_sides] = Bn[i][0];

                       // slope of trace
                       tmpB[i+2*N_sides][i*var_offset+1] = nor[0]*dx/sqrt_area1;
                       tmpB[i+2*N_sides][i*var_offset+2] = nor[0]*dy/sqrt_area1;
                       tmpB[i+2*N_sides][i*var_offset+4] = nor[1]*dx/sqrt_area1;
                       tmpB[i+2*N_sides][i*var_offset+5] = nor[1]*dy/sqrt_area1;
                       rsideB[i+2*N_sides] = Bn[i][1];
                   }//// END::: 2) match traces on outer edges

                   /// 3) match traces on inner edges
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       p1 = Point_of_polyg(polyg)[(i+1)%N_sides];
                       pcrds2[0] = Coords(p1)[0];
                       pcrds2[1] = Coords(p1)[1];

                       t[0] = pcrds0[0]-pcrds2[0];
                       t[1] = pcrds0[1]-pcrds2[1];
                       tmp = sqrt(sqr(t[0]) + sqr(t[1]));
                       nor[0] = t[1]/tmp; nor[1] = -t[0]/tmp;

                       // sqrt_area1 = sqrt(area[i]);
                       // sqrt_area2 = sqrt(area[(i+1)%N_sides]);
                       sqrt_area1 = sqrt_area[i];
                       sqrt_area2 = sqrt_area[(i+1)%N_sides];

                       dx = 0.5*(pcrds0[0] - pcrds2[0]);
                       dy = 0.5*(pcrds0[1] - pcrds2[1]);
                       x_bar = 0.5*(pcrds2[0] + pcrds0[0]);
                       y_bar = 0.5*(pcrds2[1] + pcrds0[1]);

                       /// 3.1.a) match slope of trace on inner edge for ith-piece
                       tmpB[i+3*N_sides][i*var_offset+1] = nor[0]*dx/sqrt_area1;
                       tmpB[i+3*N_sides][i*var_offset+2] = nor[0]*dy/sqrt_area1;
                       tmpB[i+3*N_sides][i*var_offset+4] = nor[1]*dx/sqrt_area1;
                       tmpB[i+3*N_sides][i*var_offset+5] = nor[1]*dy/sqrt_area1;

                       /// 3.2.a) match average of trace on inner edge for ith-piece
                       tmpB[i+4*N_sides][i*var_offset  ] = nor[0];
                       tmpB[i+4*N_sides][i*var_offset+1] = nor[0]*(x_bar-piece_cent[i][0])/sqrt_area1;
                       tmpB[i+4*N_sides][i*var_offset+2] = nor[0]*(y_bar-piece_cent[i][1])/sqrt_area1;
                       tmpB[i+4*N_sides][i*var_offset+3] = nor[1];
                       tmpB[i+4*N_sides][i*var_offset+4] = nor[1]*(x_bar-piece_cent[i][0])/sqrt_area1;
                       tmpB[i+4*N_sides][i*var_offset+5] = nor[1]*(y_bar-piece_cent[i][1])/sqrt_area1;

                       nor[0] *= -1.0; nor[1] *= -1.0;
                       dx *= -1.0; dy *= -1.0;
                       /// 3.1.b) match slope of trace on inner edge for (i+1)th-piece
                       tmpB[i+3*N_sides][((i+1)%N_sides)*var_offset+1] = -(nor[0]*dx/sqrt_area2);
                       tmpB[i+3*N_sides][((i+1)%N_sides)*var_offset+2] = -(nor[0]*dy/sqrt_area2);
                       tmpB[i+3*N_sides][((i+1)%N_sides)*var_offset+4] = -(nor[1]*dx/sqrt_area2);
                       tmpB[i+3*N_sides][((i+1)%N_sides)*var_offset+5] = -(nor[1]*dy/sqrt_area2);

                       rsideB[i+3*N_sides] = 0.0;

                       /// 3.2.b) match average of trace on inner edge for (i+1)th-piece
                       tmpB[i+4*N_sides][((i+1)%N_sides)*var_offset  ] = nor[0];
                       tmpB[i+4*N_sides][((i+1)%N_sides)*var_offset+1] = nor[0]*(x_bar-piece_cent[(i+1)%N_sides][0])/sqrt_area2;
                       tmpB[i+4*N_sides][((i+1)%N_sides)*var_offset+2] = nor[0]*(y_bar-piece_cent[(i+1)%N_sides][1])/sqrt_area2;
                       tmpB[i+4*N_sides][((i+1)%N_sides)*var_offset+3] = nor[1];
                       tmpB[i+4*N_sides][((i+1)%N_sides)*var_offset+4] = nor[1]*(x_bar-piece_cent[(i+1)%N_sides][0])/sqrt_area2;
                       tmpB[i+4*N_sides][((i+1)%N_sides)*var_offset+5] = nor[1]*(y_bar-piece_cent[(i+1)%N_sides][1])/sqrt_area2; 

                       rsideB[i+4*N_sides] = 0.0;
                   }/// END::: /// 3) match traces on inner edges

                   /***
                   /// least square part: match B at quadrature points from the tri-grid, old
                   //// old
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       tmpA[i*2+1][i*var_offset+3] = tmpA[i*2][i*var_offset  ] = 1.0;
                       tmpA[i*2+1][i*var_offset+4] = tmpA[i*2][i*var_offset+1] = 
                                               (qt_crds[i][0][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*2+1][i*var_offset+5] = tmpA[i*2][i*var_offset+2] = 
                                               (qt_crds[i][0][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*2] = qB[i][0][0];
                       // tmpA[i*2+1][i*var_offset+3] = 1.0;
                       // tmpA[i*2+1][i*var_offset+4] = (qt_crds[i][0][0]-piece_cent[i][0])/sqrt_area[i];
                       // tmpA[i*2+1][i*var_offset+5] = (qt_crds[i][0][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*2+1] = qB[i][0][1];
                   }
                   ***/
                   /****
                   // 01-28-2015, comment out working version //03/30/2015 try this for decay_alfven wave again
                   /// NEW: least square part: match B at vertices of triangular subregions.
                   /// This uses:  M = 2*(3*N_sides); /// 12/05/2012
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       tmpA[i*6+1][i*var_offset+3] = tmpA[i*6][i*var_offset  ] = 1.0;
                       tmpA[i*6+1][i*var_offset+4] = tmpA[i*6][i*var_offset+1] = 
                                                       (qt_crds[i][0][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*6+1][i*var_offset+5] = tmpA[i*6][i*var_offset+2] = 
                                                       (qt_crds[i][0][1]-piece_cent[i][1])/sqrt_area[i];  
                       rsideA[i*6] = qB[i][0][0];

                       // tmpA[i*6+1][i*var_offset+3] = 1.0;
                       // tmpA[i*6+1][i*var_offset+4] = (qt_crds[i][0][0]-piece_cent[i][0])/sqrt_area[i];
                       // tmpA[i*6+1][i*var_offset+5] = (qt_crds[i][0][1]-piece_cent[i][1])/sqrt_area[i];  
                       rsideA[i*6+1] = qB[i][0][1];

                       tmpA[i*6+3][i*var_offset+3] = tmpA[i*6+2][i*var_offset  ] = 1.0;
                       tmpA[i*6+3][i*var_offset+4] = tmpA[i*6+2][i*var_offset+1] = 
                                                       (qt_crds[i][1][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*6+3][i*var_offset+5] = tmpA[i*6+2][i*var_offset+2] = 
                                                       (qt_crds[i][1][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*6+2] = qB[i][1][0];

                       // tmpA[i*6+3][i*var_offset+3] = 1.0;
                       // tmpA[i*6+3][i*var_offset+4] = (qt_crds[i][1][0]-piece_cent[i][0])/sqrt_area[i];
                       // tmpA[i*6+3][i*var_offset+5] = (qt_crds[i][1][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*6+3] = qB[i][1][1];
                        
                       tmpA[i*6+5][i*var_offset+3] = tmpA[i*6+4][i*var_offset  ] = 1.0;
                       tmpA[i*6+5][i*var_offset+4] = tmpA[i*6+4][i*var_offset+1] = 
                                                         (qt_crds[i][2][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*6+5][i*var_offset+5] = tmpA[i*6+4][i*var_offset+2] = 
                                                         (qt_crds[i][2][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*6+4] = qB[i][2][0];

                       // tmpA[i*6+5][i*var_offset+3] = 1.0;
                       // tmpA[i*6+5][i*var_offset+4] = (qt_crds[i][2][0]-piece_cent[i][0])/sqrt_area[i];
                       // tmpA[i*6+5][i*var_offset+5] = (qt_crds[i][2][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*6+5] = qB[i][2][1];
                   }
                   // END::: 01-28-2015, comment out working version //03/30/2015 try this for decay_alfven wave again
                   *****/

                   /// 01-28-2015:
                   /// This uses:  M = 2*(4*N_sides);
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       tmpA[i*8+1][i*var_offset+3] = tmpA[i*8][i*var_offset  ] = 1.0;
                       tmpA[i*8+1][i*var_offset+4] = tmpA[i*8][i*var_offset+1] = 
                                                       (qt_crds[i][0][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*8+1][i*var_offset+5] = tmpA[i*8][i*var_offset+2] = 
                                                       (qt_crds[i][0][1]-piece_cent[i][1])/sqrt_area[i];  
                       rsideA[i*8] = qB[i][0][0];
                       rsideA[i*8+1] = qB[i][0][1];

                       tmpA[i*8+3][i*var_offset+3] = tmpA[i*8+2][i*var_offset  ] = 1.0;
                       tmpA[i*8+3][i*var_offset+4] = tmpA[i*8+2][i*var_offset+1] = 
                                                       (qt_crds[i][1][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*8+3][i*var_offset+5] = tmpA[i*8+2][i*var_offset+2] = 
                                                       (qt_crds[i][1][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*8+2] = qB[i][1][0];
                       rsideA[i*8+3] = qB[i][1][1];
                        
                       tmpA[i*8+5][i*var_offset+3] = tmpA[i*8+4][i*var_offset  ] = 1.0;
                       tmpA[i*8+5][i*var_offset+4] = tmpA[i*8+4][i*var_offset+1] = 
                                                         (qt_crds[i][2][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*8+5][i*var_offset+5] = tmpA[i*8+4][i*var_offset+2] = 
                                                         (qt_crds[i][2][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*8+4] = qB[i][2][0];
                       rsideA[i*8+5] = qB[i][2][1];

                       tmpA[i*8+7][i*var_offset+3] = tmpA[i*8+6][i*var_offset  ] = 1.0;
                       tmpA[i*8+7][i*var_offset+4] = tmpA[i*8+6][i*var_offset+1] = 
                                                         (qt_crds[i][3][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*8+7][i*var_offset+5] = tmpA[i*8+6][i*var_offset+2] = 
                                                         (qt_crds[i][3][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*8+6] = qB[i][3][0];
                       rsideA[i*8+7] = qB[i][3][1];
                   }

                   /**
                    // 04/01/2015. implement using 3 vertices and two midpoints of two edges as
                    // least squares constraints for reconstructing B on partial cells.
                   /// This uses:  M = 2*(5*N_sides);
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       tmpA[i*10+1][i*var_offset+3] = tmpA[i*10][i*var_offset  ] = 1.0;
                       tmpA[i*10+1][i*var_offset+4] = tmpA[i*10][i*var_offset+1] = 
                                                       (qt_crds[i][0][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*10+1][i*var_offset+5] = tmpA[i*10][i*var_offset+2] = 
                                                       (qt_crds[i][0][1]-piece_cent[i][1])/sqrt_area[i];  
                       rsideA[i*10] = qB[i][0][0];
                       rsideA[i*10+1] = qB[i][0][1];

                       tmpA[i*10+3][i*var_offset+3] = tmpA[i*10+2][i*var_offset  ] = 1.0;
                       tmpA[i*10+3][i*var_offset+4] = tmpA[i*10+2][i*var_offset+1] = 
                                                       (qt_crds[i][1][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*10+3][i*var_offset+5] = tmpA[i*10+2][i*var_offset+2] = 
                                                       (qt_crds[i][1][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*10+2] = qB[i][1][0];
                       rsideA[i*10+3] = qB[i][1][1];
                        
                       tmpA[i*10+5][i*var_offset+3] = tmpA[i*10+4][i*var_offset  ] = 1.0;
                       tmpA[i*10+5][i*var_offset+4] = tmpA[i*10+4][i*var_offset+1] = 
                                                         (qt_crds[i][2][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*10+5][i*var_offset+5] = tmpA[i*10+4][i*var_offset+2] = 
                                                         (qt_crds[i][2][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*10+4] = qB[i][2][0];
                       rsideA[i*10+5] = qB[i][2][1];

                       tmpA[i*10+7][i*var_offset+3] = tmpA[i*10+6][i*var_offset  ] = 1.0;
                       tmpA[i*10+7][i*var_offset+4] = tmpA[i*10+6][i*var_offset+1] = 
                                                         (qt_crds[i][3][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*10+7][i*var_offset+5] = tmpA[i*10+6][i*var_offset+2] = 
                                                         (qt_crds[i][3][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*10+6] = qB[i][3][0];
                       rsideA[i*10+7] = qB[i][3][1];

                       tmpA[i*10+9][i*var_offset+3] = tmpA[i*10+8][i*var_offset  ] = 1.0;
                       tmpA[i*10+9][i*var_offset+4] = tmpA[i*10+8][i*var_offset+1] = 
                                                         (qt_crds[i][4][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*10+9][i*var_offset+5] = tmpA[i*10+8][i*var_offset+2] = 
                                                         (qt_crds[i][4][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*10+8] = qB[i][4][0];
                       rsideA[i*10+9] = qB[i][4][1];
                   }
                   END::: 04/01/2015 **/                               

                   /***  12/06/2012: This DOES NOT give full rank system to solve.
                   /// least square part: match B at  2 vertices of the triangular- subregions.
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       tmpA[i*4+1][i*var_offset+3] = tmpA[i*4][i*var_offset  ] = 1.0;
                       tmpA[i*4+1][i*var_offset+4] = tmpA[i*4][i*var_offset+1] =
                                                     (qt_crds[i][1][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*4+1][i*var_offset+5] = tmpA[i*4][i*var_offset+2] =
                                                     (qt_crds[i][1][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*4] = qB[i][1][0];
                       rsideA[i*4+1] = qB[i][1][1];

                       tmpA[i*4+3][i*var_offset+3] = tmpA[i*4+2][i*var_offset  ] = 1.0;
                       tmpA[i*4+3][i*var_offset+4] = tmpA[i*4+2][i*var_offset+1] = 
                                                         (qt_crds[i][2][0]-piece_cent[i][0])/sqrt_area[i];
                       tmpA[i*4+3][i*var_offset+5] = tmpA[i*4+2][i*var_offset+2] = 
                                                         (qt_crds[i][2][1]-piece_cent[i][1])/sqrt_area[i];
                       rsideA[i*4+2] = qB[i][2][0];
                       rsideA[i*4+3] = qB[i][2][1];
                   }
                   ***/
               } /// END::: if(dual_cell_sten_2[polyg->id].Bsten_set == NO)
               else
               {
                   tmpA = dual_cell_sten_2[polyg->id].MB_A;
                   tmpB = dual_cell_sten_2[polyg->id].MB_B;

                   /// 1) divergence-free
                   for(i = 0; i < polyg->n_sides; i++)
                       rsideB[i] = 0.0;

                   /// 2) match traces on outer edges
                   /// 3) match traces on inner edges
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       // average of trace, outer edge
                       rsideB[i+N_sides] = Bn[i][0];
                       // slope of trace, outer edge
                       rsideB[i+2*N_sides] = Bn[i][1];

                       // slope of trace, inner edge
                       rsideB[i+3*N_sides] = 0.0;
                       // average of trace, inner edge
                       rsideB[i+4*N_sides] = 0.0;
                   }

                   /***
                   // 01-28-2015, comment out working version //03/30/2015 try this for decay_alfven wave again
                   /// least square part: match B at vertices of triangular subregions.
                   /// This uses:  M = 2*(3*N_sides);
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       rsideA[i*6]   = qB[i][0][0];
                       rsideA[i*6+1] = qB[i][0][1];
                    
                       rsideA[i*6+2] = qB[i][1][0];
                       rsideA[i*6+3] = qB[i][1][1];

                       rsideA[i*6+4] = qB[i][2][0];
                       rsideA[i*6+5] = qB[i][2][1];
                   }
                   // END::: 01-28-2015, comment out working version // 03/30/2015 try this for decay_alfven wave again
                   ****/

                   /// This uses:  M = 2*(4*N_sides);
                   /// 01-28-2015: use:::
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       rsideA[i*8]   = qB[i][0][0];
                       rsideA[i*8+1] = qB[i][0][1];
                    
                       rsideA[i*8+2] = qB[i][1][0];
                       rsideA[i*8+3] = qB[i][1][1];

                       rsideA[i*8+4] = qB[i][2][0];
                       rsideA[i*8+5] = qB[i][2][1];

                       rsideA[i*8+6] = qB[i][3][0];
                       rsideA[i*8+7] = qB[i][3][1];
                   }

                   /**
                    // 04/01/2015. implement using 3 vertices and two midpoints of two edges as
                    // least squares constraints for reconstructing B on partial cells.
                   /// This uses:  M = 2*(5*N_sides);
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       rsideA[i*10]   = qB[i][0][0];
                       rsideA[i*10+1] = qB[i][0][1];
                    
                       rsideA[i*10+2] = qB[i][1][0];
                       rsideA[i*10+3] = qB[i][1][1];

                       rsideA[i*10+4] = qB[i][2][0];
                       rsideA[i*10+5] = qB[i][2][1];

                       rsideA[i*10+6] = qB[i][3][0];
                       rsideA[i*10+7] = qB[i][3][1];

                       rsideA[i*10+8] = qB[i][4][0];
                       rsideA[i*10+9] = qB[i][4][1];
                   }
                   END::: 04/01/2015 ***/

                   /***
                   /// least square part: match B at vertices of dual cell edges
                   /// OLD
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       rsideA[i*4] = qB[i][1][0];
                       rsideA[i*4+1] = qB[i][1][1];

                       rsideA[i*4+2] = qB[i][2][0];
                       rsideA[i*4+3] = qB[i][2][1];
                   }
                   ***/
               }

               /// TMP
               /**
               if(debug == YES)
               {
                   printf("\n\nM(least square) = %d, N = %d, P(constraints) = %d\n", M, N, P); 
                   print_matrix("tmpB", P, N, tmpB, "%g ");
                   print_matrix("tmpA", M, N, tmpA, "%g "); 
                   print_general_vector("RHS_B", rsideB, P,"\n");
                   print_general_vector("RHS_A", rsideA, M,"\n");
               }
               **/
               /// END: TMP

               LDA = M; LDB = P; LWORK = M+N+P;

               l = 0;
               for(j = 0; j < N; j++)
               {
                   for(i = 0; i < M; i++)
                   {
                       AA[l] = tmpA[i][j];
                       l++;
                   }
               }
               l = 0;
               for(j = 0; j < N; j++)
               {
                   for(i = 0; i < P; i++)
                   {
                       BB[l] = tmpB[i][j];
                       l++;
                   }
               }
               for(i = 0; i < M; i++)  // right side for the least square part
                   CC[i] = rsideA[i];
               for(i = 0; i < P; i++)  // right side for the constrained part
                   DD[i] = rsideB[i];

               FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                  BB, &LDB, CC, DD, XX, ///// double array B, int LDB, double array C, D, X,
                                  work, &LWORK, &INFO);
               if(INFO != 0)
               {
                   printf("WARNING: Construct_Mag_field_on_dual_cell_P1_DG()\n");
                   printf("dgglse returns %d\n", INFO);
               }

               /***
               if(debug == YES)
               {
                   printf("M = %d, N = %d, P = %d\n", M, N, P); 
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       printf("\n\n-------- Tri-Piece [%d] Reconstructed B for %d:\n", i, polyg->id);
                       printf("x-component = [%13.12g, %13.12g, %13.12g]\n", 
                                XX[i*var_offset], XX[i*var_offset+1], XX[i*var_offset+2]);
                       printf("y-component = [%13.12g, %13.12g, %13.12g]\n", 
                                XX[i*var_offset+3], XX[i*var_offset+4], XX[i*var_offset+5]);

                       // compute trace at outer edge 
                       tmp = tmpB[i+N_sides][i*var_offset]*XX[i*var_offset] + 
                             tmpB[i+N_sides][i*var_offset+1]*XX[i*var_offset+1] +
                             tmpB[i+N_sides][i*var_offset+2]*XX[i*var_offset+2] +
                             tmpB[i+N_sides][i*var_offset+3]*XX[i*var_offset+3] +
                             tmpB[i+N_sides][i*var_offset+4]*XX[i*var_offset+4] +
                             tmpB[i+N_sides][i*var_offset+5]*XX[i*var_offset+5];
                       printf("Average Bn: Trace = %13.12g, org = %13.12g\n", tmp, Bn[i][0]);

                       tmpslope = tmpB[i+2*N_sides][i*var_offset+1]*XX[i*var_offset+1] +
                                  tmpB[i+2*N_sides][i*var_offset+2]*XX[i*var_offset+2] +
                                  tmpB[i+2*N_sides][i*var_offset+4]*XX[i*var_offset+4] +
                                  tmpB[i+2*N_sides][i*var_offset+5]*XX[i*var_offset+5];
                       printf("Slope   Bn: Trace = %13.12g, org = %13.12g\n", tmpslope, Bn[i][1]);

                       printf("-----------------------------------\n\n");
                       // break;
                   }
                   // printf("WARNING: exit Construct_Mag_field_on_dual_cell_P1_DG()\n");
                   // clean_up(0);
               }
               ***/

               if(rk_step == RK_STEP)
               {
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       for(k = 0; k < MAX_N_COEF; k++)
                           polyg_sub_reg_dgB(polyg)[i][0][k] = XX[i*var_offset+k];
                       for(k = 0; k < MAX_N_COEF; k++)
                           polyg_sub_reg_dgB(polyg)[i][1][k] = XX[i*var_offset+k+3];
                   }
               }
               else
               {
                   for(i = 0; i < polyg->n_sides; i++)
                   {
                       for(k = 0; k < MAX_N_COEF; k++)
                           dual_cell_midsoln[polyg->id].sub_reg_dgB[rk_step][i][0][k] = XX[i*var_offset+k];
                       for(k = 0; k < MAX_N_COEF; k++)
                           dual_cell_midsoln[polyg->id].sub_reg_dgB[rk_step][i][1][k] = XX[i*var_offset+k+3];
                   }
               }

               // check_dual_cell_face_B_match_edge(polyg, dual_cell_midsoln, rk_step);
               // printf("WARNING: Stop in Construct_Mag_field_on_dual_cell_P1_DG()\n");
               // clean_up(0);
            } // END::: for (polyg = first_polyg(*surf); ... )
        } // END::: for(surf = fr->mesh->surfaces; surf && *surf; surf++)

        // printf("WARNING: Stop in Construct_Mag_field_on_dual_cell_P1_DG()\n");
        // clean_up(0);
}

//// Reduce the variation (slope) of the normal component of B on the interior
//// edges of one dual cell, i.e. on the two edges of every triangular
//// sub-region that touch the centroid of the dual cell.
////
//// For each sub-region i the triangle is
////     pcrds[0] = polygon point (i+1)%N_sides,
////     pcrds[1] = centroid of the dual cell,
////     pcrds[2] = polygon point i,
//// so side 0 and side 1 are the interior edges and side 2 is the outer edge.
//// One pass does, for every sub-region:
////   1) read the linear coefficients of Bx and By (3 each) of the sub-region,
////   2) evaluate the average and the slope of B.n on the two interior edges
////      and scale the slope by compress_factor,
////   3) take the two moments of the outer edge from the stored edge data,
////   4) solve a 6x6 system (3 slope rows, 2 interior average rows and the
////      divergence-free row a1 + b2 = 0) for new coefficients,
////   5) write the new coefficients back to where they were read from.
//// After each pass Collect_by_press_at_tri_quadrature_on_dual_cell_Gauss_N2()
//// is called and the pass is repeated while it reports N_neg_p != 0
//// (presumably the number of negative-pressure points -- confirm against that
//// function), up to 40 passes. At least one pass is always performed.
////
//// polyg             : dual cell to be modified.
//// fr, midsoln       : only forwarded to the Collect_by_press...() call.
//// dual_cell_midsoln : intermediate storage, indexed by polyg->id; used
////                     instead of the polygon's own storage when
////                     rk_step != RK_STEP.
//// rk_step           : selects the storage (see above) and is forwarded.
////
//// NOTE(review): sub_B holds 3 coefficients per component and the solution
//// vector is split at offset 3, while the copy loops run to MAX_N_COEF; this
//// presumes MAX_N_COEF == 3 -- confirm. Likewise Bx, By and Moment hold 2
//// entries while the loops run to N_COEF_EDGE; presumes N_COEF_EDGE <= 2.
EXPORT void Reduce_interior_Bn_variation_dual_cell_P1_DG(
        POLYGON            *polyg,
        Front              *fr, 
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step)
{
        int                i, side, next, N_sides, k;
        double             **piece_cent = polyg->_piece_cent;
        double             t[3], nor[MAX_N_POLY_SIDE][3][3], len;
        double             pcrds[3][3];
        POINT              **polyg_pts = NULL;
        double             sub_B[2][3], Bx[2], By[2];
        double             *sqrt_area = polyg->_piece_sqrt_area;
        double             Moment[MAX_N_POLY_SIDE][3][2]; // [#sub-region][3 edges][#moments]
        const double       compress_factor = 0.6;   // factor applied to interior slopes per pass
        const int          max_fix_iteration = 40;  // upper bound on the number of passes
        static double      **A = NULL;
        double             dx, dy, x_bar, y_bar, rsideA[10], XX[MAX_N_POLY_SIDE][10];
        double             neg_p_qB[20][5][2], neg_p_qt_crds[20][5][3];
        int                N_neg_p = 0, N_other = 0, neg_p_sect[20], other_sect[20];
        double             qB_2[20][5][2], qt_crds_2[20][5][3];
        int                N_fix_iteration = 0;

        /// The 6x6 work matrix is allocated once and reused by later calls.
        if(A == NULL)
        {
            matrix(&(A), 6, 6, sizeof(double));

            for(i = 0; i < 6; i++)
            {
                for(k = 0; k < 6; k++)
                    A[i][k] = 0.0;
            }
        }

        N_sides = polyg->n_sides;
        polyg_pts = Point_of_polyg(polyg);

        /// The centroid is the middle vertex of every sub-region triangle.
        pcrds[1][0] = polyg_centroid(polyg)[0];
        pcrds[1][1] = polyg_centroid(polyg)[1];

        do{
            N_fix_iteration++;
            for(i = 0; i < N_sides; i++)
            {
                /// 1) current coefficients of Bx (sub_B[0]) and By (sub_B[1])
                if(rk_step == RK_STEP)
                {
                    for(k = 0; k < MAX_N_COEF; k++)
                        sub_B[0][k] = polyg_sub_reg_dgB(polyg)[i][0][k];
                    for(k = 0; k < MAX_N_COEF; k++)
                        sub_B[1][k] = polyg_sub_reg_dgB(polyg)[i][1][k];
                }
                else
                {
                    for(k = 0; k < MAX_N_COEF; k++)
                        sub_B[0][k] = dual_cell_midsoln[polyg->id].sub_reg_dgB[rk_step][i][0][k];
                    for(k = 0; k < MAX_N_COEF; k++)
                        sub_B[1][k] = dual_cell_midsoln[polyg->id].sub_reg_dgB[rk_step][i][1][k];
                }

                pcrds[2][0] = Coords(polyg_pts[i])[0];
                pcrds[2][1] = Coords(polyg_pts[i])[1];

                pcrds[0][0] = Coords(polyg_pts[(i+1)%N_sides])[0];
                pcrds[0][1] = Coords(polyg_pts[(i+1)%N_sides])[1];

                /// 2) only adjust interior edges (side 0 and side 1)
                for(side = 0; side < 2; side++)
                {
                    next = (side+1)%3;

                    t[0] =  pcrds[next][0] - pcrds[side][0];
                    t[1] =  pcrds[next][1] - pcrds[side][1];

                    len = sqr(t[0]) + sqr(t[1]);
                    len = sqrt(len);

                    /// unit normal: edge direction rotated to (t_y, -t_x)
                    nor[i][side][0] = t[1]/len;
                    nor[i][side][1] = -t[0]/len;

                    /// [0]: value at the edge midpoint, [1]: half of the change along the edge
                    Bx[0] = sub_B[0][0] + sub_B[0][1]*( 0.5*(pcrds[next][0] + pcrds[side][0]) - piece_cent[i][0])/sqrt_area[i] +
                            sub_B[0][2]*( 0.5*(pcrds[next][1] + pcrds[side][1]) - piece_cent[i][1])/sqrt_area[i];
                    Bx[1] = 0.5*(sub_B[0][1]*(pcrds[next][0] - pcrds[side][0]) +
                                 sub_B[0][2]*(pcrds[next][1] - pcrds[side][1]) )/sqrt_area[i];

                    By[0] = sub_B[1][0] + sub_B[1][1]*( 0.5*(pcrds[next][0] + pcrds[side][0]) - piece_cent[i][0])/sqrt_area[i] +
                            sub_B[1][2]*( 0.5*(pcrds[next][1] + pcrds[side][1]) - piece_cent[i][1])/sqrt_area[i];
                    By[1] = 0.5*(sub_B[1][1]*(pcrds[next][0] - pcrds[side][0]) +
                                 sub_B[1][2]*(pcrds[next][1] - pcrds[side][1]) )/sqrt_area[i];

                    for(k = 0; k < N_COEF_EDGE; k++)
                        Moment[i][side][k] = Bx[k]*nor[i][side][0] + By[k]*nor[i][side][1];

                    /// shrink the slope of B.n on this interior edge
                    Moment[i][side][1] *= compress_factor;
                } /// END::: for(side = 0; side < 2; side++)

                /// 3) outer edge: normal from the stored side vector (used as is,
                ///    without normalization), moments from the stored edge data
                for(k = 0; k < 2; k++)
                    t[k] = polyg_side_vector(polyg)[i][k];
                nor[i][2][0] = t[1];
                nor[i][2][1] = -t[0];
                if(rk_step == RK_STEP)
                {
                    for(k = 0; k < N_COEF_EDGE; k++)
                        Moment[i][2][k] = polyg_side_dgB(polyg)[i][k];
                }
                else
                {
                    for(k = 0; k < N_COEF_EDGE; k++)
                        Moment[i][2][k] = dual_cell_midsoln[polyg->id].edge_dgBn[rk_step][i][k];
                }

                /// 4) NOW, reconstruct B in the sub-region.
                /// Unknowns: (a0, a1, a2) for Bx and (b0, b1, b2) for By.
                /// Every entry of A is written here, so the system does not
                /// depend on what a previous solve left in the static matrix
                /// (whether solve_by_gj() preserves A is not visible here).
                for(side = 0; side < 3; side++)
                {
                    next = (side+1)%3;

                    dx = 0.5*(pcrds[next][0] - pcrds[side][0]);
                    dy = 0.5*(pcrds[next][1] - pcrds[side][1]);
                    x_bar = 0.5*(pcrds[next][0] + pcrds[side][0]);
                    y_bar = 0.5*(pcrds[next][1] + pcrds[side][1]);

                    /// match slope (rows 0..2); constant terms do not contribute
                    A[side][0] = 0.0;
                    A[side][1] = nor[i][side][0]*dx/sqrt_area[i];
                    A[side][2] = nor[i][side][0]*dy/sqrt_area[i];
                    A[side][3] = 0.0;
                    A[side][4] = nor[i][side][1]*dx/sqrt_area[i];
                    A[side][5] = nor[i][side][1]*dy/sqrt_area[i];
                    rsideA[side] = Moment[i][side][1];

                    /// match average (rows 3 and 4, interior edges only).
                    /// The average on the outer edge is not imposed: its row
                    /// (row 5) is taken by the div-free condition below.
                    if(side < 2)
                    {
                        A[side+3][0] = nor[i][side][0];  // a0
                        A[side+3][1] = nor[i][side][0]*(x_bar - piece_cent[i][0])/sqrt_area[i]; // a1
                        A[side+3][2] = nor[i][side][0]*(y_bar - piece_cent[i][1])/sqrt_area[i]; // a2
                        A[side+3][3] = nor[i][side][1];  // b0
                        A[side+3][4] = nor[i][side][1]*(x_bar - piece_cent[i][0])/sqrt_area[i]; // b1
                        A[side+3][5] = nor[i][side][1]*(y_bar - piece_cent[i][1])/sqrt_area[i]; // b2
                        rsideA[side+3] = Moment[i][side][0];
                    }
                }
                /// div-free condition: a1 + b2 = 0
                for(k = 0; k < 6; k++)
                    A[5][k] = 0.0;
                A[5][1] = 1.0;
                A[5][5] = 1.0;
                rsideA[5] = 0.0;

                solve_by_gj(A,6,rsideA,XX[i]);

                /// 5) store the new coefficients: XX[i][0..2] -> Bx, XX[i][3..5] -> By
                if(rk_step == RK_STEP)
                {
                    for(k = 0; k < MAX_N_COEF; k++)
                        polyg_sub_reg_dgB(polyg)[i][0][k] = XX[i][k];
                    for(k = 0; k < MAX_N_COEF; k++)
                        polyg_sub_reg_dgB(polyg)[i][1][k] = XX[i][k+3];
                }
                else
                {
                    for(k = 0; k < MAX_N_COEF; k++)
                        dual_cell_midsoln[polyg->id].sub_reg_dgB[rk_step][i][0][k] = XX[i][k];
                    for(k = 0; k < MAX_N_COEF; k++)
                        dual_cell_midsoln[polyg->id].sub_reg_dgB[rk_step][i][1][k] = XX[i][k+3];
                }

            }/// END: for(i = 0; i < N_sides; i++)

            /// Re-examine the dual cell with the updated field; N_neg_p decides
            /// whether another compression pass is needed.
            Collect_by_press_at_tri_quadrature_on_dual_cell_Gauss_N2(fr, polyg,
               midsoln, dual_cell_midsoln, rk_step, neg_p_qB, neg_p_qt_crds,&N_neg_p,neg_p_sect,
               qB_2, qt_crds_2, &N_other,other_sect);

        }while(N_neg_p != 0 && N_fix_iteration < max_fix_iteration);

        /// NOTE: if N_neg_p is still non-zero here the iteration limit was
        /// reached; this is accepted silently (no warning, no abort).
}

//// This function is a clone of Construct_Mag_field_on_dual_cell_P1_DG().
//// It takes a single dual cell and does the B reconstruction on it.
EXPORT void Build_Mag_field_on_individual_dual_cell_P1_DG_ver2(
        POLYGON            *polyg,
        Front              *fr,
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step)
{
        POLYGON    *tmpg;
        double     x_crds[20], y_crds[20], *cent;
        int        i, j, k, By_offset, tmpk, N_sides, tmp_P, tmp_M;
        POINT      *p0, *p1, *pc;
        double     phi0[20][3], phi1[20][3], phi2[20][3]; 
        double     rsideB[80], rsideA[80];
        double     len, t[3], nor[3], Bn[20][N_COEF_EDGE];
        int        M, N, P, LDA, LDB, l, LWORK, INFO;
        double     AA[2800], BB[2800], CC[800], DD[800], XX[800], work[15000], tmp,tmpslope;
        double     B_cent[3];
        int        debug = NO, var_offset = 6;
        double     val[45], area[20], sqrt_area[20], sqrt_area1, area2, sqrt_area2, *tri_cent, vertB[3];
        double     ***conformal_basis_tri, econ_u[10];
        double     **DG_basis_tri;
        double     dx, dy, x_bar, y_bar, one_3rd, two_3rd;
        TRI        **tris;
        Locstate   sts[20];
        static double  **outputB, **outputA = NULL;
        double     pcrds0[3], pcrds1[3], pcrds2[3], piece_cent[20][3], con_u[10];
        double     qB[20][6][2], qt_crds[20][6][3], tmp_sum_Bn, tmpBx, tmpBy;
        double     qB_2[20][5][2], qt_crds_2[20][5][3]; // save B value at the tri-mesh cell interface with
                                                        // lower pressure
        double             neg_p_qB[20][5][2], neg_p_qt_crds[20][5][3];
        int                N_neg_p, N_other, neg_p_sect[20], other_sect[20];
       
        static double **tmpA = NULL, **tmpB;

        // printf("\n\n----Dual %d enter  Build_Mag_field_on_individual_dual_cell_P1_DG_ver2()\n",polyg->id);

        one_3rd = 1.0/3.0; two_3rd = 2.0/3.0;

        if(NULL == tmpA)
        {
            /// M: number of least square eqn, from input argument
            /// N: number of unknown;
            /// P: number of constraint eqns, which are satisfied exactly.

            // N: each tri-piece has two linear polynomials for x- and y-component of B(x,y) 
            //    respectively. Each linear polynomial has 3 unknowns.
            N = 6*MAX_N_POLY_SIDE;

            // 4n (match trace avg and slope exactly) + n divergence-free condition - 1 (avg of trace at inner edge). 
            P = 2*MAX_N_POLY_SIDE + 2*MAX_N_POLY_SIDE + 2*MAX_N_POLY_SIDE + MAX_N_POLY_SIDE;

            // M: choose x- and y-component of B values at quadrature points on edges of the tri-grid.
            // This leads to some linear dependency between the constraint and least-squares eqns.
            // M = 2*N_sides;  

            // M: choose x- and y-component of B values at 3 vertices of the triangular- subregions.
            //    + x- and y-component of B values at quadrature pt of the overlay tri-cell where flux is computed.
            M = 2*(3*MAX_N_POLY_SIDE) + 2*MAX_N_POLY_SIDE;

            /**
            // 04/01/2015. implement using 3 vertices and two midpoints of two edges as
            // least squares constraints for reconstructing B on partial cells.
 
            // M: choose x- and y-component of B values at 3 vertices of the triangular- subregions.
            //    + x- and y-component of B values at midpoints of edges of partial tri of dual cell.
            M = 2*(4*MAX_N_POLY_SIDE) + 2*MAX_N_POLY_SIDE;
            END::: 04/01/2015. **/

            matrix(&tmpA, M, N,sizeof(double));
            // matrix(&tmpB, P+1, N,sizeof(double)); 
            matrix(&tmpB, (2*MAX_N_POLY_SIDE + 2*MAX_N_POLY_SIDE + MAX_N_POLY_SIDE), N,sizeof(double));
        }

        pcrds0[2] = pcrds1[2] = pcrds2[2] = 0.0;

        Bn_at_dual_cell_edges(polyg,midsoln,dual_cell_midsoln,rk_step,Bn);

        // part 1) quadrature pts at edge of tri-meshes where flux is calculated.
        // Collect_by_press_at_tri_quadrature_on_dual_cell_Gauss_N2(fr, polyg,
        //        midsoln, dual_cell_midsoln, rk_step, neg_p_qB, neg_p_qt_crds,&N_neg_p,neg_p_sect,
        //        qB_2, qt_crds_2, &N_other,other_sect);
         
        cent = polyg_centroid(polyg);
        pcrds0[0] = cent[0];
        pcrds0[1] = cent[1];
        // By_offset: offset for where eqns for Y-component of B starts.
        By_offset = polyg->n_sides+1;
        N_sides = polyg->n_sides;

        /// M: number of least square eqn, from input argument
        /// N: number of unknown;
        /// P: number of constraint eqns, which are satisfied exactly.

        // N: each tri-piece has two linear polynomials for x- and y-component of B(x,y) 
        //    respectively. Each linear polynomial has 3 unknowns.
        N = 6*N_sides; 

        // P: 2*(n inner edges + n outer edges) + n -1 = 5n -1. 
        // 4n (match trace avg and slope exactly) + n divergence-free condition - 1 (avg of trace at inner edge).
        P = 2*N_sides + 2*N_sides + N_sides - 1;
        // P = 2*N_sides + N_sides;

        /** 01-28-2015. comment out working version
        // M: choose x- and y-component of B values at 3 vertices of triangular subregion.
        M = 2*(3*N_sides);

        // part 2) quadrature pts where we do least-square fitting for magnetic field.
        for(i = 0; i < polyg->n_sides; i++)
        {
            p0 = Point_of_polyg(polyg)[i];
            p1 = Point_of_polyg(polyg)[(i+1)%N_sides];

            qt_crds[i][0][0] = pcrds0[0];     // x-coord of centroid of dual cell
            qt_crds[i][0][1] = pcrds0[1];     // y-coord of centroid of dual cell
            qt_crds[i][1][0] = Coords(p0)[0];
            qt_crds[i][1][1] = Coords(p0)[1];
            qt_crds[i][2][0] = Coords(p1)[0];
            qt_crds[i][2][1] = Coords(p1)[1];
        }
        quadrature_Mag_at_tri_for_P1(polyg,fr,midsoln,rk_step,qt_crds,qB);
        01-28-2015. comment out working version **/

        // 01-28-2015. Use following quadratures: 
        // Take 2 vertices of the partial cell, which
        // are centroids of tris. And 2 midpoints on edges connecting
        // vertices(centroids of tris) and centroid of polygon.
        // M: choose x- and y-component of B values at 4 points of the triangular- subregions.
        M = 2*(4*N_sides);
        for(i = 0; i < polyg->n_sides; i++)
        {
            p0 = Point_of_polyg(polyg)[i];
            p1 = Point_of_polyg(polyg)[(i+1)%N_sides];

            qt_crds[i][0][0] = (pcrds0[0]+Coords(p0)[0])*0.5;
            qt_crds[i][0][1] = (pcrds0[1]+Coords(p0)[1])*0.5;
            qt_crds[i][1][0] = Coords(p0)[0];
            qt_crds[i][1][1] = Coords(p0)[1];

            qt_crds[i][2][0] = (pcrds0[0]+Coords(p1)[0])*0.5;
            qt_crds[i][2][1] = (pcrds0[1]+Coords(p1)[1])*0.5;
            qt_crds[i][3][0] = Coords(p1)[0];
            qt_crds[i][3][1] = Coords(p1)[1];
        }
        quadrature_Mag_at_tri_for_P1_ver2(polyg,fr,midsoln,rk_step,qt_crds,qB);
        /* END:: 01-28-2015. Use following quadratures: */

        /**
        // 04/01/2015. implement using 3 vertices and two midpoints of two edges as
        // least squares constraints for reconstructing B on partial cells.
        M = 2*(5*N_sides);
        for(i = 0; i < polyg->n_sides; i++)
        {
            p0 = Point_of_polyg(polyg)[i];
            p1 = Point_of_polyg(polyg)[(i+1)%N_sides];

            qt_crds[i][0][0] = pcrds0[0];
            qt_crds[i][0][1] = pcrds0[1];
            qt_crds[i][1][0] = (pcrds0[0]+Coords(p0)[0])*0.5;
            qt_crds[i][1][1] = (pcrds0[1]+Coords(p0)[1])*0.5;
            qt_crds[i][2][0] = Coords(p0)[0];
            qt_crds[i][2][1] = Coords(p0)[1];

            qt_crds[i][3][0] = (pcrds0[0]+Coords(p1)[0])*0.5;
            qt_crds[i][3][1] = (pcrds0[1]+Coords(p1)[1])*0.5;
            qt_crds[i][4][0] = Coords(p1)[0];
            qt_crds[i][4][1] = Coords(p1)[1];
        }
        quadrature_Mag_at_tri_for_P1_ver3_5pts(polyg,fr,midsoln,rk_step,qt_crds,qB);
        **/

        // END::: quadrature pts where we do least-square fitting for magnetic field.

        for(i = 0; i < M; i++)
        {
            for(j = 0; j < N; j++)
                tmpA[i][j] = 0.0;
        }
        for(i = 0; i < P; i++)
        {
            for(j = 0; j < N; j++)
                tmpB[i][j] = 0.0;
        }

        /// 1) divergence-free
        for(i = 0; i < polyg->n_sides; i++)
        {
            tmpB[i][i*var_offset+1] = 1.0; // a1
            tmpB[i][i*var_offset+5] = 1.0; // b2
            rsideB[i] = 0.0;
        }
   
        /// 2) match traces on outer edges
        for(i = 0; i < polyg->n_sides; i++)
        {
            p0 = Point_of_polyg(polyg)[i];
            p1 = Point_of_polyg(polyg)[(i+1)%N_sides];
            pcrds1[0] = Coords(p0)[0];
            pcrds1[1] = Coords(p0)[1];

            pcrds2[0] = Coords(p1)[0];
            pcrds2[1] = Coords(p1)[1];

            // area[i] = triangle_area_3d(pcrds0, pcrds1, pcrds2);
            sqrt_area[i] = sqrt_area1 = polyg->_piece_sqrt_area[i];
            for(k = 0; k < 2; k++)
                piece_cent[i][k] = polyg->_piece_cent[i][k];
                // piece_cent[i][k] = one_3rd*(pcrds0[k]+ pcrds1[k] + pcrds2[k]);

            dx = 0.5*(pcrds2[0] - pcrds1[0]);
            dy = 0.5*(pcrds2[1] - pcrds1[1]);
            x_bar = 0.5*(pcrds2[0] + pcrds1[0]);
            y_bar = 0.5*(pcrds2[1] + pcrds1[1]);

            for(k = 0; k < 2; k++)
                t[k] = polyg_side_vector(polyg)[i][k];
            nor[0] = t[1]; nor[1] = -t[0];

            // average of trace
            tmpB[i+N_sides][i*var_offset  ] = nor[0];
            tmpB[i+N_sides][i*var_offset+1] = nor[0]*(x_bar-piece_cent[i][0])/sqrt_area1;
            tmpB[i+N_sides][i*var_offset+2] = nor[0]*(y_bar-piece_cent[i][1])/sqrt_area1;
            tmpB[i+N_sides][i*var_offset+3] = nor[1];
            tmpB[i+N_sides][i*var_offset+4] = nor[1]*(x_bar-piece_cent[i][0])/sqrt_area1;
            tmpB[i+N_sides][i*var_offset+5] = nor[1]*(y_bar-piece_cent[i][1])/sqrt_area1;
            rsideB[i+N_sides] = Bn[i][0];

            // slope of trace
            tmpB[i+2*N_sides][i*var_offset+1] = nor[0]*dx/sqrt_area1;
            tmpB[i+2*N_sides][i*var_offset+2] = nor[0]*dy/sqrt_area1;
            tmpB[i+2*N_sides][i*var_offset+4] = nor[1]*dx/sqrt_area1;
            tmpB[i+2*N_sides][i*var_offset+5] = nor[1]*dy/sqrt_area1;
            rsideB[i+2*N_sides] = Bn[i][1];
        }//// END::: 2) match traces on outer edges
                   
        /// 3) match traces on inner edges
        for(i = 0; i < polyg->n_sides; i++)
        {
            p1 = Point_of_polyg(polyg)[(i+1)%N_sides];
            pcrds2[0] = Coords(p1)[0];
            pcrds2[1] = Coords(p1)[1];

            t[0] = pcrds0[0]-pcrds2[0];
            t[1] = pcrds0[1]-pcrds2[1];
            tmp = sqrt(sqr(t[0]) + sqr(t[1]));
            nor[0] = t[1]/tmp; nor[1] = -t[0]/tmp;

            sqrt_area1 = sqrt_area[i];
            sqrt_area2 = sqrt_area[(i+1)%N_sides];

            dx = 0.5*(pcrds0[0] - pcrds2[0]);
            dy = 0.5*(pcrds0[1] - pcrds2[1]);
            x_bar = 0.5*(pcrds2[0] + pcrds0[0]);
            y_bar = 0.5*(pcrds2[1] + pcrds0[1]);

            /// 3.1.a) match slope of trace on inner edge for ith-piece
            tmpB[i+3*N_sides][i*var_offset+1] = nor[0]*dx/sqrt_area1;
            tmpB[i+3*N_sides][i*var_offset+2] = nor[0]*dy/sqrt_area1;
            tmpB[i+3*N_sides][i*var_offset+4] = nor[1]*dx/sqrt_area1;
            tmpB[i+3*N_sides][i*var_offset+5] = nor[1]*dy/sqrt_area1;

            /// 3.2.a) match average of trace on inner edge for ith-piece
            tmpB[i+4*N_sides][i*var_offset  ] = nor[0];
            tmpB[i+4*N_sides][i*var_offset+1] = nor[0]*(x_bar-piece_cent[i][0])/sqrt_area1;
            tmpB[i+4*N_sides][i*var_offset+2] = nor[0]*(y_bar-piece_cent[i][1])/sqrt_area1;
            tmpB[i+4*N_sides][i*var_offset+3] = nor[1];
            tmpB[i+4*N_sides][i*var_offset+4] = nor[1]*(x_bar-piece_cent[i][0])/sqrt_area1;
            tmpB[i+4*N_sides][i*var_offset+5] = nor[1]*(y_bar-piece_cent[i][1])/sqrt_area1;

            nor[0] *= -1.0; nor[1] *= -1.0;
            dx *= -1.0; dy *= -1.0;
            /// 3.1.b) match slope of trace on inner edge for (i+1)th-piece
            tmpB[i+3*N_sides][((i+1)%N_sides)*var_offset+1] = -(nor[0]*dx/sqrt_area2);
            tmpB[i+3*N_sides][((i+1)%N_sides)*var_offset+2] = -(nor[0]*dy/sqrt_area2);
            tmpB[i+3*N_sides][((i+1)%N_sides)*var_offset+4] = -(nor[1]*dx/sqrt_area2);
            tmpB[i+3*N_sides][((i+1)%N_sides)*var_offset+5] = -(nor[1]*dy/sqrt_area2);

            rsideB[i+3*N_sides] = 0.0;

            /// 3.2.b) match average of trace on inner edge for (i+1)th-piece
            tmpB[i+4*N_sides][((i+1)%N_sides)*var_offset  ] = nor[0];
            tmpB[i+4*N_sides][((i+1)%N_sides)*var_offset+1] = nor[0]*(x_bar-piece_cent[(i+1)%N_sides][0])/sqrt_area2;
            tmpB[i+4*N_sides][((i+1)%N_sides)*var_offset+2] = nor[0]*(y_bar-piece_cent[(i+1)%N_sides][1])/sqrt_area2;
            tmpB[i+4*N_sides][((i+1)%N_sides)*var_offset+3] = nor[1];
            tmpB[i+4*N_sides][((i+1)%N_sides)*var_offset+4] = nor[1]*(x_bar-piece_cent[(i+1)%N_sides][0])/sqrt_area2;
            tmpB[i+4*N_sides][((i+1)%N_sides)*var_offset+5] = nor[1]*(y_bar-piece_cent[(i+1)%N_sides][1])/sqrt_area2; 

            rsideB[i+4*N_sides] = 0.0;
        }/// END::: /// 3) match traces on inner edges

        /********
        /// constraints from eliminating negative pressure
        tmp_P = 2*N_sides + 2*N_sides + N_sides - 1;
        for(i = 0; i < N_neg_p; i++)
        {
            for(j = 0; j < N; j++)
                tmpB[tmp_P+i*2][j] = 0.0;
            for(j = 0; j < N; j++)
                tmpB[tmp_P+i*2+1][j] = 0.0;
        }

        for(i = 0; i < N_neg_p; i++)
        {
            tmpB[tmp_P+i*2+1][neg_p_sect[i]*var_offset+3] = tmpB[tmp_P+i*2][neg_p_sect[i]*var_offset] = 1.0;
            tmpB[tmp_P+i*2+1][neg_p_sect[i]*var_offset+4] = 
                          tmpB[tmp_P+i*2][neg_p_sect[i]*var_offset+1] = 
                               (neg_p_qt_crds[i][0][0]-piece_cent[neg_p_sect[i]][0])/sqrt_area[neg_p_sect[i]];
            tmpB[tmp_P+i*2+1][neg_p_sect[i]*var_offset+5] = 
                          tmpB[tmp_P+i*2][neg_p_sect[i]*var_offset+2] =
                               (neg_p_qt_crds[i][0][1]-piece_cent[neg_p_sect[i]][1])/sqrt_area[neg_p_sect[i]];
            rsideB[tmp_P+i*2] = neg_p_qB[i][0][0];
            rsideB[tmp_P+i*2+1] = neg_p_qB[i][0][1];
 
            // printf("Assemble tmpB(%d) for neg-press, target B (%g, %g), pt(%g, %g)\n",
            //        neg_p_sect[i],  neg_p_qB[i][0][0],  neg_p_qB[i][0][1], neg_p_qt_crds[i][0][0], 
            //                        neg_p_qt_crds[i][0][1]);
        }
        *********/ 

            // printf("P = %d, M = %d, N = %d, N_neg_p = %d, N_other = %d\n", P, M, N, N_neg_p, N_other);
            // print_matrix("tmpB", P, N, tmpB, "%11.9g ");
            // printf("WARNING: exit in Build_Mag_field_on_individual_dual_cell_P1_DG()\n");
            // clean_up(0);

        /** 01-28-2015, comment out OLD working version
          * /// NEW: least square part: match B at vertices of triangular subregions.
          * /// This uses:  M = 2*(3*N_sides); /// 12/05/2012
        /// 12/04/2012: Least square part: match B at quadrature of tri-edge and vertices of triangular subregions.
        for(i = 0; i < polyg->n_sides; i++)
        {
            tmpA[i*6+1][i*var_offset+3] = tmpA[i*6][i*var_offset  ] = 1.0;
            tmpA[i*6+1][i*var_offset+4] = tmpA[i*6][i*var_offset+1] =    
                                          (qt_crds[i][0][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*6+1][i*var_offset+5] = tmpA[i*6][i*var_offset+2] =    
                                          (qt_crds[i][0][1]-piece_cent[i][1])/sqrt_area[i];    
            rsideA[i*6] = qB[i][0][0];
            rsideA[i*6+1] = qB[i][0][1];

            tmpA[i*6+3][i*var_offset+3] = tmpA[i*6+2][i*var_offset  ] = 1.0;
            tmpA[i*6+3][i*var_offset+4] = tmpA[i*6+2][i*var_offset+1] = 
                                          (qt_crds[i][1][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*6+3][i*var_offset+5] = tmpA[i*6+2][i*var_offset+2] = 
                                          (qt_crds[i][1][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*6+2] = qB[i][1][0];
            rsideA[i*6+3] = qB[i][1][1];
                        
            tmpA[i*6+5][i*var_offset+3] = tmpA[i*6+4][i*var_offset  ] = 1.0;
            tmpA[i*6+5][i*var_offset+4] = tmpA[i*6+4][i*var_offset+1] = 
                                          (qt_crds[i][2][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*6+5][i*var_offset+5] = tmpA[i*6+4][i*var_offset+2] = 
                                          (qt_crds[i][2][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*6+4] = qB[i][2][0];

                   // tmpA[i*6+5][i*var_offset+3] = 1.0;
                   // tmpA[i*6+5][i*var_offset+4] = (qt_crds[i][2][0]-piece_cent[i][0])/sqrt_area[i];
                   // tmpA[i*6+5][i*var_offset+5] = (qt_crds[i][2][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*6+5] = qB[i][2][1];
        } /// END::: 12/04/2012: least square part: 
          ///        match B at quadrature of tri-edge and vertices of triangular subregions
        01-28-2015, comment out OLD working version **/

        /// 01-28-2015: New working version
        /// This uses:  M = 2*(4*N_sides);
        for(i = 0; i < polyg->n_sides; i++)
        {
            tmpA[i*8+1][i*var_offset+3] = tmpA[i*8][i*var_offset  ] = 1.0;
            tmpA[i*8+1][i*var_offset+4] = tmpA[i*8][i*var_offset+1] =
                                          (qt_crds[i][0][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*8+1][i*var_offset+5] = tmpA[i*8][i*var_offset+2] =
                                          (qt_crds[i][0][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*8] = qB[i][0][0];
            rsideA[i*8+1] = qB[i][0][1];

            tmpA[i*8+3][i*var_offset+3] = tmpA[i*8+2][i*var_offset  ] = 1.0;
            tmpA[i*8+3][i*var_offset+4] = tmpA[i*8+2][i*var_offset+1] =
                                          (qt_crds[i][1][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*8+3][i*var_offset+5] = tmpA[i*8+2][i*var_offset+2] =
                                          (qt_crds[i][1][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*8+2] = qB[i][1][0];
            rsideA[i*8+3] = qB[i][1][1];

            tmpA[i*8+5][i*var_offset+3] = tmpA[i*8+4][i*var_offset  ] = 1.0;
            tmpA[i*8+5][i*var_offset+4] = tmpA[i*8+4][i*var_offset+1] =
                                          (qt_crds[i][2][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*8+5][i*var_offset+5] = tmpA[i*8+4][i*var_offset+2] =
                                          (qt_crds[i][2][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*8+4] = qB[i][2][0];
            rsideA[i*8+5] = qB[i][2][1];

            tmpA[i*8+7][i*var_offset+3] = tmpA[i*8+6][i*var_offset  ] = 1.0;
            tmpA[i*8+7][i*var_offset+4] = tmpA[i*8+6][i*var_offset+1] =
                                          (qt_crds[i][3][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*8+7][i*var_offset+5] = tmpA[i*8+6][i*var_offset+2] =
                                          (qt_crds[i][3][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*8+6] = qB[i][3][0];
            rsideA[i*8+7] = qB[i][3][1];
        }
        /* END::: 01-28-2015: New working version*/

        /**
        // 04/01/2015. implement using 3 vertices and two midpoints of two edges as
        // least squares constraints for reconstructing B on partial cells.
        // This uses:  M = 2*(5*N_sides);
        for(i = 0; i < polyg->n_sides; i++)
        {
            tmpA[i*10+1][i*var_offset+3] = tmpA[i*10][i*var_offset  ] = 1.0;
            tmpA[i*10+1][i*var_offset+4] = tmpA[i*10][i*var_offset+1] = 
                    (qt_crds[i][0][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*10+1][i*var_offset+5] = tmpA[i*10][i*var_offset+2] = 
                    (qt_crds[i][0][1]-piece_cent[i][1])/sqrt_area[i];  
            rsideA[i*10] = qB[i][0][0];
            rsideA[i*10+1] = qB[i][0][1];

            tmpA[i*10+3][i*var_offset+3] = tmpA[i*10+2][i*var_offset  ] = 1.0;
            tmpA[i*10+3][i*var_offset+4] = tmpA[i*10+2][i*var_offset+1] = 
                    (qt_crds[i][1][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*10+3][i*var_offset+5] = tmpA[i*10+2][i*var_offset+2] = 
                    (qt_crds[i][1][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*10+2] = qB[i][1][0];
            rsideA[i*10+3] = qB[i][1][1];
                        
            tmpA[i*10+5][i*var_offset+3] = tmpA[i*10+4][i*var_offset  ] = 1.0;
            tmpA[i*10+5][i*var_offset+4] = tmpA[i*10+4][i*var_offset+1] = 
                    (qt_crds[i][2][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*10+5][i*var_offset+5] = tmpA[i*10+4][i*var_offset+2] = 
                    (qt_crds[i][2][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*10+4] = qB[i][2][0];
            rsideA[i*10+5] = qB[i][2][1];

            tmpA[i*10+7][i*var_offset+3] = tmpA[i*10+6][i*var_offset  ] = 1.0;
            tmpA[i*10+7][i*var_offset+4] = tmpA[i*10+6][i*var_offset+1] = 
                    (qt_crds[i][3][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*10+7][i*var_offset+5] = tmpA[i*10+6][i*var_offset+2] = 
                    (qt_crds[i][3][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*10+6] = qB[i][3][0];
            rsideA[i*10+7] = qB[i][3][1];
            tmpA[i*10+9][i*var_offset+3] = tmpA[i*10+8][i*var_offset  ] = 1.0;
            tmpA[i*10+9][i*var_offset+4] = tmpA[i*10+8][i*var_offset+1] = 
                    (qt_crds[i][4][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*10+9][i*var_offset+5] = tmpA[i*10+8][i*var_offset+2] = 
                    (qt_crds[i][4][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*10+8] = qB[i][4][0];
            rsideA[i*10+9] = qB[i][4][1];
        }
        END::: 04/01/2015 **/

        /// least square: 
        // printf("P = %d, M = %d, N = %d,  N_neg_p = %d, N_other = %d\n", P, M, N, N_neg_p, N_other); 
        /**
        tmp_M = 2*(2*N_sides);
        /// first assemble B field at end points of dual cell edge.
        for(i = 0; i < polyg->n_sides; i++)
        {
            tmpA[i*4+1][i*var_offset+3] = tmpA[i*4][i*var_offset  ] = 1.0;
            tmpA[i*4+1][i*var_offset+4] = tmpA[i*4][i*var_offset+1] =
                                   (qt_crds[i][1][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*4+1][i*var_offset+5] = tmpA[i*4][i*var_offset+2] =
                                   (qt_crds[i][1][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*4] = qB[i][1][0];
            rsideA[i*4+1] = qB[i][1][1];

            tmpA[i*4+3][i*var_offset+3] = tmpA[i*4+2][i*var_offset  ] = 1.0;
            tmpA[i*4+3][i*var_offset+4] = tmpA[i*4+2][i*var_offset+1] = 
                                   (qt_crds[i][2][0]-piece_cent[i][0])/sqrt_area[i];
            tmpA[i*4+3][i*var_offset+5] = tmpA[i*4+2][i*var_offset+2] = 
                                   (qt_crds[i][2][1]-piece_cent[i][1])/sqrt_area[i];
            rsideA[i*4+2] = qB[i][2][0];
            rsideA[i*4+3] = qB[i][2][1];
        }
        for(i = 0; i < N_other; i++)
        {
            for(j = 0; j < N; j++)
                tmpA[tmp_M+i*2][j] = 0.0;
            for(j = 0; j < N; j++)
                tmpA[tmp_M+i*2+1][j] = 0.0;
        }
        **/
        /// END::: first assemble B field at end points of dual cell edge.
        /// assemble point at selected quadratures
        /***
        for(i = 0; i < N_other; i++) 
        {
            // printf("tmpA assemble eqn %d, %d, secter %d\n", tmp_M+i*2, tmp_M+i*2+1, other_sect[i]);
            tmpA[tmp_M+i*2+1][other_sect[i]*var_offset+3] = tmpA[tmp_M+i*2][other_sect[i]*var_offset] = 1.0;
            tmpA[tmp_M+i*2+1][other_sect[i]*var_offset+4] =
                    tmpA[tmp_M+i*2][other_sect[i]*var_offset+1] =
                               (qt_crds_2[i][0][0]-piece_cent[other_sect[i]][0])/sqrt_area[other_sect[i]];

            tmpA[tmp_M+i*2+1][other_sect[i]*var_offset+5] =
                    tmpA[tmp_M+i*2][other_sect[i]*var_offset+2] =
                               (qt_crds_2[i][0][1]-piece_cent[other_sect[i]][1])/sqrt_area[other_sect[i]];
            rsideA[tmp_M+i*2] = qB_2[i][0][0];
            rsideA[tmp_M+i*2+1] = qB_2[i][0][1];
        }
        ***/ 
        /// END::: assemble point at selected quadratures
        /// END::: least square:
 
        /// TMP
        if(debug == YES)
        {
            printf("P = %d, M = %d, N = %d,  N_neg_p = %d, N_other = %d\n", P, M, N, N_neg_p, N_other); 
            print_matrix("tmpB", P, N, tmpB, "%g ");
            print_matrix("tmpA", M, N, tmpA, "%g "); 
            print_general_vector("RHS_B", rsideB, P,"\n");
            print_general_vector("RHS_A", rsideA, M,"\n");
            // printf("WARNING: exit in Build_Mag_field_on_individual_dual_cell_P1_DG()\n");
            // clean_up(0);
        }
        /// END: TMP

        LDA = M; LDB = P; LWORK = M+N+P;

        l = 0;
        for(j = 0; j < N; j++)
        {
            for(i = 0; i < M; i++)
            {
                AA[l] = tmpA[i][j];
                l++;
            }
        }
        l = 0;
        for(j = 0; j < N; j++)
        {
            for(i = 0; i < P; i++)
            {
                BB[l] = tmpB[i][j];
                l++;
            }
        }
        for(i = 0; i < M; i++)  // right side for the least square part
            CC[i] = rsideA[i];
        for(i = 0; i < P; i++)  // right side for the constrained part
            DD[i] = rsideB[i];
        FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                  BB, &LDB, CC, DD, XX, ///// double array B, int LDB, double array C, D, X,
                                  work, &LWORK, &INFO);
        if(INFO != 0)
        {
            printf("WARNING:  Build_Mag_field_on_individual_dual_cell_P1_DG()\n");
            printf("dgglse returns %d\n", INFO);
        }

        if(debug == YES)
        {
            for(i = 0; i < polyg->n_sides; i++)
            {
                       printf("\n\n-------- Tri-Piece [%d] Reconstructed B:\n", i);
                       printf("x-component = [%g, %g, %g]\n", 
                                XX[i*var_offset], XX[i*var_offset+1], XX[i*var_offset+2]);
                       printf("y-component = [%g, %g, %g]\n", 
                                XX[i*var_offset+3], XX[i*var_offset+4], XX[i*var_offset+5]);

                       // compute trace at outer edge 
                       tmp = tmpB[i+N_sides][i*var_offset]*XX[i*var_offset] + 
                             tmpB[i+N_sides][i*var_offset+1]*XX[i*var_offset+1] +
                             tmpB[i+N_sides][i*var_offset+2]*XX[i*var_offset+2] +
                             tmpB[i+N_sides][i*var_offset+3]*XX[i*var_offset+3] +
                             tmpB[i+N_sides][i*var_offset+4]*XX[i*var_offset+4] +
                             tmpB[i+N_sides][i*var_offset+5]*XX[i*var_offset+5];
                       printf("Average Bn: Trace = %13.12g, org = %13.12g\n", tmp, Bn[i][0]);

                       tmpslope = tmpB[i+2*N_sides][i*var_offset+1]*XX[i*var_offset+1] +
                                  tmpB[i+2*N_sides][i*var_offset+2]*XX[i*var_offset+2] +
                                  tmpB[i+2*N_sides][i*var_offset+4]*XX[i*var_offset+4] +
                                  tmpB[i+2*N_sides][i*var_offset+5]*XX[i*var_offset+5];
                       printf("Slope   Bn: Trace = %13.12g, org = %13.12g\n", tmpslope, Bn[i][1]);

                       printf("-----------------------------------\n\n");
                       // break;
            }
        }

        if(rk_step == RK_STEP)
        {
            for(i = 0; i < polyg->n_sides; i++)
            {
                for(k = 0; k < MAX_N_COEF; k++)
                    polyg_sub_reg_dgB(polyg)[i][0][k] = XX[i*var_offset+k];
                for(k = 0; k < MAX_N_COEF; k++)
                    polyg_sub_reg_dgB(polyg)[i][1][k] = XX[i*var_offset+k+3];
            }
        }
        else
        {
            for(i = 0; i < polyg->n_sides; i++)
            {
                for(k = 0; k < MAX_N_COEF; k++)
                    dual_cell_midsoln[polyg->id].sub_reg_dgB[rk_step][i][0][k] = XX[i*var_offset+k];
                for(k = 0; k < MAX_N_COEF; k++)
                    dual_cell_midsoln[polyg->id].sub_reg_dgB[rk_step][i][1][k] = XX[i*var_offset+k+3];
            }
        }

        //// TMP
        // printf("\n\nCheck fixed B state at tri quadrature pts:\n");
        /***
        for(i = 0; i < N_neg_p; i++)
        {
            tmpBx = tmpB[tmp_P+i*2][neg_p_sect[i]*var_offset]*XX[neg_p_sect[i]*var_offset]  + 
                    tmpB[tmp_P+i*2][neg_p_sect[i]*var_offset+1]*XX[neg_p_sect[i]*var_offset + 1] +
                    tmpB[tmp_P+i*2][neg_p_sect[i]*var_offset+2]*XX[neg_p_sect[i]*var_offset + 2];

            tmpBy = tmpB[tmp_P+i*2+1][neg_p_sect[i]*var_offset+3]*XX[neg_p_sect[i]*var_offset+3]  +  
                    tmpB[tmp_P+i*2+1][neg_p_sect[i]*var_offset+4]*XX[neg_p_sect[i]*var_offset + 4] +
                    tmpB[tmp_P+i*2+1][neg_p_sect[i]*var_offset+5]*XX[neg_p_sect[i]*var_offset + 5];

            printf("Compute new tmpB(%d) for neg-press, target B (%g, %g), pt(%g, %g), new B (%g, %g)\n",
                   neg_p_sect[i],  neg_p_qB[i][0][0],  neg_p_qB[i][0][1], neg_p_qt_crds[i][0][0],
                                   neg_p_qt_crds[i][0][1], tmpBx, tmpBy);
        }
        ***/
        //// END::: TMP

        // Print_by_press_at_tri_quadrature_on_dual_cell_Gauss_N2(fr, polyg,
        //        midsoln, dual_cell_midsoln, rk_step);
        // printf("\n\n----Dual %d left Build_Mag_field_on_individual_dual_cell_P1_DG()\n",polyg->id);
        // check_dual_cell_face_B_match_edge(polyg, dual_cell_midsoln, rk_step);
        // printf("WARNING: Stop in Build_Mag_field_on_individual_dual_cell_P1_DG()\n");
        // clean_up(0);
}

//// This function is used only for limiting negative pressure in the solution.
//// It takes the B values at quadrature points of the tri-cell interface as
//// the linear least-squares constraints.
EXPORT void Build_Mag_field_on_individual_dual_cell_P1_DG(
        POLYGON            *polyg,
        Front              *fr,
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step)
{
        POLYGON    *tmpg;
        double     x_crds[20], y_crds[20], *cent;
        int        i, j, k, By_offset, tmpk, N_sides, tmp_P, tmp_M;
        POINT      *p0, *p1, *pc;
        double     phi0[20][3], phi1[20][3], phi2[20][3]; 
        double     rsideB[80], rsideA[80];
        double     len, t[3], nor[3], Bn[20][N_COEF_EDGE];
        int        M, N, P, LDA, LDB, l, LWORK, INFO;
        double     AA[2800], BB[2800], CC[800], DD[800], XX[800], work[15000], tmp,tmpslope;
        double     B_cent[3];
        int        debug = NO, var_offset = 6;
        double     val[45], area[20], sqrt_area[20], sqrt_area1, area2, sqrt_area2, *tri_cent, vertB[3];
        double     ***conformal_basis_tri, econ_u[10];
        double     **DG_basis_tri;
        double     dx, dy, x_bar, y_bar, one_3rd, two_3rd;
        TRI        **tris;
        Locstate   sts[20];
        static double  **outputB, **outputA = NULL;
        double     pcrds0[3], pcrds1[3], pcrds2[3], piece_cent[20][3], con_u[10];
        double     qB[20][4][2], qt_crds[20][4][3], tmp_sum_Bn, tmpBx, tmpBy;
        double     qB_2[20][5][2], qt_crds_2[20][5][3]; // save B value at the tri-mesh cell interface with
                                                        // lower pressure
        double             neg_p_qB[20][5][2], neg_p_qt_crds[20][5][3];
        int                N_neg_p, N_other, neg_p_sect[20], other_sect[20];
       
        static double **tmpA = NULL, **tmpB;

        printf("\n\n----Dual %d enter  Build_Mag_field_on_individual_dual_cell_P1_DG()\n",polyg->id);

        one_3rd = 1.0/3.0; two_3rd = 2.0/3.0;

        if(NULL == tmpA)
        {
            /// M: number of least square eqn, from input argument
            /// N: number of unknown;
            /// P: number of constraint eqns, which are satisfied exactly.

            // N: each tri-piece has two linear polynomials for x- and y-component of B(x,y) 
            //    respectively. Each linear polynomial has 3 unknowns.
            N = 6*MAX_N_POLY_SIDE;

            // 4n (match trace avg and slope exactly) + n divergence-free condition - 1 (avg of trace at inner edge). 
            P = 2*MAX_N_POLY_SIDE + 2*MAX_N_POLY_SIDE + 2*MAX_N_POLY_SIDE + MAX_N_POLY_SIDE;

            // M: choose x- and y-component of B values at quadrature points on edges of the tri-grid.
            // This leads to some linear dependency between the constraint and least-square eqns.
            // M = 2*N_sides;  

            // M: choose x- and y-component of B values at 3 vertices of the triangular- subregions.
            //    + x- and y-component of B values at quadrature pt of the overlay tri-cell where flux is computed.
            M = 2*(3*MAX_N_POLY_SIDE) + 2*MAX_N_POLY_SIDE;

            matrix(&tmpA, M, N,sizeof(double));
            // matrix(&tmpB, P+1, N,sizeof(double)); 
            matrix(&tmpB, (2*MAX_N_POLY_SIDE + 2*MAX_N_POLY_SIDE + MAX_N_POLY_SIDE), N,sizeof(double));
        }

        pcrds0[2] = pcrds1[2] = pcrds2[2] = 0.0;

        Bn_at_dual_cell_edges(polyg,midsoln,dual_cell_midsoln,rk_step,Bn);

        // part 1) quadrature pts at edge of tri-meshes where flux is calculated.
        Collect_by_press_at_tri_quadrature_on_dual_cell_Gauss_N2(fr, polyg,
               midsoln, dual_cell_midsoln, rk_step, neg_p_qB, neg_p_qt_crds,&N_neg_p,neg_p_sect,
               qB_2, qt_crds_2, &N_other,other_sect);
         
        cent = polyg_centroid(polyg);
        pcrds0[0] = cent[0];
        pcrds0[1] = cent[1];
        // By_offset: offset for where eqns for Y-component of B starts.
        By_offset = polyg->n_sides+1;
        N_sides = polyg->n_sides;

        /// M: number of least square eqn, from input argument
        /// N: number of unknown;
        /// P: number of constraint eqns, which are satisfied exactly.

        // N: each tri-piece has two linear polynomials for x- and y-component of B(x,y) 
        //    respectively. Each linear polynomial has 3 unknowns.
        N = 6*N_sides; 

        // P: 2*(n inner edges + n outer edges) + n -1 = 5n -1. 
        // 4n (match trace avg and slope exactly) + n divergence-free condition - 1 (avg of trace at inner edge). 
        // + quadrature points where pressure is negative
        P = 2*N_sides + 2*N_sides + N_sides - 1 + 2*N_neg_p;
        // P = 2*N_sides + N_sides;

        // M: choose x- and y-component of B values at 2 outer vertices of triangular subregion.
        //    + x- and y-component of B values at quadrature pt of the overlay tri-cell where flux is computed.
        // M = 2*(N_sides) + 2*N_sides;
        M = 2*(2*N_sides) + 2*N_other;

        // part 2) quadrature pts where we do least-square fitting for magnetic field.
        /*** OLD:: 12/04/2012
        for(i = 0; i < polyg->n_sides; i++)
        {
            p0 = Point_of_polyg(polyg)[i];
            p1 = Point_of_polyg(polyg)[(i+1)%N_sides];

            qt_crds[i][0][0] = pcrds0[0];     // x-coord of centroid of dual cell
            qt_crds[i][0][1] = pcrds0[1];     // y-coord of centroid of dual cell
            qt_crds[i][1][0] = Coords(p0)[0];
            qt_crds[i][1][1] = Coords(p0)[1];
            qt_crds[i][2][0] = Coords(p1)[0];
            qt_crds[i][2][1] = Coords(p1)[1];
        }
        quadrature_Mag_at_tri_for_P1(polyg,fr,midsoln,rk_step,qt_crds,qB);
        ***/

        if(P > N)
        {
            printf("\n\nERROR: Dual cell (%d) in Build_Mag_field_on_individual_dual_cell_P1_DG()\n",polyg->id);
            printf("Too many quadrature points with negative pressure P = %d, N = %d\n", P, N);
            Print_by_press_at_tri_quadrature_on_dual_cell_Gauss_N2(fr, polyg,
                 midsoln, dual_cell_midsoln, rk_step);
            clean_up(ERROR);
        }

        tris = tri_at_polyg_vert(polyg);
        if(rk_step == RK_STEP)
        {
            for(i = 0; i < polyg->n_sides; i++)
                sts[i] = tris[i]->st;
        }
        else
        {
            for(i = 0; i < polyg->n_sides; i++)
                sts[i] = midsoln[tris[i]->id].st[rk_step];
        }
        for(i = 0; i < polyg->n_sides; i++)
        {
            qt_crds[i][1][0] = Coords(Point_of_polyg(polyg)[i])[0];
            qt_crds[i][1][1] = Coords(Point_of_polyg(polyg)[i])[1];
            qt_crds[i][2][0] = Coords(Point_of_polyg(polyg)[(i+1)%N_sides])[0];
            qt_crds[i][2][1] = Coords(Point_of_polyg(polyg)[(i+1)%N_sides])[1];
        }
        for(i = 0; i < polyg->n_sides; i++)
        {
            sqrt_area[i] = sqrt(fg_area(tris[i]));
            con_u_at_pt(sts[i], qt_crds[i][1], fg_centroid(tris[i]), sqrt_area[i], con_u);
            qB[i][1][0] = con_u[5];
            qB[i][1][1] = con_u[6];


            sqrt_area[(i+1)%N_sides] = sqrt(fg_area(tris[(i+1)%N_sides]));
            con_u_at_pt(sts[(i+1)%N_sides], qt_crds[i][2],
                            fg_centroid(tris[(i+1)%N_sides]), sqrt_area[(i+1)%N_sides], con_u);
            qB[i][2][0] = con_u[5];
            qB[i][2][1] = con_u[6];
        }
        // END::: quadrature pts where we do least-square fitting for magnetic field.

        for(i = 0; i < M; i++)
        {
            for(j = 0; j < N; j++)
                tmpA[i][j] = 0.0;
        }
        for(i = 0; i < P; i++)
        {
            for(j = 0; j < N; j++)
                tmpB[i][j] = 0.0;
        }

        /// 1) divergence-free
        for(i = 0; i < polyg->n_sides; i++)
        {
            tmpB[i][i*var_offset+1] = 1.0; // a1
            tmpB[i][i*var_offset+5] = 1.0; // b2
            rsideB[i] = 0.0;
        }
   
        /// 2) match traces on outer edges
        for(i = 0; i < polyg->n_sides; i++)
        {
            p0 = Point_of_polyg(polyg)[i];
            p1 = Point_of_polyg(polyg)[(i+1)%N_sides];
            pcrds1[0] = Coords(p0)[0];
            pcrds1[1] = Coords(p0)[1];

            pcrds2[0] = Coords(p1)[0];
            pcrds2[1] = Coords(p1)[1];

            area[i] = triangle_area_3d(pcrds0, pcrds1, pcrds2);
            sqrt_area[i] = sqrt_area1 = sqrt(area[i]);
            for(k = 0; k < 2; k++)
                piece_cent[i][k] = one_3rd*(pcrds0[k]+ pcrds1[k] + pcrds2[k]);

                       // qt_crds[i][0][0] = pcrds0[0];
                       // qt_crds[i][0][1] = pcrds0[1];
                       // qt_crds[i][1][0] = pcrds1[0];
                       // qt_crds[i][1][1] = pcrds1[1];
                       // qt_crds[i][2][0] = pcrds2[0];
                       // qt_crds[i][2][1] = pcrds2[1];
 
                       /// TMP
                       // printf("piece[%d] centroid[%13.12g, %13.12g], qcrds[%13.12g, %13.12g]\n", i,
                       //           piece_cent[i][0], piece_cent[i][1],
                       //           qt_crds[i][0][0], qt_crds[i][0][1]);
                       /// END::: TMP
                       
            dx = 0.5*(pcrds2[0] - pcrds1[0]);
            dy = 0.5*(pcrds2[1] - pcrds1[1]);
            x_bar = 0.5*(pcrds2[0] + pcrds1[0]);
            y_bar = 0.5*(pcrds2[1] + pcrds1[1]);

            for(k = 0; k < 2; k++)
                t[k] = polyg_side_vector(polyg)[i][k];
            nor[0] = t[1]; nor[1] = -t[0];

            // average of trace
            tmpB[i+N_sides][i*var_offset  ] = nor[0];
            tmpB[i+N_sides][i*var_offset+1] = nor[0]*(x_bar-piece_cent[i][0])/sqrt_area1;
            tmpB[i+N_sides][i*var_offset+2] = nor[0]*(y_bar-piece_cent[i][1])/sqrt_area1;
            tmpB[i+N_sides][i*var_offset+3] = nor[1];
            tmpB[i+N_sides][i*var_offset+4] = nor[1]*(x_bar-piece_cent[i][0])/sqrt_area1;
            tmpB[i+N_sides][i*var_offset+5] = nor[1]*(y_bar-piece_cent[i][1])/sqrt_area1;
            rsideB[i+N_sides] = Bn[i][0];

            // slope of trace
            tmpB[i+2*N_sides][i*var_offset+1] = nor[0]*dx/sqrt_area1;
            tmpB[i+2*N_sides][i*var_offset+2] = nor[0]*dy/sqrt_area1;
            tmpB[i+2*N_sides][i*var_offset+4] = nor[1]*dx/sqrt_area1;
            tmpB[i+2*N_sides][i*var_offset+5] = nor[1]*dy/sqrt_area1;
            rsideB[i+2*N_sides] = Bn[i][1];
        }//// END::: 2) match traces on outer edges
                   
        /// 3) match traces on inner edges
        for(i = 0; i < polyg->n_sides; i++)
        {
            p1 = Point_of_polyg(polyg)[(i+1)%N_sides];
            pcrds2[0] = Coords(p1)[0];
            pcrds2[1] = Coords(p1)[1];

            t[0] = pcrds0[0]-pcrds2[0];
            t[1] = pcrds0[1]-pcrds2[1];
            tmp = sqrt(sqr(t[0]) + sqr(t[1]));
            nor[0] = t[1]/tmp; nor[1] = -t[0]/tmp;

            sqrt_area1 = sqrt_area[i];
            sqrt_area2 = sqrt_area[(i+1)%N_sides];

            dx = 0.5*(pcrds0[0] - pcrds2[0]);
            dy = 0.5*(pcrds0[1] - pcrds2[1]);
            x_bar = 0.5*(pcrds2[0] + pcrds0[0]);
            y_bar = 0.5*(pcrds2[1] + pcrds0[1]);

            /// 3.1.a) match slope of trace on inner edge for ith-piece
            tmpB[i+3*N_sides][i*var_offset+1] = nor[0]*dx/sqrt_area1;
            tmpB[i+3*N_sides][i*var_offset+2] = nor[0]*dy/sqrt_area1;
            tmpB[i+3*N_sides][i*var_offset+4] = nor[1]*dx/sqrt_area1;
            tmpB[i+3*N_sides][i*var_offset+5] = nor[1]*dy/sqrt_area1;

            /// 3.2.a) match average of trace on inner edge for ith-piece
            tmpB[i+4*N_sides][i*var_offset  ] = nor[0];
            tmpB[i+4*N_sides][i*var_offset+1] = nor[0]*(x_bar-piece_cent[i][0])/sqrt_area1;
            tmpB[i+4*N_sides][i*var_offset+2] = nor[0]*(y_bar-piece_cent[i][1])/sqrt_area1;
            tmpB[i+4*N_sides][i*var_offset+3] = nor[1];
            tmpB[i+4*N_sides][i*var_offset+4] = nor[1]*(x_bar-piece_cent[i][0])/sqrt_area1;
            tmpB[i+4*N_sides][i*var_offset+5] = nor[1]*(y_bar-piece_cent[i][1])/sqrt_area1;

            nor[0] *= -1.0; nor[1] *= -1.0;
            dx *= -1.0; dy *= -1.0;
            /// 3.1.b) match slope of trace on inner edge for (i+1)th-piece
            tmpB[i+3*N_sides][((i+1)%N_sides)*var_offset+1] = 