/*
*                               gFV.c
*
*       Copyright 1999 by The University at Stony Brook, All rights reserved.
*/

#include <ghyp/ghyp.h>
#include <gdecs/vecdecs.h>

/*
 * Numerical flux selection.  Only the two LF macros are defined here; the
 * HLL, Roe and upwind alternatives are left commented out.
 * NOTE(review): "LF" presumably stands for Lax-Friedrichs, and the code that
 * tests these macros is not in this part of the file -- confirm before
 * changing any of the values.
 */
#define Ez_USE_LF_FLUX      6
#define USE_LF_FLUX         1
// #define USE_HLL_FLUX     2
// #define USE_ROE_FLUX     3
// #define USE_UPWIND_FLUX    4


#if defined (PETSC)
#include "petscksp.h"
#endif /* if defined (PETSC) */

#if defined(PETSC)
/*
 * File-scope variables that exist only in PETSc builds: a two-entry float
 * array initialized to 0.5 and an int initialized to 1.
 * NOTE(review): presumably comm_size is the number of MPI ranks -- the code
 * that uses K and comm_size is not visible here; confirm.
 */
LOCAL   float     K[2] = {0.5,0.5};
LOCAL   int       comm_size = 1;
#endif /* if defined(PETSC) */

/*
 * state_id(i) expands to STATE_ID + i + 1.  The argument is parenthesized so
 * that expressions such as (a ? b : c) or (a << b) keep their intended
 * grouping when substituted into the macro.
 */
#define state_id(i)     (STATE_ID + ((i)+1))

/* Decimal expansions of the fractions 1/3, 2/3, 4/3 and 1/6. */
#define one_3rd         0.33333333333333333333333333
#define two_3rd         0.66666666666666666666666667
#define four_3rd        1.33333333333333333333333333
#define one_6th         0.16666666666666666666666667

/*
 * File-scope storage.  The pointers without an explicit initializer
 * (RK1_rhs_adj, RK1_rhs, RK1_rhs_avg_adj, RK1_rhs_avg) still start out as
 * NULL because objects declared at file scope are zero-initialized.
 */
/* NOTE(review): the remark on BLOCK_SIZE mentions a tolerance, which does not
 * match a size_t named BLOCK_SIZE; its real value is presumably assigned
 * elsewhere -- confirm and reword. */
LOCAL size_t BLOCK_SIZE = 0; /*TOLERANCE - TODO: what is a good value*/
LOCAL Tri_mass_1st_rows   *constrain_mass_1st_rows = NULL;
LOCAL double ***RK0_rhs_adj = NULL, ***RK1_rhs_adj, ***RK0_rhs = NULL, ***RK1_rhs; // storage used by the 4th order TVD RK method
LOCAL double **RK0_rhs_avg_adj = NULL, **RK1_rhs_avg_adj, **RK0_rhs_avg = NULL, **RK1_rhs_avg; // storage used by the 4th order TVD RK method

// #define NEW_LIMIT

#if defined(TWOD)

// LOCAL void      matrix_vec_mult(double**,float*,int,int,float*);

// LOCAL void      matrix_inv(double**,int,double**);
// LOCAL void      comp_mass_matrix(int,TRI*,int,double**);
// LOCAL void      comp_mass_matrix_1st_row(int,TRI*,int,double**);

LOCAL void      grad_vh(float*,double*,int,float*);
// LOCAL float     vh_val(float*,double*,int);
// LOCAL void      con_u_at_pt(Locstate,float*,double*,float*);
LOCAL void      flux_at_pt(float*,Locstate,float*,double*,float*,float*);
LOCAL float     inter_integr(TRI*,Locstate,int,float*,int);
LOCAL float     inter_integr_13_quad(TRI*,Locstate,int,float*,int);
LOCAL float     inter_integr_13_quad_CV(TRI*,Locstate,int,float*,int,Limiting_store**);
LOCAL float     inter_integr_13_quad_flux(TRI*,Locstate,float crds[][2],float fluxx[][10],float fluxy[][10],int,Dual_cell_Mid_soln*);
LOCAL float     inter_integr_13_quad_ver2(TRI*,Locstate,int, float crds[][2], float fluxx[][10], float fluxy[][10],float*);
LOCAL float     inter_integr_3_quad_flux(TRI*,Locstate,float crds[][2],float fluxx[][10],float fluxy[][10],
                                         int,Dual_cell_Mid_soln*);
LOCAL float     inter_integr_3_quad_ver2(TRI*,Locstate,int, float crds[][2], float fluxx[][10], float fluxy[][10],float*);
LOCAL float     inter_integr_center(TRI*,Locstate,int,float*,int);
LOCAL void      edge_integr(TRI*,TRI*,Locstate,Locstate,float,int,int,float*,Front*,float,Mid_soln*,int,int,Dual_cell_Mid_soln*);
LOCAL void      edge_integr_adj_op(TRI*,TRI*,Locstate,Locstate,float,int,int,float*,Front*,float,Mid_soln*,int,Dual_cell_Mid_soln*);
LOCAL void      edge_integr_CV_4pt(TRI*,TRI*,Locstate,Locstate,float,int,int,float*,Front*,float,Mid_soln*,int,Limiting_store**);
LOCAL void      adv_fw(TRI*,TRI*,float,float,Mid_soln*,int,Front*,Limiting_store**,int,int,int);
LOCAL void      adv_fw_MHD_DG(TRI*,TRI*,float,float,Mid_soln*,int,Front*,Limiting_store**,int,int,int,Dual_cell_Mid_soln*);
LOCAL void      adv_B_edge_DG(int,TRI*,TRI*,float,float,Mid_soln*,int,Front*,Limiting_store**,int,int,int,float*);
LOCAL void      adv_fw_ls_high_moment(TRI*,TRI*,float,float,Mid_soln*,int,Front*,Limiting_store**,int,int);
LOCAL void      adv_fw_ls(TRI*,TRI*,float,float,Mid_soln*,int,Front*,Limiting_store**,int,int);
LOCAL void      impose_conservation_constraint(TRI*,TRI*,float,float,Mid_soln*,int,Front*,Limiting_store**);
LOCAL void      constrained_adv_fw(TRI*,TRI*,float,float,Mid_soln*,int,Front*,Limiting_store**);
LOCAL void      ls_adv_fw(TRI*,TRI*,float,float,Mid_soln*,int,Front*,Limiting_store**,int,int);
LOCAL void      ls_adv_fw_new(TRI*,TRI*,float,float,Mid_soln*,int,Front*,Limiting_store**,int,int);
LOCAL void      constrained_adv_fw_min_E(TRI*,TRI*,float,float,Mid_soln*,int,Front*,Limiting_store**);
LOCAL void      adv_fw_cell_avg(TRI*,TRI*,float,float,Mid_soln*,int,Front*,Limiting_store**);
LOCAL void      adv_fw_cell_avg_MHD(TRI*,TRI*,float,float,Mid_soln*,int,Front*,Limiting_store**,int,int,int,Dual_cell_Mid_soln*);
LOCAL void      dup_adv_fw_for_test(TRI*,TRI*,float,float,Mid_soln*,int,Front*, double   soln[][15]);
LOCAL float     compute_max_speed(Locstate);


LOCAL int       bdry_tri_adv_fw(TRI*,TRI*,Front*);
LOCAL int       tri_cent_outside(TRI*,float*,float*,int,BDRY_SIDE*);
// LOCAL void      update_buffer(Front*,Mid_soln*,int);
// LOCAL void      update_buffer_x_per_y_ref(Front*,Mid_soln*,int);
// LOCAL void      update_buffer_x_ref(Front*,Mid_soln*,int);

LOCAL void      limiting_soln_with_buffer_tris(Front*,Mid_soln*,int,int,int,Limiting_store**);
LOCAL void      local_limiting_soln_with_buffer_tris(Front*,Mid_soln*,int,Limiting_store**);
LOCAL void      redo_limiting_3rd_degreeP3(TRI*,TRI *nbtri[3],Mid_soln*,int);
// LOCAL void      limiting_3rd_degreeP3(TRI*,TRI *nbtri[3],Mid_soln*,int,int);
// LOCAL void      limiting_2nd_degreeP3(TRI*,TRI *nbtri[3],Mid_soln*,int);
// LOCAL void      limiting_2nd_degree(TRI*,TRI *nbtri[3],Mid_soln*,int);
// LOCAL void      limiting_1st_degreeP3(TRI*,TRI *nbtri[3],Mid_soln*,int,int);
// LOCAL void      limiting_1st_degree(TRI*,TRI *nbtri[],int,Mid_soln*,int);

LOCAL float     maxmod(float, float);
// LOCAL float     minmod2(float, float);
LOCAL float     TVB_minmod(float,float,float);
LOCAL void      weight_mod(float*,int,float*);
// LOCAL int       WENO_mod(float*,float*,float*,int,float,float*);
// LOCAL int       WENO_mod_on_3rd(float*,float*,float*,int,float,float*);
// LOCAL int       WENO_mod_on_2nd(float*,float*,float*,int,float,float*);
// LOCAL int       WENO_mod_1(float*,float*,float*,int,float*);

// LOCAL void      update_coef(TRI*,Mid_soln*,int,size_t);
LOCAL int       find_tri(double*);
LOCAL int       find_tri_from_ver(float*,float*,float*);
// LOCAL void      print_matrix(float**,int,int);
// LOCAL void      matrix_matrix_mult(double**,double**,int,int,double**);
LOCAL void      print_state_coef(Locstate);
LOCAL void      print_mass_matrix_1st_row(double**);
LOCAL float     compute_alpha_of_LF_flux(Locstate,Locstate,float*);
LOCAL float     compute_alpha_of_LF_flux_MHD(Locstate,Locstate,float*);
// LOCAL void      inverse_matrix(double**,int,double**);
// LOCAL void      print_ldb_matrix(const char*,int,int,double**,const char*);
LOCAL void      g_exact_R_flux(float*,Locstate,Locstate,float*);

LOCAL void      DG_boundary_state(Locstate,TRI*,float*,Front*,int,float*,float,Mid_soln*,int);
LOCAL void      impose_gas_sine_states(Locstate,TRI*,float*,Front*,int,float*,float,Mid_soln*,int,float);
LOCAL void      DG_NEU_boundary_state(Locstate,TRI*,float*,Front*,int,float*,float,Mid_soln*,int);
LOCAL void      DG_CONST_P_boundary_state(Locstate,TRI*,float*,Front*,int,float*,float,Mid_soln*,int);
LOCAL void      flux_from_st(Locstate,float*,float*);
LOCAL int       solution_from_tri(Locstate,Front*,float*);
LOCAL void      print_bottom_tris_state(Front*);
// LOCAL void      zero_moments(Locstate);
LOCAL void      sample_soln_along_line(const char*,int,Front*);
LOCAL void      show_boundary_tri(const char*,int,Front*);
LOCAL void      attach_buffer(Front*,Mid_soln*,int,TRI*,TRI**,int*);
// LOCAL void      attach_buffer_states(Front*,Mid_soln*,int,TRI*);
// LOCAL void      shock_vort_attach_buffer_states(Front*,Mid_soln*,int,TRI*);
// LOCAL void      db_Mach_attach_buffer_states(Front*,Mid_soln*,int,TRI*);

LOCAL float     compute_glb_alpha_of_LF(Locstate);
// LOCAL int       overshoot_state(TRI*,Mid_soln*,int);
// LOCAL int       overshoot_state_Shu_V(TRI*,Mid_soln*,int);

LOCAL void      build_1st_order_poly(TRI*,TRI *nbtri[3],Mid_soln*,
                 float*,float nbuave[3][4],int);
LOCAL void      compute_L1(Front*);
LOCAL void      gas_sine_L1(Front*);
LOCAL float     vortex_evo_den_soln(float*,float);
LOCAL void      vortex_evo_init(Locstate,TRI*);
LOCAL float     FM_vort_soln(float*,Front*);
LOCAL void      HLL_flux(float*,float*,float*,Locstate,Locstate,float*,int);
LOCAL void      HLL_flux_2(float*,float*,float*,Locstate,Locstate,float*,int);
LOCAL void      Roe_flux(float*,float*,float*,Locstate,Locstate,Locstate,Locstate,float*,int);
LOCAL void      upwind_flux(float*,float*,float*,Locstate,Locstate,float*,int);
LOCAL void      HLL_flux_ideal_MHD(float*,float*,float*,Locstate,Locstate,float*,int);

LOCAL void      Burgers_flux(float*,float*,float*,Locstate,Locstate,float*,int);
LOCAL void      con_u_to_state(float*,int,Locstate);
LOCAL void      inverse_2_2_matrix(double**,int,double**);
LOCAL void      d_inverse_2_2_matrix(double**,int,double**);
LOCAL void      LR_matrix_in_dir(float*,Locstate,float**,float**);
LOCAL void      d_matrix_vec_mult(float**,float*,int,int,float*);
LOCAL void      ld_matrix_vec_mult(double**,float*,int,int,float*);
LOCAL void      d_matrix_matrix_mult(float**,float**,int,int,float**);
LOCAL void      matrix_matrix_mult_diff_size(double**,double**,int,int,int,int,double**);
LOCAL float     gas_sine_den_soln(float*,float);
LOCAL void      compute_tri_geom(float*,double*,double*,double*,float*);
LOCAL int       solution_from_loc_tris(Locstate,TRI**,int,size_t,float*);
LOCAL int       install_local_tris(TRI*,TRI**);
LOCAL int       fsame_sign(float*,int);
LOCAL int       Shu_V_smooth_dect(TRI*,Mid_soln*,int,float*,float nbuave[ ][4],int*);
// LOCAL void      extrema_detec(float*,float nbuave[ ][4],int*);

/*
 * Prototype of the Fortran routine dgglse, given C linkage when this file is
 * compiled as C++.  Every argument is passed by pointer.  The argument names
 * noted at the right of each line match LAPACK's DGGLSE (linear
 * equality-constrained least squares solver).
 * NOTE(review): confirm that the library linked in is LAPACK.
 */
#if defined(__cplusplus)
extern "C" {
#endif /* defined(__cplusplus) */
FORTRAN     void    FORTRAN_NAME(dgglse)(int*,int*,int*,double*,int*, ///// int M,int N,int P, double array A, int LDA
                                double*,int*,double*,double*,double*, ///// double array B, int LDB, C, D, X,
                                double*,int*,int*);                   ///// double array WORK, int LWORK, int INFO
#if defined(__cplusplus)
}
#endif /* defined(__cplusplus) */


// LOCAL double **Lmass_matrix = NULL, **mass_inv;
/*
 * File-scope state.  NOTE(review): the code that reads and writes these
 * variables is not in this part of the file; the remarks below only restate
 * the declarations and the notes that were already here.
 */
LOCAL Tri_HR_sten *alltri_HR_sten = NULL;

LOCAL double    dmach = 0.0;
LOCAL int       debug_flag = NO; 
// LOCAL size_t    sizest = 0;
// LOCAL float     glb_alpha = -HUGE_VAL;
LOCAL double    **mass_1st_row = NULL, **mass_1st_rows[20]; // mass_1st_rows is a fixed table of 20 pointers
// LOCAL float       newdt; 
// LOCAL TRI         *time_on_tri;
LOCAL int       HR_times = 1;
LOCAL float     press_min = 1.0e-6; // presumably a lower bound on pressure -- confirm
// newdt and time_on_tri are declared EXPORT here; the LOCAL declarations of
// the same names above are commented out.
EXPORT float    newdt, current_time, RK_bdry_time;
EXPORT TRI      *time_on_tri;

// The following two groups of variables specify the edge quadrature rules:
// Gauss (Gauss_N, q, qw) and Lobatto (Lobatto_N, Lo_q, Lo_qw).
LOCAL int       Gauss_N = 2; // N = 4 for P3; N = 3 for P2 ---- NOTE: _edge_flux_store only allocates for max 5 points 
LOCAL double    q[9], qw[9]; // room for 9 entries each; presumably abscissas and weights -- confirm
LOCAL int       Lobatto_N = 2; // Number of Lobatto quadrature points
LOCAL double    Lo_q[9], Lo_qw[9];  // Abscissas and weight coefficients for Lobatto quadrature 
// LOCAL int       Use_dual_cell_state = NO;

// States for the db_Mach (double Mach reflection) problem with a Mach 10 shock.
// NOTE(review): the four entries are presumably (density, velocity x,
// velocity y, pressure) -- confirm against the code that reads them.
LOCAL float     post_sk_st[4] = {8, 8.25, 0.0, 116.5};  
                // in the y-direction. the velocity should be
                // v[0] = 8.25*cos(radians(60.0)), v[1] = 8.25*sin(radians(60.0))
LOCAL float     pre_sk_st[4] = {1.4, 0.0, 0.0, 1.0};
LOCAL float     sk_y_pos; // The y coordinate of the shock at x = 0.0.
LOCAL float     burg_init(float);
LOCAL float     burg_sol(float*,float,float);
LOCAL double    deriv_burg_init(double);
LOCAL double    fprime(double);
LOCAL double    fprimeprime(double);
LOCAL void      Burgers_L1(Front*);
// LOCAL void      print_tri_crds(TRI*);
LOCAL int       max_angle(float*,int);
LOCAL void      g_sine_evolution_initializer(TRI*,Locstate,float);
// LOCAL float     cond_num(float A[ ][2]);
LOCAL float     comput_loc_alpha(int,Locstate,float*,Locstate,float*,float);
LOCAL int       is_phy_bdry_side(TRI*,int);
LOCAL int       unphysical_state(TRI*,Mid_soln*,int);
LOCAL void      reflect_state_about_y(TRI*,Locstate,double**);
LOCAL void      local_find_time_step_on_tris(Front*);
LOCAL int       jump_at_quadrature(TRI*,TRI *nbtri[3],Mid_soln*,int,
                  int*,int*,int*,int*,float*,float*,float*,float*,int);
LOCAL void      fix_overshoot(TRI*,Mid_soln*,int,Front*);
// LOCAL void      fix_unphysical_st(TRI*,Mid_soln*,int,Front*);
LOCAL void      show_states_at_edge_quadrature(TRI*,Locstate);
// LOCAL int       unphysical_st_at_quadrature(TRI*,Locstate);
LOCAL void      p1_L2projection_ver2(TRI*,Locstate,Locstate);
// LOCAL void      u_average_indx(TRI*,Locstate,int,float*);
// LOCAL void      R_degree3_term_averageP3(TRI*,Locstate,double**,int,float*);
//LOCAL void      R_degree3_term_average_liuP3(TRI*,Locstate,double**,int,float*);
LOCAL int       unify_weight(float*,int,float*);
// LOCAL void      R_degree2_above_term_averageP3(TRI*,Locstate,double**,float*);
LOCAL void      Shu_V_sten_coeff(TRI*,Mid_soln*,int,float*,float nbuave[3][4],float*,float*);
LOCAL int       WENO_mod_cand1_P3(float*,float*,int,float,float*);
LOCAL int       WENO_mod_cand1_P2(float*,float*,int,float,float*);
LOCAL void      limit_by_edge_cent_val(float coef[][2],float A[][2],float*);
LOCAL void      harmonic_mod(float*,float*,int,float*);
LOCAL void      least_sqr(float*,float A[][2],float*);
LOCAL void      weno_weight_ls(float coef[][2],float A[][2],float*,float A_edge[][2],int,float*);
LOCAL void      comp_coef_3eqns(float A[][3],float*,float*);
LOCAL void      degree3_term_Int(Locstate,double**,float*);
// LOCAL void      update_db_Mach_buffer(Front*,Mid_soln*,int);
LOCAL void      pp_send_interior_fields(int*,int,int,float,Front*,Mid_soln*,int,int);
LOCAL void      pp_receive_interior_fields(int*,int,int,float,Front*,Mid_soln*,int,Limiting_store**,int);
// LOCAL void      tmp_set_send_domain(float*,float*,int,int,RECT_GRID*,float);
// LOCAL void      tmp_set_receive_domain(float*,float*,int,int,RECT_GRID*,float);
// LOCAL int       count_num_of_tris_inside(float*,float*,Front*,TRI***);
LOCAL void      unbundle_single_st(Locstate,size_t,byte*);
LOCAL void      bundle_single_st(Locstate,size_t,byte*);
// LOCAL Buf_soln  *find_match_tri(float*,Buf_soln*,int);
LOCAL int       tri_outside_db_Mach_boundary(TRI*);
LOCAL int       tri_outside_Mach_step_boundary(TRI*);
LOCAL void      local_limiting_soln_with_buffer_tris_multiple_times(Front*,Mid_soln*,int,int,Limiting_store**);
LOCAL void      fix_neg_press(Locstate,float*,int);
LOCAL int       unphysical_st_at_pt(TRI*,Locstate,float*);
LOCAL void      fix_unphysical_st_at_pt(TRI*,Locstate,float*,Locstate);
LOCAL void      limiting_3rd_degreeP3_char(TRI*,TRI *nbtri[3],Mid_soln*,int,int,Locstate*,float **L[],float **R[]);
LOCAL void      limiting_1st_degreeP3_char(TRI*,TRI *nbtri[3],Mid_soln*,int,int,Locstate*,float **L[],float **R[]);
LOCAL void      limiting_2nd_degreeP3_char(TRI*,TRI *nbtri[3],Mid_soln*,int,Locstate*,float **L[],float **R[]);
LOCAL void      compute_Roemean(Locstate,Locstate,Locstate,float*,float*);
LOCAL int       WENO_mod_cand1_2nd_P3(float*,int,float,float*);
LOCAL int       WENO_mod_cand1_3rd_P3(float*,int,float,float*);
LOCAL void      comput_Roe(TRI*,TRI *nbtri[3],Mid_soln*,int,Locstate*,float **L[],float **R[]);
// LOCAL void      convert_con_char(Locstate,float**,Locstate);
LOCAL void      convert_cand_conu_charu(TRI*,TRI  *nbtri[3],Mid_soln*,int,float**);
// LOCAL void      save_reconstruct_st(TRI*,Mid_soln*,int,float conu[4][MAX_N_COEF]);
LOCAL void      test_unphysical_st_at_quadrature(TRI*,Mid_soln*);
LOCAL void      print_conu_state(float conu[4][MAX_N_COEF]);
// LOCAL void      weno_combine_P3(TRI*,float u[3][4][MAX_N_COEF],float out[4][MAX_N_COEF]);
// LOCAL void      save_weno_st_to_midsoln(TRI*,Mid_soln*,int,float conu[4][MAX_N_COEF]);
LOCAL void      impose_Burgers_states(Locstate,TRI*,float*,Front*,int,float*,float,Mid_soln*,int,float);
LOCAL void      L2_state_build_13pts(TRI*,float crds[][2],float con_u[][8],Locstate);
LOCAL void      p3_project_to_p2(double**,Locstate);
LOCAL void      reflect_state_about_y_ver2(TRI*,Locstate,float**,Limiting_store*);
LOCAL void      accurate_vort_L1(Front*);
// LOCAL void      accurate_mag_vort_L1(Front*);
// LOCAL float     mag_vort_sol(float*,float,float*);
LOCAL float     vort_sol(float*,float,float*);
LOCAL void      attach_cell_averages_in_buffer(Front*,Mid_soln*);
LOCAL void      attach_cell_averages_in_buffer_at_step(Front*,int,Mid_soln*);
LOCAL void      update_buffer_x_ref_for_constraint(Front*,Mid_soln*);
LOCAL void      update_buffer_for_constraint(Front*,Mid_soln*);
// LOCAL Buf_soln *find_match_tri_in_periodicBC( RECT_GRID*, float*,Buf_soln*,int);
LOCAL void      set_RK_time_for_bdry(Front*,Front*,int,float);
LOCAL void      WENO_constrained_trans_B_reconstruction(Front*,Mid_soln*,int,Limiting_store**);
LOCAL void      constrained_trans_B_reconstruction_DG(Front*,Mid_soln*,int,Limiting_store**);
LOCAL void      preliminary_reconstruction_zone_center(Front*,Mid_soln*,int,Limiting_store**);
LOCAL void      tri_comput_P3_polynomials_from_avg(TRI*,TRI *nbtri[],int,Mid_soln*,Limiting_store**,int);
LOCAL void      tri_comput_P3_polynomials_from_avg_by_min(TRI*,TRI *nbtri[],int,Mid_soln*,Limiting_store**,int);

LOCAL void      tri_comput_P2_polynomials_from_avg(TRI*,TRI *nbtri[],int,Mid_soln*,Limiting_store**,int);
// LOCAL void      tri_comput_P2_polynomials_from_avg_MHD(TRI*,TRI *nbtri[],int,Mid_soln*,Limiting_store**,int,double**);
LOCAL void      tri_comput_P2_polynomials_from_avg_MHD_ver1(TRI*,TRI *nbtri[],int,Mid_soln*,Limiting_store**,int);
LOCAL void      tri_comput_P2_polynomials_from_avg_MHD_ver2(TRI*,TRI *nbtri[],int,Mid_soln*,Limiting_store**,int);

LOCAL void      tri_comput_P1_polynomials_from_avg(TRI*,TRI *nbtri[],int,Mid_soln*,Limiting_store**,int);
LOCAL void      tri_comput_P1_polynomials_from_avg_MHD(TRI*,TRI *nbtri[],int,Mid_soln*,Limiting_store**,int);

LOCAL void      tri_B_P1_polynomial_reconstruction_consv(TRI*,Mid_soln*,Limiting_store**,int);
LOCAL void      tri_B_P1_polynomial_reconstruction_ver1(TRI*,Mid_soln*,Limiting_store**,int);
LOCAL void      tri_B_P1_polynomial_reconstruction_ver2(TRI*,Mid_soln*,Limiting_store**,int);//NEW
LOCAL void      tri_B_P1_polynomial_reconstruction_upwind(TRI*,Mid_soln*,Limiting_store**,int);

LOCAL void      tri_B_P2_polynomial_reconstruction_consv(TRI*,Mid_soln*,Limiting_store**,int);
LOCAL int       tri_B_P2_polynomial_reconstruction_9edge_1(TRI*,Mid_soln*,Limiting_store**,int);
LOCAL int       tri_B_P2_polynomial_reconstruction_new(TRI*,Mid_soln*,Limiting_store**,int);

/* Each routine in this group is declared exactly once. */
LOCAL void      show_state_to_rect_Mach_step(const char*,int,Front*);
LOCAL void      show_state_to_rotor_MHD(const char*,int,Front*);
LOCAL void      show_Az_state_MHD( const char*,int,Front*);

// LOCAL void      get_sten_neighbr(TRI  *nbtri[],TRI*,TRI  *nntri[],int*);
// LOCAL void      get_sten_neighbr_B_field(TRI  *nbtri[],TRI*,TRI  *nntri[],int*);
LOCAL void      du_indx(TRI*,Locstate,int,double*,double*,double*);
// LOCAL void      R_degree3_term_pt_P3(Locstate,int,double*,double*,float*);
LOCAL void      adv_E_vertice_B_edge(TRI*,TRI*,float,float,Mid_soln*,int,Front*);
LOCAL void      adv_E_vertice_B_edge_LLF_U_star(TRI*,TRI*,float,float,Mid_soln*,int,Front*);
LOCAL void      adv_E_vertice_B_edge_DG(TRI*,TRI*,float,float,Mid_soln*,int,Front*);//Huijing
LOCAL void      HLL_soln_at_pt(POINT*,TRI*,int,Mid_soln*,int,double*);
LOCAL void      HLL_soln_ideal_MHD(float*,float*,float*,Locstate,Locstate,float*,int);
LOCAL double    HLL_soln_ideal_MHD_Elec_z(float*,float*,float*,Locstate,Locstate,int);
LOCAL double    LF_soln_ideal_MHD_Elec_z(float*,float*,float*,Locstate,Locstate,double,int);
LOCAL void      adv_B_edge(int,TRI*,TRI*,double,int,Mid_soln*,Front*);
LOCAL double    Elec_z_at_pt(POINT*,TRI*,int,Mid_soln*,int,double,int);
// LOCAL void      update_edge_B_buffer(Front*,Mid_soln*,int);
// LOCAL void      tri_B_P0_polynomial_reconstruction(TRI*,Mid_soln*,int,double*);
// LOCAL void      init_tri_comput_P1_polynomials_from_avg_MHD(TRI*,TRI *tris[],int);
LOCAL int       Mag_p2_edge_sten(TRI*,int,TRI *tris[],int*);
LOCAL int       Mag_p2_9edge_sten_1(TRI*,int,TRI *tris[],int*);
LOCAL int       Mag_p2_9edge_sten_2(TRI*,int,TRI *tris[],int*);
LOCAL int       Mag_p2_9edge_reverse_sten(TRI*,int,TRI *tris[],int*);
LOCAL int       Mag_p2_9edge_reverse_sten_5pt_vertex(TRI*,int,TRI *tris[],int*);

LOCAL int       Mag_p1_5edge_reverse_sten_5pt_vertex(TRI*,int,TRI *tris[],int*);
LOCAL int       Mag_p1_5edge_reverse_sten(TRI*,int,TRI *tris[],int*);
LOCAL int       Mag_p1_5edge_central_sten(TRI*,TRI *tris[],int*);
LOCAL int       Mag_p1_3edge_central_sten_new(TRI*,TRI *tris[],int*);

// LOCAL double    weno_weight_P2(TRI*,double*);
// LOCAL void      tris_between_edge_neighbrs(TRI*,POINT*,int,TRI *tris[],int*);
LOCAL void      get_sten_consv_var_P2(TRI  *nbtri[ ],TRI*,TRI *nntri[],int*);
// LOCAL void      update_center_and_edge_phys_buffer(Front*,Front*,Mid_soln*,int);
LOCAL void      assign_buf_state_blast_MHD(Locstate,TRI*,Mid_soln*,int,int);
LOCAL int       Mag_p2_11edge_central_sten(TRI*,TRI *tris[],int*);
// LOCAL int       count_num_tris_vertex(TRI*,POINT*,int*);
LOCAL void      verify_edge_B(TRI*,int,Mid_soln*,double*,double*);
LOCAL int       thermal_press_out_range(TRI*,Locstate,float,float);
LOCAL int       Mag_p1_5edge_one_side_sten(TRI*,int,TRI *tris[],int*);
LOCAL int       minmod_sign(float,float);
LOCAL void      scale_state_by_factor(Locstate,float);
// LOCAL double    biased_min_mod(double*,int,double);
LOCAL void      least_sqr_fit_linear_poly(double*,double ls_A[][3],int,double*);
LOCAL double    Max_wave_speed_at_pt(POINT*,TRI*,int,Mid_soln*,int,int);
LOCAL void      vertex_LLF_flux_in_dir(POINT*,TRI*,int,Mid_soln*,int,double,double*,double*,int);
LOCAL double    tan_half_angle(POINT*,TRI*,int,double*);
LOCAL float     compute_alpha_of_state_MHD(Locstate,float*);
LOCAL double    Max_wave_speed_at_pt_single_state(POINT*,TRI*,Mid_soln*,int,double*,int);
LOCAL void      LLF_U_star_model(double*,Mid_soln*,int,TRI  *tris[],double,double*,double nor[][3],int,double*,int);
LOCAL double    edge_Ez_quadrature(TRI*,TRI*,float,Mid_soln*,int,Front*,int,float*);
LOCAL float     vh_val_ver2(float,int);
LOCAL float     vh_val_d_ver2(float,int);

LOCAL int       Mag_p2_6edge_one_side_sten_new(TRI*,int,TRI *tris[],int*);
LOCAL int       Mag_p2_6edge_central_sten_new(TRI*,TRI *tris[],int*);
LOCAL int       Mag_p2_5edge_reverse_sten_5pt_vertex_new(TRI*,int,TRI *tris[],int*);
LOCAL int       Mag_p2_6edge_reverse_sten_new(TRI*,int,TRI *tris[],int*);
LOCAL int       Mag_p2_5edge_reverse_sten_new(TRI*,int,TRI *tris[],int*);

LOCAL void      B_edge_reconstruction(Front*,Mid_soln*,int,Limiting_store**);
LOCAL void      edge_B_P1_polynomial_reconstruction(TRI*,Mid_soln*,Limiting_store**,int);
LOCAL void      edge_B_P2_polynomial_reconstruction(TRI*,Mid_soln*,Limiting_store**,int);
LOCAL void      from_cell_norB_along_edge_P1(TRI*,Mid_soln*,int,int,int,double*);
LOCAL void      from_cell_norB_along_edge_P2(TRI*,Mid_soln*,int,int,int,double*);
// LOCAL void      B_cell_2nd_reconstruction(Front*,Mid_soln*,int,Limiting_store**);
LOCAL void      tri_B_P1_polynomial_2nd_reconstruction(TRI*,Mid_soln*,Limiting_store**,int);
LOCAL void      tri_B_P2_polynomial_2nd_reconstruction(TRI*,Mid_soln*,Limiting_store**,int);
LOCAL void      tri_B_P2_polynomial_2nd_reconstruction_ver2(TRI*,Mid_soln*,Limiting_store**,int);
LOCAL void      MHD_fix_neg_dens_press(Front*,Mid_soln*,Limiting_store**,int,int);
LOCAL void      fix_unphysical_st_cont_Bn_edge(TRI*,Mid_soln*,int,Front*);
LOCAL void      compress_Bn_on_edge(TRI*,Mid_soln*,int,double,Front*);
LOCAL void      entropy_fix_for_blast_MHD(Locstate,TRI*,Mid_soln*,int,double*);
LOCAL void      grad_vh_loc_div_free_basis(float*,double*,double,int,float*);
LOCAL void      adv_dual_cell_B_edge_DG(POLYGON*,POLYGON*,float,float,Mid_soln*,Dual_cell_Mid_soln*,int,Front*); 
LOCAL void      dual_cell_edge_Ez_quadrature(TRI*,TRI*,POINT*,POINT*,Mid_soln*,int,double*,double*,int);
LOCAL void      B_state_on_dual_cell_P1(POLYGON*,double*,int,Dual_cell_Mid_soln*,double*,int);
LOCAL void      B_state_on_dual_cell_DG_P1(POLYGON*,double*,int,Dual_cell_Mid_soln*,double*,int);
LOCAL void      tri_B_avg_from_dual_cell_P1_tri_P0(TRI*,TRI*,Mid_soln*,int,Front*,Dual_cell_Mid_soln*,double*);
LOCAL void      tri_B_avg_from_dual_cell_P1_tri_P0_revised(TRI*,TRI*,Mid_soln*,int,Front*,Dual_cell_Mid_soln*,double*);
LOCAL void      tri_B_avg_from_dual_cell_P1_tri_P1(TRI*,TRI*,Mid_soln*,int,Front*,Dual_cell_Mid_soln*,double*,double*);
LOCAL void      tri_B_avg_from_dual_cell_P1_tri_DG_P1(TRI*,TRI*,Mid_soln*,int,Front*,Dual_cell_Mid_soln*,double*,double*);
LOCAL void      dual_cell_B_edge_from_tri_DG_P1(POLYGON*,POLYGON*,Mid_soln*,
                                                Dual_cell_Mid_soln*,int,Front*,double Bn[][N_COEF_EDGE],
                                                double pBn[][2][N_COEF_EDGE],int);
// LOCAL int       Add_dual_to_dual_cell_pair_list(DUAL_CELL_PAIR*,int,POLYGON*,TRI*,TRI*);

// LOCAL void      Dual_cell_edge_B_reconstruction_P1(Front*,Mid_soln*,Dual_cell_Mid_soln*,int,double);
// LOCAL void      Collect_press_at_tri_quadrature_on_dual_cell_Gauss_N2(Front*,POLYGON*,
//                         Mid_soln*,Dual_cell_Mid_soln*,int,double qB[][4][2],double  qt_crds[][4][3]);

#if defined(__MPI__)
LOCAL void      pp_send_interior_fields_on_Periodic_Side(int*,int,int,float,Front*,Mid_soln*,int,MPI_Request*);
LOCAL void      pp_receive_interior_fields_on_Periodic_Side(int*,int,int,float,Front*,Mid_soln*,int);
#endif // if defined(__MPI__) //
LOCAL void      gather_to_io_node(FILE*,int,int,char*,PP_GRID*,double**);
LOCAL void      merge_b_on_dual_edges_removing_rounding_error(Front*,Dual_cell_Mid_soln*,int);


/*
*                       show_boundary_tri():
*
*       Writes every triangle for which Boundary_tri() is true, over all
*       surfaces of fr->mesh, to a text file in the directory
*       <dname>/matlab_data.  The file name is the "tri" name returned by
*       set_ppfname(), followed by ".ts" and the step number as formatted
*       by right_flush(step,7).
*
*       File layout:
*           number of boundary triangles
*           one line "i0 i1 i2 1" per triangle (1-based point indices)
*           number of distinct points
*           one line "x y" per point
*
*       dname   output directory (created when it does not exist)
*       step    step number used in the file name
*       fr      front whose mesh surfaces are scanned
*
*       Side effect: Index_of_point() of every vertex of a boundary triangle
*       is overwritten with its 1-based position in the point list.
*       If a name does not fit its buffer, a directory cannot be created or
*       the file cannot be opened, a warning is printed and nothing is
*       written.
*/
LOCAL void      show_boundary_tri(
        const char   *dname,
        int          step,
	Front        *fr)
{
        SURFACE       **surf;
        TRI           *tri;
        POINT         *p;
        FILE          *tri_fp;
        static char   *fname = NULL, *ppfname = NULL;
        static size_t fname_len = 0, ppfname_len = 0;
        char          outdir[256], triname[256];
        const char    *nstep;
        float         *pts = NULL, *crds;
        int           *verts = NULL;
        int           num_tri = 0, npts = 0, ntris = 0;
        int           i, j, k, l, len;

        /* Bounded formatting: a long dname must not overrun outdir[]. */
        len = snprintf(outdir,sizeof(outdir),"%s/%s",dname,"matlab_data");
        if (len < 0 || (size_t)len >= sizeof(outdir))
        {
            (void) printf("WARNING in show_boundary_tri(), "
                          "directory name %s/matlab_data is too long\n",dname);
            return;
        }

        ppfname = set_ppfname(ppfname,"tri",&ppfname_len);
        nstep = right_flush(step,7);
        len = snprintf(triname,sizeof(triname),"%s.ts%s",ppfname,nstep);
        if (len < 0 || (size_t)len >= sizeof(triname))
        {
            (void) printf("WARNING in show_boundary_tri(), "
                          "file name %s.ts%s is too long\n",ppfname,nstep);
            return;
        }

        if (create_directory(dname,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_boundary_tri(), directory "
                          "%s doesn't exist and can't be created\n",dname);
            return;
        }
        if (create_directory(outdir,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_boundary_tri(), directory "
                         "%s doesn't exist and can't be created\n",outdir);
            return;
        }

        fname = get_list_file_name(fname,outdir,triname,&fname_len);
        if ((tri_fp = fopen(fname,"w")) == NULL)
        {
            (void) printf("WARNING in show_boundary_tri(), "
                           "can't open %s\n",fname);
            return;
        }

        /* Pass 1: count the boundary triangles and mark their vertices as
         * not yet numbered (-1). */
        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                if (!Boundary_tri(tri))
                    continue;
                for (k = 0; k < 3; ++k)
                    Index_of_point(Point_of_tri(tri)[k]) = -1;
                num_tri++;
            }
        }

        /* A triangle has 3 vertex indices and introduces at most 3 new
         * points of 2 coordinates each.  Skip the zero-length allocation
         * when there are no boundary triangles. */
        if (num_tri > 0)
        {
            vector(&pts,6*num_tri,FLOAT);
            vector(&verts,3*num_tri,INT);
        }

        (void) fprintf(tri_fp,"%d\n",num_tri);

        /* Pass 2: number each distinct vertex on first sight (1-based) and
         * record the three vertex indices of every boundary triangle. */
        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                if (!Boundary_tri(tri))
                    continue;
                for (k = 0; k < 3; ++k)
                {
                    p = Point_of_tri(tri)[k];
                    if (Index_of_point(p) == -1)
                    {
                        crds = Coords(p);
                        for (l = 0; l < 2; ++l)
                            pts[2*npts+l] = crds[l];
                        ++npts;
                        Index_of_point(p) = npts;
                    }
                    verts[3*ntris+k] = Index_of_point(p);
                }
                ++ntris;
            }
        }

        for (j = 0; j < ntris; ++j)
        {
            (void) fprintf(tri_fp,"%d %d %d %d\n",
                           verts[3*j],verts[3*j+1],verts[3*j+2], 1);
        }

        (void) fprintf(tri_fp,"%d\n",npts);
        for (i = 0; i < npts; ++i)
            (void) fprintf(tri_fp,"%g %g\n", pts[2*i],pts[2*i+1]);

        /* fclose() flushes buffered output, so a failure here means the
         * file may be incomplete. */
        if (fclose(tri_fp) != 0)
        {
            (void) printf("WARNING in show_boundary_tri(), "
                          "error closing %s\n",fname);
        }

        if (num_tri > 0)
        {
            free(pts);
            free(verts);
        }
}

/*
*                       show_state_on_tri():
*
*       Runs the diagnostics selected by debugging flags ("Sod",
*       "mag_acc_vert", "v_evo", "g_sine"/"mag_g_sine", "Burgers",
*       "acc_vert") and then, only when "plot_tri_state" debugging is on,
*       dumps the triangle mesh and the per-triangle state to text files
*       in <dname>/matlab_data.
*
*       Only triangles whose centroid lies strictly inside the rectangle
*       gr->L .. gr->U are written.
*
*       Files written (names are built from set_ppfname() and the step):
*           dens, mom0, mom1, energy, u, v :
*               first line the triangle count, then one value per triangle.
*           press :
*               the triangle count only (pressure values are not written).
*           tri :
*               triangle count, one "v0 v1 v2 1" line per triangle with
*               1-based vertex numbers, then the point count and one
*               "x y" line per point.
*
*       dname   output directory (created if missing).
*       step    time step number used in the file names.
*       fr      front supplying the rect grid and the mesh surfaces.
*
*       If a directory cannot be created or a file cannot be opened a
*       warning is printed, every stream opened so far is closed, and the
*       function returns without writing anything further.
*/

EXPORT void      show_state_on_tri(
        const char   *dname,
        int          step,
        Front        *fr)
{
        /* Field files in slot order; slot f holds fsts[7*tri + f]. */
        static const char *field[7] = { "dens", "mom0", "mom1", "energy",
                                        "press", "u", "v" };
        static char   *fname = NULL, *ppfname = NULL;
        static size_t fname_len = 0, ppfname_len = 0;

        SURFACE      **surf;
        TRI          *tri;
        RECT_GRID    *gr = fr->rect_grid;
        float        *L = gr->L, *U = gr->U;
        double       *cent;
        FILE         *fp[7], *tri_fp;
        char         outname[7][256], outdir[256], triname[256];
        const char   *nstep;
        float        *pts, *crds, *fsts;
        int          *verts = NULL;
        int          alloc_len_verts = 0, alloc_len_pts = 0;
        int          num_tri = 0, npts = 0, ntris = 0;
        int          f, i, k, l;
        POINT        *p;
        Locstate     st;

        /* ---- optional diagnostics, independent of the plotting ---- */
        if (debugging("Sod"))
            sample_soln_along_line(dname,step,fr);

        if (debugging("mag_acc_vert"))
        {
            /* accuracy test for magnetic vortex evolution */
            printf("IN show_state_on_tri(), fr->time = %g\n", fr->time);
            accurate_mag_vort_L1(fr);
        }

        if (debugging("v_evo"))
            compute_L1(fr);
        if (debugging("g_sine") || debugging("mag_g_sine"))
            gas_sine_L1(fr);
        if (debugging("Burgers"))
            Burgers_L1(fr);
        if (debugging("acc_vert"))
            accurate_mag_vort_L1(fr);

        if (!debugging("plot_tri_state"))
            return;

        /* ---- build the output names ---- */
        sprintf(outdir,"%s/%s",dname,"matlab_data");

        /*
        * right_flush() is called again after every set_ppfname(), exactly
        * as the original code did.  NOTE(review): presumably its result
        * lives in storage that set_ppfname() may overwrite, so do not
        * hoist the call out of the loop without checking.
        */
        for (f = 0; f < 7; ++f)
        {
            ppfname = set_ppfname(ppfname,field[f],&ppfname_len);
            nstep = right_flush(step,7);
            sprintf(outname[f],"%s.ts%s",ppfname,nstep);
        }
        ppfname = set_ppfname(ppfname,"tri",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(triname,"%s.ts%s",ppfname,nstep);

        if (create_directory(dname,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_state_on_tri(), directory "
                          "%s doesn't exist and can't be created\n",dname);
            return;
        }
        if (create_directory(outdir,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_state_on_tri(), directory "
                         "%s doesn't exist and can't be created\n",outdir);
            return;
        }

        /* ---- open the streams; on failure release what is open ---- */
        for (f = 0; f < 7; ++f)
        {
            fname = get_list_file_name(fname,outdir,outname[f],&fname_len);
            if ((fp[f] = fopen(fname,"w")) == NULL)
            {
                (void) printf("WARNING in show_state_on_tri(), "
                               "can't open %s\n",fname);
                while (--f >= 0)
                    (void) fclose(fp[f]);
                return;
            }
        }

        fname = get_list_file_name(fname,outdir,triname,&fname_len);
        if ((tri_fp = fopen(fname,"w")) == NULL)
        {
            (void) printf("WARNING in show_state_on_tri(), "
                           "can't open %s\n",fname);
            for (f = 0; f < 7; ++f)
                (void) fclose(fp[f]);
            return;
        }

        /*
        * Pass 1: count the triangles to be written and mark their
        * vertices as "not numbered yet" (-1).
        */
        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                cent = fg_centroid(tri);
                if (!(L[0] < cent[0] && cent[0] < U[0] &&
                      L[1] < cent[1] && cent[1] < U[1]))
                    continue;

                for (k = 0; k < 3; ++k)
                    Index_of_point(Point_of_tri(tri)[k]) = -1;
                num_tri++;
            }
        }

        /* Allocation sizes are kept as in the original (generous). */
        alloc_len_pts = 7*num_tri;
        alloc_len_verts = 8*num_tri;
        vector(&pts,alloc_len_pts,FLOAT);
        vector(&verts,7*num_tri,INT);
        vector(&fsts,alloc_len_verts,FLOAT);

        fprintf(tri_fp,"%d\n",num_tri);

        /*
        * Pass 2: number each vertex on first encounter (numbers start at
        * 1), record its coordinates, and collect the triangle states.
        */
        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                cent = fg_centroid(tri);
                if (!(L[0] < cent[0] && cent[0] < U[0] &&
                      L[1] < cent[1] && cent[1] < U[1]))
                    continue;

                st = tri->st;
                for (k = 0; k < 3; ++k)
                {
                    p = Point_of_tri(tri)[k];
                    if (Index_of_point(p) == -1)
                    {
                        crds = Coords(p);
                        for (l = 0; l < 2; ++l)
                            pts[2*npts+l] = crds[l];
                        ++npts;
                        Index_of_point(p) = npts;
                    }
                    verts[3*ntris+k] = Index_of_point(p);
                }

                fsts[7*ntris]   = Dens(st);
                fsts[7*ntris+1] = Mom(st)[0];
                fsts[7*ntris+2] = Mom(st)[1];
                fsts[7*ntris+3] = Energy(st);
                /* slot 4 (pressure) is intentionally left unset: for
                   magnetic MHD the pressure is not printed */
                fsts[7*ntris+5] = Mom(st)[0]/Dens(st);
                fsts[7*ntris+6] = Mom(st)[1]/Dens(st);
                ++ntris;
            }
        }

        /* ---- mesh file: connectivity, then point coordinates ---- */
        for (i = 0; i < ntris; ++i)
        {
            (void) fprintf(tri_fp,"%d %d %d %d\n",
                           verts[3*i],verts[3*i+1],verts[3*i+2], 1);
        }
        fprintf(tri_fp,"%d\n",npts);
        for (i = 0; i < npts; ++i)
            (void) fprintf(tri_fp,"%g %g\n", pts[2*i],pts[2*i+1]);

        /* ---- field files: count header, then one value per triangle ---- */
        for (f = 0; f < 7; ++f)
        {
            fprintf(fp[f],"%d\n",ntris);
            if (f != 4)     /* the pressure file carries the count only */
            {
                for (i = 0; i < ntris; ++i)
                    (void) fprintf(fp[f],"%g\n", fsts[7*i+f]);
            }
        }

        for (f = 0; f < 7; ++f)
            fclose(fp[f]);
        fclose(tri_fp);

        free(pts);
        free(verts);
        free(fsts);
}

/*
*                       show_state_to_rect_Mach_step():
*
*       Samples the density of the triangle mesh on a uniform Nx by Ny
*       lattice spanning gr->L .. gr->U and writes it as a tab separated
*       table (one text line per j, columns running over i) to the "dens"
*       file in <dname>/matlab_data.  On the first call only (tracked by
*       a static flag) the lattice coordinates are written in the same
*       layout to the "X_crds" and "Y_crds" files.
*
*       For every triangle the lattice points within +-ratio cells of the
*       cell holding its centroid are tested with pt_in_tri(); a point
*       found inside gets the density returned by con_u_at_pt().
*       SUBDOMAIN triangles whose centroid lies in [0.8,1] x [0.6,3] are
*       skipped.  Lattice points not claimed by any triangle are written
*       with density 0 at their nominal coordinates.
*
*       dname   output directory (created if missing).
*       step    time step number used in the file names.
*       fr      front supplying the rect grid, mesh surfaces and sizest.
*
*       On a directory or file failure a warning is printed, any stream
*       already opened is closed, and the function returns.
*/

LOCAL void  show_state_to_rect_Mach_step(
        const char   *dname,
        int          step,
        Front        *fr)
{
        static char  *fname = NULL, *ppfname = NULL;
        static size_t fname_len = 0, ppfname_len = 0;
        static Locstate st = NULL;
        static int   first = YES;

        RECT_GRID    *gr = fr->rect_grid;
        SURFACE      **surf;
        TRI          *tri;
        FILE         *dens_fp, *crdsfp[MAXD];
        char         densname[256], outdir[256], crdsname[MAXD][256];
        const char   *nstep;
        const char   *sep;
        float        con_u[4];
        float        dx, dy, crds[MAXD], dent[MAXD];
        float        **denst, **xcrds, **ycrds;
        float        diam = 0.0;    /* stays 0 when the mesh has no triangle */
        int          **find_tri;
        int          Xnumpts, Ynumpts, Nx, Ny, ic[MAXD];
        int          ixmin, ixmax, iymin, iymax, ratio;
        int          i, j, dim = 2;
#if defined(__MPI__)
        int          Nnodes, half_nodes;
#endif /* defined(__MPI__) */

#if defined(__MPI__)
        /*
        * Resolution chosen for a 2 by N partition: 3000 rows are shared
        * among Nnodes/2 subdomains.  The divisor is clamped to 1 so that
        * a run on a single node does not divide by zero.
        */
        Nnodes = pp_numnodes();
        half_nodes = Nnodes/2;
        if (half_nodes < 1)
            half_nodes = 1;
        Xnumpts = 501; Ynumpts = 3000/half_nodes + 1;
#else
        Xnumpts = 1001; Ynumpts = 3001;
#endif /* defined(__MPI__) */

        if (st == NULL)
            g_alloc_state(&st, fr->sizest);

        /* ---- build the output names ---- */
        sprintf(outdir,"%s/%s",dname,"matlab_data");

        /*
        * Only the density table and the two coordinate tables are ever
        * opened, so only these three names are built.  right_flush() is
        * called again after each set_ppfname(), as in the original.
        */
        ppfname = set_ppfname(ppfname,"dens",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(densname,"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"X_crds",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(crdsname[0],"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"Y_crds",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(crdsname[1],"%s.ts%s",ppfname,nstep);

        if (create_directory(dname,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_state_to_rect(), directory "
                          "%s doesn't exist and can't be created\n",dname);
            return;
        }
        if (create_directory(outdir,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_state_to_rect(), directory "
                         "%s doesn't exist and can't be created\n",outdir);
            return;
        }

        /* ---- open the streams; on failure release what is open ---- */
        fname = get_list_file_name(fname,outdir,densname,&fname_len);
        if ((dens_fp = fopen(fname,"w")) == NULL)
        {
            (void) printf("WARNING in show_state_to_rect(), "
                           "can't open %s\n",fname);
            return;
        }

        if (first == YES)
        {
            fname = get_list_file_name(fname,outdir,crdsname[0],&fname_len);
            if ((crdsfp[0] = fopen(fname,"w")) == NULL)
            {
                (void) printf("WARNING in show_state_to_rect(), "
                           "can't open %s\n",fname);
                (void) fclose(dens_fp);
                return;
            }
            fname = get_list_file_name(fname,outdir,crdsname[1],&fname_len);
            if ((crdsfp[1] = fopen(fname,"w")) == NULL)
            {
                (void) printf("WARNING in show_state_to_rect(), "
                           "can't open %s\n",fname);
                (void) fclose(crdsfp[0]);
                (void) fclose(dens_fp);
                return;
            }
        }

        /* ---- lattice spacing and storage ---- */
        dx = (gr->U[0]-gr->L[0])/(Xnumpts-1);
        dy = (gr->U[1]-gr->L[1])/(Ynumpts-1);
        Nx = Xnumpts;
        Ny = Ynumpts;

        matrix(&denst,Nx,Ny,sizeof(float));
        matrix(&xcrds,Nx,Ny,sizeof(float));
        matrix(&ycrds,Nx,Ny,sizeof(float));
        matrix(&find_tri,Nx,Ny,sizeof(int));

        for (j = 0; j < Ny; j++)
        {
            for (i = 0; i < Nx; i++)
            {
                xcrds[i][j] = ycrds[i][j] = -0.000001;
                find_tri[i][j] = NO;
            }
        }

        /*
        * Representative triangle size: side 0 of the first triangle of a
        * surface.  As in the original, when several surfaces hold
        * triangles the last such surface determines the value.
        */
        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            tri = first_tri(*surf);
            if (!at_end_of_tri_list(tri,*surf))
                diam = fg_length_side(tri)[0];
        }
        /* half width, in lattice cells, of the search window per triangle */
        ratio = 5*((int)(diam/min(dx,dy)) + 1);

        /* ---- sample every triangle onto the lattice ---- */
        for (surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                for (i = 0; i < dim; i++)
                    dent[i] = fg_centroid(tri)[i];

                /* skip subdomain triangles sitting in the corner region */
                if (tri->BC_type == SUBDOMAIN &&
                    dent[0] >= 0.8 && dent[0] <= 1.0 &&
                    dent[1] >= 0.6 && dent[1] <= 3.0)
                    continue;

                ic[0] = irint(floor((dent[0]-gr->L[0])/dx));
                ic[1] = irint(floor((dent[1]-gr->L[1])/dy));

                /* search window, clipped to the lattice */
                ixmin = ic[0] - ratio; ixmax = ic[0] + ratio;
                iymin = ic[1] - ratio; iymax = ic[1] + ratio;
                if (ixmin < 0)     ixmin = 0;
                if (iymin < 0)     iymin = 0;
                if (ixmax > Nx-1)  ixmax = Nx-1;
                if (iymax > Ny-1)  iymax = Ny-1;

                for (j = iymin; j <= iymax; j++)
                {
                    for (i = ixmin; i <= ixmax; i++)
                    {
                        crds[0] = gr->L[0] + dx*i;
                        crds[1] = gr->L[1] + dy*j;
                        /* nudge boundary lattice points slightly inward */
                        if (i == 0)
                            crds[0] += 0.000001*dx;
                        if (j == 0)
                            crds[1] += 0.000001*dy;
                        if (i == Xnumpts-1)
                            crds[0] -= 0.000001*dx;
                        if (j == Ynumpts-1)
                            crds[1] -= 0.000001*dy;

                        if (pt_in_tri(crds, tri) != YES)
                            continue;

                        con_u_at_pt(tri->st, crds, fg_centroid(tri), sqrt(fg_area(tri)), con_u);
                        assign(st, tri->st, fr->sizest);
                        Dens(st) = con_u[0];
                        Mom(st)[0] = con_u[1];
                        Mom(st)[1] = con_u[2];
                        Energy(st) = con_u[3];
                        denst[i][j] = Dens(st);
                        xcrds[i][j] = crds[0];
                        ycrds[i][j] = crds[1];
                        find_tri[i][j] = YES;
                    }
                }
            }
        }

        /* lattice points no triangle claimed: nominal position, zero density */
        for (j = 0; j < Ny; j++)
        {
            for (i = 0; i < Nx; i++)
            {
                if (find_tri[i][j] != NO)
                    continue;
                xcrds[i][j] = gr->L[0] + dx*i;
                ycrds[i][j] = gr->L[1] + dy*j;
                denst[i][j] = 0.0;
            }
        }

        /* ---- write the tables: tab between columns, newline per row ---- */
        for (j = 0; j < Ny; j++)
        {
            for (i = 0; i < Nx; i++)
            {
                sep = (i != (Xnumpts-1)) ? "\t" : "";
                if (first == YES)
                {
                    fprintf(crdsfp[0],"%g%s", xcrds[i][j], sep);
                    fprintf(crdsfp[1],"%g%s", ycrds[i][j], sep);
                }
                fprintf(dens_fp,"%g%s", denst[i][j], sep);
            }
            if (first == YES)
            {
                fprintf(crdsfp[0],"\n");
                fprintf(crdsfp[1],"\n");
            }
            fprintf(dens_fp,"\n");
        }

        fclose(dens_fp);
        if (first == YES)
        {
            fclose(crdsfp[0]);
            fclose(crdsfp[1]);
        }

        free(denst);
        free(xcrds);
        free(ycrds);
        free(find_tri);

        first = NO;
}


LOCAL void  show_state_to_rotor_MHD(
        const char   *dname,
        int          step,
        Front        *fr)
{
        SURFACE      **surf;
        TRI          *tri;
        RECT_GRID    *gr = fr->rect_grid;
        double        *L = gr->L, *U = gr->U, con_u[10];
        double       *cent, c, u2;
        int          k, l, j, i, dim = 2;
        FILE         *fp[8], *crdsfp[MAXD];
        static char  *fname = NULL, *ppfname = NULL;
        static size_t fname_len = 0, ppfname_len = 0;
        char         outname[8][256],outdir[256], crdsname[MAXD][256];
        const char   *nstep;
        static Locstate st = NULL;
        double     dx, dy, crds[MAXD], dent[MAXD];
        double     **denst, diam, **xcrds, **ycrds, **press, **Mach, **B2, **B2_xy, **u, **v, **vel_len, **A_z;
        int        Xnumpts = 201, Ynumpts = 601, Nx, Ny, ic[MAXD], ixmin, ixmax, iymin, iymax, ratio;
        static int first = YES;
        int        myid, ICOORDS[MAXD];
        int        Nnodes, **find_tri, Az_is_bad = NO;
        PP_GRID    *pp_grid = fr->pp_grid;

#if defined(__MPI__)
            /**
              wired for 2 by 2 partition
            **/
            // after 062608, use this resolution for 2 by 8 partition
            // the resolution is 800 by  2400, subdomain: 400
            Nnodes = pp_numnodes();
            // Xnumpts = 501; Ynumpts = 1000/(Nnodes/2) + 1; // for Euler
            // Xnumpts = 26; Ynumpts = 101; // debug
            Xnumpts = 400; Ynumpts = 400;
            if(debugging("Orszag_T"))
            {
                Xnumpts = 1200; Ynumpts = 1200;
            }
            if(debugging("blast_MHD"))
            {
                Xnumpts = 200; Ynumpts = 200;
            }
            if(debugging("field_loop"))
            {
                Xnumpts = 400; Ynumpts = 200;
                // Xnumpts = 100; Ynumpts = 50;
            }
            Xnumpts = Xnumpts* pp_grid->Zoom_grid.gmax[0]/pp_grid->Global_grid.gmax[0];
            Ynumpts = Ynumpts* pp_grid->Zoom_grid.gmax[1]/pp_grid->Global_grid.gmax[1];
#else
            Xnumpts = 401; Ynumpts = 401;
            if(debugging("Orszag_T"))
            {
                Xnumpts = 1201; Ynumpts = 1201;
            }
            if(debugging("blast_MHD"))
            {
                Xnumpts = 201; Ynumpts = 201;
            }
            if(debugging("field_loop"))
            {
                Xnumpts = 401; Ynumpts = 201;
                // Xnumpts = 51; Ynumpts = 26;
                // Xnumpts = 51; Ynumpts = 26;
            }
#endif // if defined(_MPI_)

        if(st == NULL)
            g_alloc_state(&st, fr->sizest);

        sprintf(outdir,"%s/%s",dname,"matlab_data");

        ///////////////////////////////////////////////////////
        ppfname = set_ppfname(ppfname,"dens",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[0],"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"press",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[1],"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"Mach",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[2],"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"B2",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[3],"%s.ts%s",ppfname,nstep);

        ///////////////////////////////////////////////////////
        ppfname = set_ppfname(ppfname,"u",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[4],"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"v",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[5],"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"vel_len",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(outname[6],"%s.ts%s",ppfname,nstep);

        /**
        if(debugging("field_loop"))
        {
            ppfname = set_ppfname(ppfname,"A_z",&ppfname_len);
            nstep = right_flush(step,7);
            sprintf(outname[7],"%s.ts%s",ppfname,nstep);
        }
        **/
        ///////////////////////////////////////////////////////

        ppfname = set_ppfname(ppfname,"X_crds",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(crdsname[0],"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"Y_crds",&ppfname_len);
        nstep = right_flush(step,7);
        sprintf(crdsname[1],"%s.ts%s",ppfname,nstep);

        if (create_directory(dname,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_state_to_rect(), directory "
                          "%s doesn't exist and can't be created\n",dname);
            return;
        }
        if (create_directory(outdir,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_state_to_rect(), directory "
                         "%s doesn't exist and can't be created\n",outdir);
            return;
        }

#if defined(__MPI__)
        if (is_io_node(pp_mynode()))
        {
            for(i = 0; i < 7; i++)
            {
                fname = get_list_file_name(fname,outdir,outname[i],&fname_len);
                if ((fp[i] = fopen(fname,"w")) == NULL)
                {
                    (void) printf("WARNING in show_state_to_rect(), "
                               "can't open %s\n",fname);
                    return;
                }
            }

            /**
            if(debugging("field_loop"))
            {
                fname = get_list_file_name(fname,outdir,outname[7],&fname_len);
                if ((fp[7] = fopen(fname,"w")) == NULL)
                {
                    (void) printf("WARNING in show_state_to_rect(), "
                               "can't open %s\n",fname);
                    return;
                }
            }
            **/

            if(first == YES)
            {
                fname = get_list_file_name(fname,outdir,crdsname[0],&fname_len);
                if ((crdsfp[0] = fopen(fname,"w")) == NULL)
                {
                    (void) printf("WARNING in show_state_to_rect(), "
                               "can't open %s\n",fname);
                    return;
                }
                fname = get_list_file_name(fname,outdir,crdsname[1],&fname_len);
                if ((crdsfp[1] = fopen(fname,"w")) == NULL)
                {
                    (void) printf("WARNING in show_state_to_rect(), "
                               "can't open %s\n",fname);
                    return;
                }
            }
        }
#else // #if defined(__MPI__)
        for(i = 0; i < 7; i++)
        {
            fname = get_list_file_name(fname,outdir,outname[i],&fname_len);
            if ((fp[i] = fopen(fname,"w")) == NULL)
            {
                (void) printf("WARNING in show_state_to_rect(), "
                           "can't open %s\n",fname);
                return;
            }
        }

        /**
        if(debugging("field_loop"))
        {
            fname = get_list_file_name(fname,outdir,outname[7],&fname_len);
            if ((fp[7] = fopen(fname,"w")) == NULL)
            {
                (void) printf("WARNING in show_state_to_rect(), "
                               "can't open %s\n",fname);
                return;
            }
        }
        **/

        if(first == YES)
        {
            fname = get_list_file_name(fname,outdir,crdsname[0],&fname_len);
            if ((crdsfp[0] = fopen(fname,"w")) == NULL)
            {
                (void) printf("WARNING in show_state_to_rect(), "
                           "can't open %s\n",fname);
                return;
            }
            fname = get_list_file_name(fname,outdir,crdsname[1],&fname_len);
            if ((crdsfp[1] = fopen(fname,"w")) == NULL)
            {
                (void) printf("WARNING in show_state_to_rect(), "
                           "can't open %s\n",fname);
                return;
            }
        }
#endif // #if defined(__MPI__)

        // Rotor problem, show [0,1]*[0,1] domain
        // printf("db_Mach, show: output data size %d %d\n", Xnumpts, (Ynumpts-1)/4*3+1);
        // fprintf(crdsfp[0],"%d %d\n", Xnumpts, (Ynumpts-1)/4*3+1);
        // fprintf(crdsfp[1],"%d %d\n", Xnumpts, (Ynumpts-1)/4*3+1);
        // fprintf(fp[0],"%d %d\n", Xnumpts, (Ynumpts-1)/4*3+1);

        // Compute crds
        dx = (gr->U[0]-gr->L[0])/(Xnumpts-1);
        dy = (gr->U[1]-gr->L[1])/(Ynumpts-1);
#if defined(__MPI__)
        Nx = Xnumpts;
        Ny = Ynumpts;
        dx = (gr->U[0]-gr->L[0])/(Xnumpts); // use cell center for simplicity
        dy = (gr->U[1]-gr->L[1])/(Ynumpts);
#else
        Nx = Xnumpts;
        Ny = Ynumpts;
 
        // printf("Nx = %d, Ny = %d\n", Nx, Ny);

#endif // if defined(__MPI__)

        matrix(&denst,Nx,Ny,sizeof(double));
        matrix(&press,Nx,Ny,sizeof(double));
        matrix(&Mach,Nx,Ny,sizeof(double));
        matrix(&B2,Nx,Ny,sizeof(double));
        matrix(&B2_xy,Nx,Ny,sizeof(double));
        matrix(&xcrds,Nx,Ny,sizeof(double));
        matrix(&ycrds,Nx,Ny,sizeof(double));
        matrix(&find_tri,Nx,Ny,sizeof(int));
        ///////
        matrix(&u,Nx,Ny,sizeof(double));
        matrix(&v,Nx,Ny,sizeof(double));
        matrix(&vel_len,Nx,Ny,sizeof(double));

        // if(debugging("field_loop"))
        //     matrix(&A_z,Nx,Ny,sizeof(double));

        for(j = 0; j < Ny; j++)
        {
            for(i = 0; i < Nx; i++)
            {
                xcrds[i][j] = ycrds[i][j] = 0.0;
                find_tri[i][j] = NO;
            }
        }

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                diam = fg_length_side(tri)[0];
                break;
            }
        }
        ratio = 5*((int)(diam/min(dx,dy)) + 1);

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                // if(tri->BC_type == SUBDOMAIN) continue;

                for(i = 0; i < dim; i++)
                    dent[i] = fg_centroid(tri)[i];

#if defined(__MPI__)
                ic[0] = irint(floor((dent[0]-gr->L[0])/dx));
                ic[1] = irint(floor((dent[1]-gr->L[1])/dy));
#else
                ic[0] = irint(floor((dent[0]-gr->L[0])/dx));
                ic[1] = irint(floor((dent[1]-gr->L[1])/dy));
#endif // if defined(__MPI__)
                ixmin = ic[0] - ratio; ixmax = ic[0] + ratio;
                iymin = ic[1] - ratio; iymax = ic[1] + ratio;

                for(j = iymin; j <= iymax; j++)
                {
                    for(i = ixmin; i <= ixmax; i++)
                    {
                        if(i < 0 || i >= Nx || j < 0 || j >= Ny)
                            continue;
#if defined(__MPI__)
                        crds[0] = gr->L[0] + dx*(i+0.5); // cell center type
                        crds[1] = gr->L[1] + dy*(j+0.5); // cell center type
#else
                        crds[0] = gr->L[0] + dx*i;
                        crds[1] = gr->L[1] + dy*j;
                        if(i == 0)
                            crds[0] += 0.0000001*dx;
                        if(j == 0)
                            crds[1] += 0.0000001*dy;
                        if(i == Xnumpts-1)
                            crds[0] -= 0.0000001*dx;
                        if(j == Ynumpts-1)
                            crds[1] -= 0.0000001*dy;
#endif // if defined(__MPI__)
                        if(pt_in_tri(crds, tri) == YES)
                        {
                            con_u_at_pt(tri->st, crds, fg_centroid(tri), sqrt(fg_area(tri)), con_u);
                            assign(st, tri->st, fr->sizest);
                            // Dens(st) = con_u[0];
                            // Mom(st)[0] = con_u[1];
                            // Mom(st)[1] = con_u[2];
                            // Energy(st) = con_u[3];
                            denst[i][j] = Dens(st);
                            for(k = 0; k < 2; k++)
                                con_u[k+1] /= con_u[0];
                            con_u[4] /= con_u[0];

                            B2[i][j] = 0.5*(sqr(con_u[5]) + sqr(con_u[6]) + sqr(con_u[7]));
                            B2_xy[i][j] = (sqr(con_u[5]) + sqr(con_u[6]));
                            u2 = sqr(con_u[1]) + sqr(con_u[2]) + sqr(con_u[4]);

                            if(debugging("BS_case") || debugging("blast_MHD"))
                            {
                                press[i][j] = (1.4-1.0)*(con_u[3] - 0.5*con_u[0]*(u2) - (B2[i][j]) );
                                //// TMP
                                if(press[i][j] < 0.0) 
                                {
                                    // printf("ERROR: show_state_to_rotor_MHD() tri[%d] has negative pressure %g\n", 
                                    //          tri->id, press[i][j]); 
                                    // verbose_print_state("state", tri->st);
                                    // clean_up(ERROR);
                                    press[i][j] = POLY_thermal_pressure_MHD(tri->st);
                                }
                                if(press[i][j] < 0.0) 
                                {
                                    printf("WARNING: show_state_to_rotor_MHD() tri[%d] has negative pressure %g\n", 
                                           tri->id, press[i][j]); 
                                    // verbose_print_state("state", tri->st);
                                    // clean_up(ERROR);
                                }
               
                                c = sqrt((1.4*press[i][j])/denst[i][j]);
                            }
                            else
                            {
                                press[i][j] = (5.0/3.0-1.0)*(con_u[3] - 0.5*con_u[0]*(u2) - (B2[i][j]) );
                                c = sqrt((5.0/3.0*press[i][j])/denst[i][j]);
                            }
                            Mach[i][j] = sqrt(u2)/c;
                            xcrds[i][j] = crds[0];
                            ycrds[i][j] = crds[1];
                            find_tri[i][j] = YES;

                            ///////
                            u[i][j] =  con_u[1];
                            v[i][j] =  con_u[2];
                            vel_len[i][j] = sqrt(sqr(con_u[1]) + sqr(con_u[2]));
                            // printf("[%d][%d] u2 = %g, c = %g, Mach = %g, \n", i, j, u2, c, Mach[i][j]);
                            // clean_up(0);
                            /***
                            if(debugging("field_loop"))
                            {
                                double rc, R = 0.3, tmp_A_z[2];
                                rc = sqrt(sqr(crds[0]) + sqr(crds[1]));
                                // az_fromBx = con_u[5]*crds[1]; 
                                // az_fromBy = -con_u[6]*crds[0]; 
                                A_z_at_pt(tri->st, tri, crds, tmp_A_z); 
                                if(rc < R)
                                {
                                    // printf("difference in A_z %e, (%12.11g, %12.11g)\n", tmp_A_z[0]-tmp_A_z[1],
                                    //         tmp_A_z[0], tmp_A_z[1]);
                                    if(isnan(tmp_A_z[0]) || fabs(tmp_A_z[0]-tmp_A_z[1]) >1.0e-10)
                                    {
                                        printf("ERROR: field_loop: show_state_to_rotor_MHD()\n");
                                       
                                        printf("tri->%d, potential Az nan or error, %g %g\n", tri->id, 
                                            tmp_A_z[0], tmp_A_z[1]);
                                        clean_up(ERROR);
                                    }
                                }
                                else if( crds[0] < -0.9 || crds[0] > 0.9 || crds[1] < -0.4 || crds[1] > 0.4)
                                {
                                }

                                A_z[i][j] = 0.5*(tmp_A_z[0] + tmp_A_z[1]);
                            }
                            ***/
                        }
                    }
                }
            }
        }

        // fix state at the corner
        for(j = 0; j < Ny; j++)
        {
            for(i = 0; i < Nx; i++)
            {
                if(find_tri[i][j] == NO)
                {
                    // printf("ERROR: show_state_to_rotor_MHD(), (%d %d)\n", i, j);
                    // clean_up(ERROR);
#if defined(__MPI__)
                    xcrds[i][j] = gr->L[0] + dx*(i+0.5);
                    ycrds[i][j] = gr->L[1] + dy*(j+0.5);
#else
                    xcrds[i][j] = gr->L[0] + dx*i;
                    ycrds[i][j] = gr->L[1] + dy*j;
#endif
                    B2_xy[i][j] = B2[i][j] = Mach[i][j] = press[i][j] = denst[i][j] = 0.0;
                    u[i][j] = v[i][j] = vel_len[i][j] = 0.0;

                    // if(debugging("field_loop"))
                    //     A_z[i][j] = 0.0;
                }
            }
        }

#if defined(__MPI__)
        if(first == YES)
        {
            gather_to_io_node(crdsfp[0],Nx,Ny,"x_crds",pp_grid,xcrds);
            gather_to_io_node(crdsfp[1],Nx,Ny,"y_crds",pp_grid,ycrds);
        }

        gather_to_io_node(fp[0],Nx,Ny,"density",pp_grid,denst);
        gather_to_io_node(fp[1],Nx,Ny,"press",pp_grid,press);
        gather_to_io_node(fp[2],Nx,Ny,"Mach",pp_grid,Mach);
        gather_to_io_node(fp[3],Nx,Ny,"B2",pp_grid,B2_xy);
        gather_to_io_node(fp[4],Nx,Ny,"u",pp_grid,u);
        gather_to_io_node(fp[5],Nx,Ny,"v",pp_grid,v);
        gather_to_io_node(fp[6],Nx,Ny,"vel_len",pp_grid,vel_len);

        // if(debugging("field_loop")) gather_to_io_node(fp[7],Nx,Ny,"A_z",pp_grid,A_z);

        if (is_io_node(pp_mynode()))
        {
            for(i = 0; i < 7; i++)
                fclose(fp[i]);
            // if(debugging("field_loop")) fclose(fp[7]);

            if(first == YES)
            {
                fclose(crdsfp[0]);
                fclose(crdsfp[1]);
            }
        }
#else //if defined(__MPI__)
        for(j = 0; j < Ny; j++)
        {
            for(i = 0; i < Nx; i++)
            {
                if(i != (Xnumpts-1))
                {
                    if(first == YES)
                    {
                        fprintf(crdsfp[0],"%g\t", xcrds[i][j]);
                        fprintf(crdsfp[1],"%g\t", ycrds[i][j]);
                    }
                    fprintf(fp[0],"%g\t", denst[i][j]);
                    fprintf(fp[1],"%g\t", press[i][j]);
                    fprintf(fp[2],"%g\t", Mach[i][j]);
                    fprintf(fp[3],"%g\t", B2_xy[i][j]);
                    //////
                    fprintf(fp[4],"%g\t", u[i][j]);
                    fprintf(fp[5],"%g\t", v[i][j]);
                    fprintf(fp[6],"%g\t", vel_len[i][j]);
                    // if(debugging("field_loop")) fprintf(fp[7],"%g\t", A_z[i][j]);
                }
                else
                {
                    if(first == YES)
                    {
                        fprintf(crdsfp[0],"%g", xcrds[i][j]);
                        fprintf(crdsfp[1],"%g", ycrds[i][j]);
                    }
                    fprintf(fp[0],"%g", denst[i][j]);
                    fprintf(fp[1],"%g", press[i][j]);
                    fprintf(fp[2],"%g", Mach[i][j]);
                    fprintf(fp[3],"%g", B2_xy[i][j]);
                    ///////
                    fprintf(fp[4],"%g", u[i][j]);
                    fprintf(fp[5],"%g", v[i][j]);
                    fprintf(fp[6],"%g", vel_len[i][j]);
                    // if(debugging("field_loop")) fprintf(fp[7],"%g", A_z[i][j]);
                }
            }
            if(first == YES)
            {
                fprintf(crdsfp[0],"\n");
                fprintf(crdsfp[1],"\n");
            }
            fprintf(fp[0],"\n");
            fprintf(fp[1],"\n");
            fprintf(fp[2],"\n");
            fprintf(fp[3],"\n");
            fprintf(fp[4],"\n");
            fprintf(fp[5],"\n");
            fprintf(fp[6],"\n");
            // if(debugging("field_loop")) fprintf(fp[7],"\n");
        }

        for(i = 0; i < 7; i++)
            fclose(fp[i]);
        // fclose(fp[1]); fclose(fp[2]); fclose(fp[3]);
        // fclose(fp[4]); fclose(fp[5]); fclose(fp[6]);
        // if(debugging("field_loop")) fclose(fp[7]);

        if(first == YES)
        {
            fclose(crdsfp[0]);
            fclose(crdsfp[1]);
        }
#endif  // if defined(__MPI__)

        free(denst);
        free(press);
        free(Mach);
        free(B2);
        free(B2_xy);
        //// 
        free(u);
        free(v);
        free(vel_len);
        ////
        free(xcrds);
        free(ycrds);
        free(find_tri);

        // if(debugging("field_loop")) free(A_z);

        first = NO;

        /// TMP
        // if(debugging("field_loop") && Az_is_bad == YES)
        // {
            // printf("ERROR: show_state_to_rotor_MHD(), bad AZ\n");
            // clean_up(ERROR);
        // }

}


/*
*       gather_to_io_node():
*
*       Collects one tile of doubles from every node onto the I/O node and
*       prints the assembled matrix to fp as tab separated text.
*
*       fp        output stream; only written to on the I/O node.
*       Nx, Ny    extents of this node's tile, dat[0..Nx-1][0..Ny-1].
*       type_str  unused.
*       pp_grid   processor grid; gmax[0] by gmax[1] tiles are assembled and
*                 the tile at grid position (i,j) belongs to node
*                 gmax[0]*j + i.
*       dat       this node's tile.
*
*       Every node must call this function: it performs collective
*       operations (all-gather, global sum, final pp_gsync()).
*
*       Tiles travel as byte streams packed with the first index varying
*       fastest, cut into messages of at most BLOCK_SIZE bytes tagged
*       state_id(0), state_id(1), ...
*
*       NOTE(review): the I/O node stores its own tile in slot 0 and only
*       receives from nodes IO_NODE_ID+1 .. N_nodes-1, so this presumably
*       relies on IO_NODE_ID being node 0 -- confirm.
*       NOTE(review): the block loops never terminate if
*       GetHypPPBlockSize() returns 0 -- confirm that cannot happen.
*/
LOCAL void gather_to_io_node(
        FILE         *fp,
        int          Nx,
        int          Ny,
        char         *type_str,
        PP_GRID      *pp_grid,
        double       **dat)
{
        int      glob_Nx = Nx, glob_Ny = Ny, i, j, ic[3];
        int      *Nx_array, *Ny_array, N_nodes, node_ID, tmpNx = Nx, tmpNy = Ny;
        int      n_tiles = 0, on_io_node;
        byte     **glob_dat = NULL;   /* one packed tile per node, I/O node only */
        byte     *storage = NULL, *buf, *ps;
        size_t   len;

        N_nodes = pp_numnodes();
        vector(&Nx_array, N_nodes, sizeof(int));
        vector(&Ny_array, N_nodes, sizeof(int));

        /* Every node learns the tile extents of every other node. */
        pp_all_gather((POINTER)&tmpNx, sizeof(int),(POINTER)Nx_array,sizeof(int));
        pp_all_gather((POINTER)&tmpNy, sizeof(int),(POINTER)Ny_array,sizeof(int));

        /*
        * The summed extents are not used below; the calls are retained so
        * that the sequence of collective operations stays unchanged.
        */
        pp_global_isum(&glob_Nx,1L);
        pp_global_isum(&glob_Ny,1L);

        on_io_node = is_io_node(pp_mynode());

        if (on_io_node)
        {
            /*
            * The tile table is sized from the processor grid rather than
            * being a fixed-size array, so any partition fits.
            */
            n_tiles = pp_grid->gmax[0]*pp_grid->gmax[1];
            vector(&glob_dat, n_tiles, sizeof(byte*));
            for (node_ID = 0; node_ID < n_tiles; node_ID++)
            {
                scalar(&glob_dat[node_ID],
                       sizeof(double)*Nx_array[node_ID]*Ny_array[node_ID]);
            }
            buf = glob_dat[0];      /* the I/O node packs straight into its slot */
        }
        else
        {
            scalar(&storage,sizeof(double)*Nx*Ny);
            buf = storage;          /* senders pack into a scratch buffer */
        }

        /* Pack the local tile, first index fastest. */
        for(j = 0; j < Ny; j++)
        {
            for(i = 0; i < Nx; i++)
            {
                assign(((Locstate)buf), &dat[i][j], sizeof(double));
                buf += sizeof(double);
            }
        }

        BLOCK_SIZE = GetHypPPBlockSize();

        if (on_io_node)
        {
            /* Receive every other node's tile, block by block. */
            for(j = IO_NODE_ID+1; j < N_nodes; j++)
            {
                find_Cartesian_coordinates(j,pp_grid,ic);
                node_ID = pp_grid->gmax[0]*ic[1] + ic[0];
                len = sizeof(double)*Nx_array[j]*Ny_array[j];

                for (ps = glob_dat[node_ID], i = 0; len >= BLOCK_SIZE;
                                len -= BLOCK_SIZE, ps += BLOCK_SIZE,++i)
                {
                    pp_recv(state_id(i),j,(POINTER)ps,BLOCK_SIZE);
                }
                if (len != 0)
                    pp_recv(state_id(i),j,(POINTER)ps,len);
            }

            /*
            * Print the assembled matrix.  For each row j of the processor
            * grid and each local row ic[1] inside it, the matching row of
            * every tile in that grid row is written left to right.  The
            * number of local rows is taken from the first tile of the grid
            * row.
            */
            for(j =0 ; j < pp_grid->gmax[1]; j++)
            {
                for(ic[1] = 0; ic[1] < Ny_array[pp_grid->gmax[0]*j]; ic[1]++)
                {
                    for(i =0 ; i < pp_grid->gmax[0]; i++)
                    {
                        node_ID = pp_grid->gmax[0]*j + i;

                        /* start of local row ic[1] inside this tile */
                        buf = glob_dat[node_ID];
                        buf += sizeof(double)*ic[1]*Nx_array[node_ID];

                        for(ic[0] = 0; ic[0] < Nx_array[node_ID]; ic[0]++)
                        {
                            fprintf(fp,"%g\t", *((double*)buf));
                            buf += sizeof(double);
                        }
                    }
                    fprintf(fp,"\n");
                }
            }

            for (node_ID = 0; node_ID < n_tiles; node_ID++)
                free(glob_dat[node_ID]);
            free(glob_dat);
        }
        else
        {
            /* Ship the packed tile to the I/O node, block by block. */
            len = sizeof(double)*Nx*Ny;
            for (ps = storage, i = 0; len >= BLOCK_SIZE;
                                len -= BLOCK_SIZE, ps += BLOCK_SIZE, ++i)
            {
                pp_send(state_id(i),(POINTER)ps,BLOCK_SIZE,IO_NODE_ID);
            }
            if (len != 0)
                pp_send(state_id(i),(POINTER)ps,len,IO_NODE_ID);

            free(storage);
        }

        free(Nx_array);
        free(Ny_array);

        pp_gsync();
}

/*
*       show_state_to_rect():
*
*       Samples the solution stored on the triangles of fr->mesh at the
*       points of a uniform lattice over fr->rect_grid and writes the
*       samples as tab separated text under "<dname>/matlab_data".
*
*       dname   output directory, created when missing.
*       step    time step number, appended to every file name as ".ts<step>".
*       fr      front supplying the mesh, the rectangular grid and sizest.
*
*       Dispatch by debugging flag:
*         "Mach_step"                    -> show_state_to_rect_Mach_step()
*         "rotor", "blast_MHD",
*         "Orszag_T", "field_loop"       -> show_state_to_rotor_MHD(), plus
*                                           show_Az_state_MHD() for "field_loop"
*         "shock_vort", "twod_riemann",
*         "db_Mach"                      -> handled here
*         none of the above              -> nothing is written
*
*       Files written here: density ("dens") always, pressure ("press")
*       when "db_Mach" or "shock_vort" is set, and the lattice coordinates
*       ("X_crds", "Y_crds") on the first call that gets as far as writing.
*       Each output line holds the values for i = 0..Nx-1 at one j.
*
*       Failures to create a directory or open a file print a WARNING and
*       return; files already opened by this call are closed first.
*
*       NOTE(review): lattice points covered by no triangle keep the
*       coordinate marker -0.00001; their density/pressure entries are
*       whatever matrix() initialized them to -- confirm that is zero.
*/
EXPORT void      show_state_to_rect(
        const char   *dname,
        int          step,
        Front        *fr)
{
        SURFACE      **surf;
        TRI          *tri;
        RECT_GRID    *gr = fr->rect_grid;
        float        con_u[10];
        int          i, j, dim = 2;
        FILE         *densfp = NULL, *pressfp = NULL;
        FILE         *crdsfp[2] = {NULL, NULL};
        static char  *fname = NULL, *ppfname = NULL;
        static size_t fname_len = 0, ppfname_len = 0;
        char         densname[256], pressname[256], outdir[256];
        char         crdsname[2][256];
        const char   *nstep;
        const char   *sep;
        static Locstate st = NULL;
        float        dx, dy, crds[MAXD], dent[MAXD];
        float        **denst, **xcrds, **ycrds, **press = NULL;
        float        diam = 0.0;
        int          Xnumpts, Ynumpts, Nx, Ny, ic[MAXD];
        int          ixmin, ixmax, iymin, iymax, ratio;
        int          want_press, write_crds;
        static int   first = YES;

        if(debugging("Mach_step"))
        {
            show_state_to_rect_Mach_step(dname, step, fr);
            return;
        }
        if(debugging("rotor") || debugging("blast_MHD") ||
           debugging("Orszag_T") || debugging("field_loop"))
        {
            show_state_to_rotor_MHD(dname, step, fr);

            if(debugging("field_loop"))
                show_Az_state_MHD(dname, step, fr);
            return;
        }

        /* Lattice resolution is hard wired per test problem. */
        if(debugging("shock_vort"))
        {
            Xnumpts = 201; Ynumpts = 401;
        }
        else if(debugging("twod_riemann"))
        {
            Xnumpts = 501; Ynumpts = 501;
        }
        else if(debugging("db_Mach"))
        {
#if defined(__MPI__)
            /*
            * Wired for a 2 by (Nnodes/2) partition of an 800 by 3200
            * lattice.  The divisor is clamped so that a run on a single
            * node does not divide by zero.
            */
            int ysplit = pp_numnodes()/2;

            if(ysplit < 1)
                ysplit = 1;
            Xnumpts = 401; Ynumpts = 3200/ysplit + 1;
#else
            Xnumpts = 501; Ynumpts = 2001;
#endif // if defined(__MPI__)
        }
        else
            return;

        /* Pressure is only dumped for these two problems. */
        want_press = (debugging("db_Mach") || debugging("shock_vort"));
        write_crds = (first == YES);

        if(st == NULL)
            g_alloc_state(&st, fr->sizest);

        (void) snprintf(outdir,sizeof(outdir),"%s/%s",dname,"matlab_data");

        /*
        * right_flush() is called again after every set_ppfname(), exactly
        * as before; presumably its result does not survive that call.
        */
        ppfname = set_ppfname(ppfname,"dens",&ppfname_len);
        nstep = right_flush(step,7);
        (void) snprintf(densname,sizeof(densname),"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"press",&ppfname_len);
        nstep = right_flush(step,7);
        (void) snprintf(pressname,sizeof(pressname),"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"X_crds",&ppfname_len);
        nstep = right_flush(step,7);
        (void) snprintf(crdsname[0],sizeof(crdsname[0]),"%s.ts%s",ppfname,nstep);

        ppfname = set_ppfname(ppfname,"Y_crds",&ppfname_len);
        nstep = right_flush(step,7);
        (void) snprintf(crdsname[1],sizeof(crdsname[1]),"%s.ts%s",ppfname,nstep);

        if (create_directory(dname,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_state_to_rect(), directory "
                          "%s doesn't exist and can't be created\n",dname);
            return;
        }
        if (create_directory(outdir,YES) == FUNCTION_FAILED)
        {
            (void) printf("WARNING in show_state_to_rect(), directory "
                         "%s doesn't exist and can't be created\n",outdir);
            return;
        }

        fname = get_list_file_name(fname,outdir,densname,&fname_len);
        if ((densfp = fopen(fname,"w")) == NULL)
        {
            (void) printf("WARNING in show_state_to_rect(), "
                           "can't open %s\n",fname);
            return;
        }

        if(want_press)
        {
            fname = get_list_file_name(fname,outdir,pressname,&fname_len);
            if ((pressfp = fopen(fname,"w")) == NULL)
            {
                (void) printf("WARNING in show_state_to_rect(), "
                           "can't open %s\n",fname);
                goto close_files;
            }
        }

        if(write_crds)
        {
            for(i = 0; i < 2; i++)
            {
                fname = get_list_file_name(fname,outdir,crdsname[i],&fname_len);
                if ((crdsfp[i] = fopen(fname,"w")) == NULL)
                {
                    (void) printf("WARNING in show_state_to_rect(), "
                               "can't open %s\n",fname);
                    goto close_files;
                }
            }
        }

        /* Lattice spacing and the number of points actually written. */
        dx = (gr->U[0]-gr->L[0])/(Xnumpts-1);
        dy = (gr->U[1]-gr->L[1])/(Ynumpts-1);
#if defined(__MPI__)
        Nx = Xnumpts;
        Ny = Ynumpts;
#else
        Nx = Xnumpts;
        if(debugging("shock_vort") || debugging("twod_riemann"))
            Ny = Ynumpts;
        else
            Ny = (Ynumpts-1)/4*3+1; /* db_Mach: only the lower 3/4 in y */
#endif // if defined(__MPI__)

        matrix(&denst,Nx,Ny,sizeof(float));
        matrix(&xcrds,Nx,Ny,sizeof(float));
        matrix(&ycrds,Nx,Ny,sizeof(float));
        if(want_press)
            matrix(&press,Nx,Ny,sizeof(float));

        /* Marker coordinate for lattice points no triangle covers. */
        for(j = 0; j < Ny; j++)
        {
            for(i = 0; i < Nx; i++)
                xcrds[i][j] = ycrds[i][j] = -0.00001;
        }

        /*
        * Representative edge length: side 0 of the first triangle of each
        * surface, the last surface winning.  diam stays 0 on an empty mesh.
        */
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            tri = first_tri(*surf);
            if(!at_end_of_tri_list(tri,*surf))
                diam = fg_length_side(tri)[0];
        }
        /* Half width, in lattice cells, of the window searched per triangle. */
        ratio = 5*((int)(diam/min(dx,dy)) + 1);

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf);
                tri = tri->next)
            {
                for(i = 0; i < dim; i++)
                    dent[i] = fg_centroid(tri)[i];
                ic[0] = irint(floor((dent[0]-gr->L[0])/dx));
                ic[1] = irint(floor((dent[1]-gr->L[1])/dy));
                ixmin = ic[0] - ratio; ixmax = ic[0] + ratio;
                iymin = ic[1] - ratio; iymax = ic[1] + ratio;

                for(j = iymin; j <= iymax; j++)
                {
                    if(j < 0 || j >= Ny)
                        continue;
                    for(i = ixmin; i <= ixmax; i++)
                    {
                        if(i < 0 || i >= Nx)
                            continue;

                        /* Nudge boundary points just inside the domain. */
                        crds[0] = gr->L[0] + dx*i;
                        crds[1] = gr->L[1] + dy*j;
                        if(i == 0)
                            crds[0] += 0.00001*dx;
                        if(j == 0)
                            crds[1] += 0.00001*dy;
                        if(i == Xnumpts-1)
                            crds[0] -= 0.00001*dx;
                        if(j == Ynumpts-1)
                            crds[1] -= 0.00001*dy;

                        if(pt_in_tri(crds, tri) != YES)
                            continue;

                        /*
                        * Evaluate the conserved variables at the point and
                        * load them into the scratch state st.
                        */
                        con_u_at_pt(tri->st, crds, fg_centroid(tri), sqrt(fg_area(tri)), con_u);
                        assign(st, tri->st, fr->sizest);
                        Dens(st) = con_u[0];
                        Mom(st)[0] = con_u[1];
                        Mom(st)[1] = con_u[2];
                        Energy(st) = con_u[3];

                        denst[i][j] = Dens(st);
                        if(want_press)
                            press[i][j] = pressure(st);
                        xcrds[i][j] = crds[0];
                        ycrds[i][j] = crds[1];
                    }
                }
            }
        }

        /* One text line per j; tabs between values, none after the last. */
        for(j = 0; j < Ny; j++)
        {
            for(i = 0; i < Nx; i++)
            {
                sep = (i != (Xnumpts-1)) ? "\t" : "";

                if(write_crds)
                {
                    fprintf(crdsfp[0],"%g%s", xcrds[i][j], sep);
                    fprintf(crdsfp[1],"%g%s", ycrds[i][j], sep);
                }
                fprintf(densfp,"%g%s", denst[i][j], sep);
                if(want_press)
                    fprintf(pressfp,"%g%s", press[i][j], sep);
            }
            if(write_crds)
            {
                fprintf(crdsfp[0],"\n");
                fprintf(crdsfp[1],"\n");
            }
            fprintf(densfp,"\n");
            if(want_press)
                fprintf(pressfp,"\n");
        }

        free(denst);
        free(xcrds);
        free(ycrds);
        if(want_press)
            free(press);

        /* Only a call that wrote the coordinate files counts as the first. */
        first = NO;

close_files:
        if(densfp != NULL)
            fclose(densfp);
        if(pressfp != NULL)
            fclose(pressfp);
        if(crdsfp[0] != NULL)
            fclose(crdsfp[0]);
        if(crdsfp[1] != NULL)
            fclose(crdsfp[1]);
}

EXPORT void g_init_FV_MHD_reconstruction(
        Wave       *wv,
        Front      *fr)
{
        RECT_GRID *gr = fr->rect_grid;
        TRI       *tri, *crsp_tri, *nbtri[3], *sten_tris[50], *tmptri, *new_cand[50];
        SURFACE   **surf;
        int       dim = 2, i, j, k, indx, side, total_tri = 0;
        double    *cent, B0[3], tmp;
        size_t    sizest = fr->sizest;
        int       nn_num, repetition, N_use = 0, in_list, new_nn_num = 0, num, sten_tri_num;
        int       debug = NO, vt_adj;
        POINT     *pt[3];
        Locstate  st;
        static Mid_soln   *midsoln = NULL;
        POLYGON           *polyg;

        if(debugging("field_loop"))
        {
            i = 0;
            for(surf = fr->mesh->surfaces; surf && *surf; surf++)
            {
                for (tri = first_tri(*surf);
                     !at_end_of_tri_list(tri,*surf); tri = tri->next)
                {
                    tri->id = i;
                    i++;
                }
            }

            B_tri_mesh_2nd_reconstruction(fr,NULL,RK_STEP,NULL);
#if defined(__MPI__)
            parallel_update_tri_edge_B_buffer(fr, NULL, RK_STEP);
            update_db_Mach_buffer(fr,NULL,RK_STEP,NULL);
#else
            update_edge_B_buffer(fr,NULL,RK_STEP);
            update_buffer(fr,NULL,RK_STEP,NULL);
#endif // if defined(__MPI__)

            check_b_match_on_tri_edges(fr,NULL,RK_STEP);

            i = 0;
            for(surf = fr->mesh->surfaces; surf && *surf;  surf++)
            {
                for (polyg = first_polyg(*surf); !at_end_of_polyg_list(polyg,*surf);
                     polyg = polyg->next)
                {
                    polyg->id = i;
                    i++;
                }
            }

            B_dual_mesh_edge_b_reconstruction_using_tri_B(fr,NULL,NULL,RK_STEP);//from Bn to Bx, By

#if defined(__MPI__)
            parallel_update_dual_cell_B_buffer(fr, NULL, RK_STEP);
#endif // if defined(__MPI__)

            check_b_match_on_dual_edges(fr, NULL, RK_STEP);

            Construct_Mag_field_on_both_grids(fr,NULL,NULL, RK_STEP, i, 0.0, NO);

#if defined(__MPI__)
            update_db_Mach_buffer(fr,NULL,RK_STEP,NULL);
#else
            update_buffer(fr,NULL,RK_STEP,NULL);
#endif // if defined(__MPI__)

            // check_b_match_on_tri_edges(fr,NULL,RK_STEP);
            return;
        }
        else
        {
            return Init_Construct_Mag_field_on_dual_cell(wv,fr);
        }

        if(mass_1st_row == NULL)
        {
            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));
            for(i = 0; i < 20; i++)
                matrix(&mass_1st_rows[i], 1, MAX_N_COEF,sizeof(double));
        }

        /****
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                cent = fg_centroid(tri);
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                for(i = 0; i < 3; i++)
                {
                    nbtri[i] = Tri_on_side(tri,i);
                    pt[i] = Point_of_tri(tri)[i];
                }

                if(MAX_N_COEF == 1)
                {
                    tri_B_P0_polynomial_reconstruction(tri, NULL,RK_STEP, B0);
                    continue;
                }

                if(MAX_N_COEF == 3)
                {
                    // tri_B_P1_polynomial_reconstruction(tri, NULL, NULL, RK_STEP);
                    // tri_B_P0_polynomial_reconstruction(tri, midsoln,rk_step, B0);
                    // tri_B_P1_polynomial_reconstruction_ver1(tri, NULL, NULL, RK_STEP);//OLD
                    printf("IN g_init_FV_MHD_reconstruction():: before tri_B_P1_polynomial_reconstruction_ver2()\n");
                    tri_B_P1_polynomial_reconstruction_ver2(tri, NULL, NULL, RK_STEP);//for DG Huijing
                    // tri_comput_P1_polynomials_from_avg_MHD(tri, nbtri, 3, midsoln,limit_store,rk_step);
                    // tri_comput_P1_polynomials_from_avg(tri, nbtri, 3, midsoln,limit_store,rk_step);
                    continue;
                }

                if(MAX_N_COEF == 6)
                {
                    // tri_B_P2_polynomial_reconstruction_consv(tri, NULL,NULL, RK_STEP);
                    // tri_B_P2_polynomial_reconstruction_9edge_1(tri, NULL,NULL, RK_STEP);//OLD
                    tri_B_P2_polynomial_reconstruction_new(tri, NULL,NULL,RK_STEP);//for DG Huijing//NEW
                    continue;
                }
            }
        }
        ****/
        B_tri_mesh_2nd_reconstruction(fr,NULL,RK_STEP,NULL);//from Bn to Bx, By

        //// initialization does not include 0.5|B| in total energy
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                tri->id = total_tri;
                total_tri++;
                /***
                // cent = fg_centroid(tri);
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                st = tri->st;
                tmp = 0.0;
                for(i = 0; i < 3; i++)
                    tmp +=  sqr(Mag(st)[i]);
                Energy(st) += 0.5*tmp;
                ***/
            }
        }

        if(debugging("mag_acc_vert") || debugging("rotor") || 
           debugging("decay_alfven") || debugging("Orszag_T"))
        {
#if defined(__MPI__)
            update_db_Mach_buffer(fr,NULL,RK_STEP,NULL);
#else
            update_buffer(fr,NULL,RK_STEP,NULL);
#endif // if defined(__MPI__)
        }

        /****
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                cent = fg_centroid(tri);
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                for(i = 0; i < 3; i++)
                {
                    nbtri[i] = Tri_on_side(tri,i);
                    pt[i] = Point_of_tri(tri)[i];
                }

                if(MAX_N_COEF == 1)
                {
                    FV_P0(tri,nbtri,NULL,NULL,RK_STEP,fr);
                    continue;
                }

                if(MAX_N_COEF == 3)
                {
                    // init_tri_comput_P1_polynomials_from_avg_MHD(tri,nbtri,3);
                    WENO_FV_P1(tri,nbtri,NULL,NULL,RK_STEP,fr);
                    // all_neighboring_cells(tri,sten_tris,&sten_tri_num);
                    // set_HR_sten(nbtri, tri,sten_tris,&sten_tri_num);
                    // limiting_P1_MHD(tri,sten_tris,sten_tri_num,NULL,RK_STEP,fr);
                    continue;
                }
                if(MAX_N_COEF == 6)
                {
                    WENO_FV_P2(tri,nbtri,NULL,NULL,RK_STEP,fr);
                    
                    // get_sten_neighbr_B_field(nbtri, tri, sten_tris, &nn_num);
                    // if(nn_num != 6)
                    // {
                    //     printf("ERROR: g_init_FV_MHD_reconstruction(), area field not enough stencil\n");
                    //     clean_up(ERROR);
                    // }
                    // tri_comput_P2_polynomials_from_avg_MHD(tri, sten_tris, nn_num, NULL, NULL,RK_STEP, NULL);
                    // printf("ERROR: g_init_FV_MHD_reconstruction(), construct area polynomial P2\n");
                    // clean_up(ERROR);
                    continue;
                }
            }
        }
        ****/

        if(midsoln == NULL)
        {
            // printf("sizeof(gas) = %d, fr->sizest = %d\n", sizeof(Gas), fr->sizest);
            // clean_up(0);

            vector(&midsoln,total_tri,sizeof(Mid_soln));
            for(i = 0; i < total_tri; i++)
            {
                vector(&(midsoln[i].st), RK_STEP, sizeof(Locstate)); // The 0 level is used to save cell avg
                                                                     // and reconstructed states temporarily.
                vector(&(midsoln[i].worksp_st_store),RK_STEP,fr->sizest);
                for(j = 0; j < RK_STEP; j++)
                    midsoln[i].st[j] = midsoln[i].worksp_st_store + j*fr->sizest;
                /// to save integral of bases functions for constrained DG
                /// constrain_mass_1st_rows[i].mass_1st_rows[0] = NULL;
            }
        }

        printf("Before Subcell_limiting_soln_with_buffer_tris_multiple_times_local_HR_MHD()\n");
        Subcell_limiting_soln_with_buffer_tris_multiple_times_local_HR_MHD(fr,midsoln,NULL,
                                             RK_STEP,HR_times,NULL,total_tri);///????

        if(debugging("rotor"))
        {
#if defined(__MPI__)
            update_db_Mach_buffer(fr,NULL,RK_STEP,NULL);
#else // if defined(__MPI__)
            update_buffer(fr,NULL,RK_STEP,NULL);
#endif // if defined(__MPI__)
        }

        printf("Beofre B_edge_reconstruction\n");

        B_edge_reconstruction(fr,NULL,RK_STEP,NULL);

        printf("Beofre  B_cell_2nd_reconstruction\n");
        B_tri_mesh_2nd_reconstruction(fr,NULL,RK_STEP,NULL);//from Bn to Bx, By

#if defined(__MPI__)
        printf("ERROR: g_init_FV_MHD_reconstruction(), "
          "implementing communication for B on edges of triangles in the buffer zone\n");
        clean_up(ERROR);
#else
        update_buffer(fr,NULL,RK_STEP,NULL);
        // if(debugging("rotor"))
        //     update_edge_B_buffer(fr,midsoln,RK_STEP);
#endif // if defined(__MPI__)
 

        if(debugging("mag_acc_vert"))
        {
            //// accurate test for magnetic vortex evolution
            accurate_mag_vort_L1(fr);
        }

        printf("EXIT in g_init_FV_MHD_reconstruction(), fr->time = %g\n", fr->time);
        clean_up(0);
}


/* 
 * Currently uses the L-F flux with a 3rd-order RK time discretization.
 */
EXPORT void gFV_tri_vec(
        int        *iperm,
        float      *dh,
        float      dt,
        Wave       *wv,
        Wave       *nwv,
        Front      *fr,
        Front      *nfr,
        COMPONENT  max_comp)
{
        SURFACE           **surf = nfr->mesh->surfaces;
        SURFACE           **osurf = fr->mesh->surfaces;
        TRI               *otri, *tri;
        POLYGON           *opolyg, *polyg;
        int               dim = 2, i, j, k;
        double            max_speed = 0.0, velx, vely, vel;
        double            *cent, *ocent;
        static int        first = YES;
        static int        total_tri = 0, total_polyg = 0;
        static Mid_soln   *midsoln = NULL;
        static Limiting_store **limit_store = NULL; //0: limited soln, 1,2: computed soln
        static Dual_cell_Mid_soln   *dual_cell_midsoln = NULL;
        // size_t         sizest = fr->sizest;
        int               cv_indx, indx;
        int               rk_iter, detect_extr, check_quadrature = YES;
        int               use_limiter = NO, USE_HR = YES; // YES, YES for shock wave simu.
        double            tmp_alpha, press, Gam;
        char              s[256];

        if(debugging("db_Mach"))
        {
            // compute exact shock speed and position at x = 0.0 in
            // the y direction
            float spd, y_incr;
            spd = (post_sk_st[0]*post_sk_st[1]-0.0)/(post_sk_st[0]-pre_sk_st[0]);   
            y_incr = spd*fr->time/sin(radians(60.0));
            sk_y_pos = 1.0/6.0 -sqrt(3)/3.0*(0.0-1.0) + y_incr;
        }

        if(mass_1st_row == NULL)
        {
            matrix(&mass_1st_row, 1, MAX_N_COEF,sizeof(double));
            for(i = 0; i < 20; i++)
                matrix(&mass_1st_rows[i], 1, MAX_N_COEF,sizeof(double));
        }

        if(first == YES)
        {
#if defined(__MPI__)
            BLOCK_SIZE = GetHypPPBlockSize();
#endif // if defined(__MPI__)       

            dmach = 0.5*DBL_EPSILON;
            first = NO;

            // Gauss quadrature points and weights  
            if(Gauss_N == 2)
            {
                q[0] = -1.0/sqrt(3.0); q[1] = 1.0/sqrt(3.0);
                qw[0] = 1.0; qw[1] = 1.0;
            }
            else if(Gauss_N == 3)
            {
                // 3-point 
                q[0] = -sqrt(0.6); q[1] = 0.0; q[2] = sqrt(0.6);
                qw[0] = 5.0/9.0; qw[1] = 8.0/9.0; qw[2] = 5.0/9.0;
            }
            else if(Gauss_N == 4)
            {
                q[0] = -0.86113631159405257522; q[1] = -0.33998104358485626480;
                q[2] =  0.33998104358485626480; q[3] = 0.86113631159405257522;
                qw[0] =  0.34785484513745385737; qw[1] = 0.65214515486254614263;
                qw[2] =  0.65214515486254614263; qw[3] = 0.34785484513745385737;
                /****
                /// Try to use Gauss-Lobatto quadrature to include the end points of the edge.
                /// This might be beneficial to E_z.
                /// Gauss-Lobatto: algebraic accuracy 2n-3
                /// Gauss: algebraic accuracy 2n-1
                q[0] = -1.0; q[1] = -sqrt(5.0)/5.0;
                q[2] =  sqrt(5.0)/5.0; q[3] = 1.0;
                qw[0] =  1.0/6.0; qw[1] = 5.0/6.0;
                qw[2] =  5.0/6.0; qw[3] = 1.0/6.0;
                ****/
            }
            else if(Gauss_N == 5)
            {
                q[0] = -0.90618; q[1] = -0.538469;
                q[2] = 0.0; q[3] = 0.538469; q[4] = 0.90618;
                qw[0] = 0.236927; qw[1] = 0.478629;
                qw[2] = 0.568889; qw[3] = 0.478629; qw[4] = 0.236927;
                //// Gauss-Lobatto quadrature
                /***
                q[0] = -1.0; q[1] = -sqrt(21.0)/7.0;
                q[2] = 0.0;  q[3] =  sqrt(21.0)/7.0; q[4] = 1.0;
                qw[0] =  0.1; qw[1] = 49.0/90.0; qw[2] = 32.0/45.0;
                qw[3] =  49.0/90.0; qw[4] = 0.1;
                ***/
            }
            else if(Gauss_N == 8)
            {
                qw[0] = 0.1012285; qw[1] = 0.2223810; qw[2] = 0.3137066;
                qw[3] = 0.3626838; qw[4] = 0.3626838; qw[5] = 0.3137066;
                qw[6] = 0.2223810; qw[7] = 0.1012285;
                q[0] = -0.9602899; q[1] = -0.7966665; q[2] = -0.5255324;
                q[3] = -0.1834346; q[4] = 0.1834346; q[5] = 0.5255324;
                q[6] = 0.7966665; q[7] = 0.9602899;
            }
            else
            {
                // To test mid point
                q[0] = 0.0;
                qw[0] = 1.0;
            }

/// For the abscissas and weights of the Lobatto quadrature, see:
/// H. H. Michels, "Abscissas and Weight Coefficients for Lobatto Quadrature",
/// Mathematics of Computation, Vol. 17, No. 83 (Jul. 1963), pp. 237-244.
            if(Lobatto_N == 2)
            {
                Lo_q[0] = -1.0; Lo_q[1] = 1.0;
                Lo_qw[0] =  1.0; Lo_qw[1] = 1.0;
            }
            else if(Lobatto_N == 3)
            {
                Lo_q[0] = -1.0; Lo_q[1] = 0.0;
                Lo_q[2] =  1.0;
                Lo_qw[0] =  1.0/3.0; Lo_qw[1] = 4.0/3.0;
                Lo_qw[2] =  1.0/3.0;
            }
            else if(Lobatto_N == 4)
            {
                Lo_q[0] = -1.0; Lo_q[1] = -sqrt(5.0)/5.0;
                Lo_q[2] =  sqrt(5.0)/5.0; Lo_q[3] = 1.0;
                Lo_qw[0] =  1.0/6.0; Lo_qw[1] = 5.0/6.0;
                Lo_qw[2] =  5.0/6.0; Lo_qw[3] = 1.0/6.0;
            }
            else if(Lobatto_N == 5)
            {
                Lo_q[0] = -1.0; Lo_q[1] = -sqrt(21.0)/7.0;
                Lo_q[2] = 0.0;  Lo_q[3] =  sqrt(21.0)/7.0; Lo_q[4] = 1.0;
                Lo_qw[0] =  0.1; Lo_qw[1] = 49.0/90.0; Lo_qw[2] = 32.0/45.0;
                Lo_qw[3] =  49.0/90.0; Lo_qw[4] = 0.1;
            }
            else if(Lobatto_N == 6)
            {
                Lo_q[0] = -1.0; Lo_q[1] = -sqrt(1.0/21.0*(7.0+2.0*sqrt(7.0)));
                Lo_q[2] = -sqrt(1.0/21.0*(7.0-2.0*sqrt(7.0)));  Lo_q[3] =  sqrt(1.0/21.0*(7.0-2.0*sqrt(7.0)));
                Lo_q[4] =  sqrt(1.0/21.0*(7.0+2.0*sqrt(7.0)));     Lo_q[5] = 1.0;
                Lo_qw[0] =  1.0/15.0; Lo_qw[1] = 1.0/30.0*(14.0-sqrt(7.0));
                Lo_qw[2] = 1.0/30.0*(14.0+sqrt(7.0)); Lo_qw[3] =  1.0/30.0*(14.0+sqrt(7.0));
                Lo_qw[4] = 1.0/30.0*(14.0-sqrt(7.0));       Lo_qw[5] = 1.0/15.0;
            }
            else
            {
                printf("ERROR: gFV_tri_vec()\n");
                printf("Initialize needed quadrature\n");
                clean_up(ERROR);
            }

            i = 0;
            for(; osurf && *osurf;  osurf++, surf++)
            {
                for (otri = first_tri(*osurf), tri = first_tri(*surf);
                    !at_end_of_tri_list(otri,*osurf);
                     otri = otri->next, tri = tri->next)
                {
                    otri->id = i;
                    tri->id = i;
                    i++;
                }
            }
            total_tri = i;

            surf = nfr->mesh->surfaces;
            osurf = fr->mesh->surfaces;
            i = 0; 
            for(; osurf && *osurf;  osurf++, surf++)
            {
                for (opolyg = first_polyg(*osurf), polyg = first_polyg(*surf);
                    !at_end_of_polyg_list(opolyg,*osurf);
                     opolyg = opolyg->next, polyg = polyg->next)
                {
                    opolyg->id = i;
                    polyg->id = i;
                    i++;
                }
            }
            total_polyg = i;
        } /// END:::: if(first == YES)

        if(midsoln == NULL)
        {
            // printf("sizeof(gas) = %d, fr->sizest = %d\n", sizeof(Gas), fr->sizest);
            vector(&dual_cell_midsoln,total_polyg,sizeof(Dual_cell_Mid_soln));
            vector(&midsoln,total_tri,sizeof(Mid_soln));

            ////// save HR stencil
            vector(&alltri_HR_sten,total_tri,sizeof(Tri_HR_sten));
            for(i = 0; i < total_tri; i++)
            {
                alltri_HR_sten[i].HR_sten_set_3rd = alltri_HR_sten[i].HR_sten_set_2nd =
                alltri_HR_sten[i].HR_sten_set_1st = alltri_HR_sten[i].Bsten_set = NO; 
                alltri_HR_sten[i].WENO_cent_set = alltri_HR_sten[i].WENO_side_set[0] =
                alltri_HR_sten[i].WENO_side_set[1] = alltri_HR_sten[i].WENO_side_set[2] =
                alltri_HR_sten[i].WENO_rev_set[0] = alltri_HR_sten[i].WENO_rev_set[1] = 
                alltri_HR_sten[i].WENO_rev_set[2] = NO;
                ////////// Flags for the 2nd reconstruction of the zone-centered B field from the edges.
                ////////// By then the edge-based polynomials should already have been limited.
                alltri_HR_sten[i].B_2nd_recons_sten_set = NO;
                alltri_HR_sten[i].P2_B_2nd_recons_sten_set = NO;
            }

            // Do not use limit_store anymore
            // vector(&limit_store,RK_STEP, sizeof(Limiting_store*));
            // for(i = 0; i < RK_STEP; i++)
            //     vector(&limit_store[i],total_tri,sizeof(Limiting_store));

            /// to save integral of bases functions for constrained DG
            /// vector(&constrain_mass_1st_rows,total_tri,sizeof(Tri_mass_1st_rows));

            for(i = 0; i < total_tri; i++)
            {
                vector(&(midsoln[i].st), RK_STEP, sizeof(Locstate)); // The 0 level is used to save cell avg
                                                                     // and reconstructed states temporarily.
                vector(&(midsoln[i].worksp_st_store),RK_STEP,fr->sizest);
                for(j = 0; j < RK_STEP; j++)
                    midsoln[i].st[j] = midsoln[i].worksp_st_store + j*fr->sizest;
                /// to save integral of bases functions for constrained DG
                /// constrain_mass_1st_rows[i].mass_1st_rows[0] = NULL;
            }
            
            if(RK_STEP == 4)
            {
                vector(&RK0_rhs,total_tri,sizeof(double**));
                vector(&RK1_rhs,total_tri,sizeof(double**));
                vector(&RK0_rhs_adj,total_tri,sizeof(double**));
                vector(&RK1_rhs_adj,total_tri,sizeof(double**));

                vector(&RK0_rhs_avg_adj,total_tri,sizeof(double*));
                vector(&RK1_rhs_avg_adj,total_tri,sizeof(double*));
                vector(&RK0_rhs_avg,total_tri,sizeof(double*));
                vector(&RK1_rhs_avg,total_tri,sizeof(double*));

                for(i = 0; i < total_tri; i++)
                {
                    matrix(&RK0_rhs[i],9,MAX_N_COEF,sizeof(double));
                    matrix(&RK1_rhs[i],9,MAX_N_COEF,sizeof(double));
                    matrix(&RK0_rhs_adj[i],9,MAX_N_COEF,sizeof(double));
                    matrix(&RK1_rhs_adj[i],9,MAX_N_COEF,sizeof(double));

                    vector(&RK0_rhs_avg[i],9,sizeof(double));
                    vector(&RK1_rhs_avg[i],9,sizeof(double));
                    vector(&RK0_rhs_avg_adj[i],9,sizeof(double));
                    vector(&RK1_rhs_avg_adj[i],9,sizeof(double));
                }
            }

            printf("Report storage_use after alloc RK: %-d K\n", get_vmalloc_storage_use()/1000);
            printf("print subdomain size\n");
            print_rectangular_grid(fr->rect_grid);

            if(debugging("Shu_Osher"))
            {
                // printf("before  update_buffer_x_ref()\n");
                update_buffer_x_ref(fr,midsoln,RK_STEP,limit_store);
                // printf("before  Subcell_limiting_soln_wi()\n");
                Subcell_limiting_soln_with_buffer_tris_multiple_times_rearrange_order(fr,midsoln,
                       limit_store,RK_STEP,1,YES,total_tri);
            }
        }/////// END:::: if(midsoln == NULL)

        newdt = HUGE_VAL; 

        // printf("\n\n....... IN gFV_tri_vec(), before STEPPING\n");
        // accuracy_Mag_field_on_dual_cell_L1(fr);
        // accuracy_Mag_field_on_dual_cell_L1_DG(fr);
        // accurate_mag_vort_L1(fr);
        // printf("END testing accuracy before STEPPING\n");

        for(rk_iter = 0; rk_iter < RK_STEP; rk_iter++)
        {
            /// printf("\n\n-------------%d DG ITERATION ---------\n", rk_iter);
            /// sprintf(s,"iteration %d",rk_iter);
            set_RK_time_for_bdry(nfr, fr, rk_iter, dt);
            switch(1)
            {
            case 1:
            /*********Original DG **********/
            /*******************************
                for(osurf = fr->mesh->surfaces, surf = nfr->mesh->surfaces; 
                    surf && *surf; surf++, osurf++)    
                {
                    for (tri = first_tri(*surf), otri = first_tri(*osurf); 
                         !at_end_of_tri_list(tri,*surf);
                         tri = tri->next, otri = otri->next) 
                        adv_fw(tri,otri,dt,max_speed,midsoln,rk_iter,fr,limit_store,YES,YES,YES);
                }
            break;
            *******************************/

            /*********DG for MHD***********/
                if(N_EQN == 8)
                {
                    // printf("\n----Before adv_dual_cell_B_edge_DG()\n\n");fflush(stdout);
                    //////////////////////////////////////////////
                    /// clean up flag for saving rhs of DG.
                    //////////////////////////////////////////////
                    for(osurf = fr->mesh->surfaces; osurf && *osurf; osurf++)
                    {
                        for (opolyg = first_polyg(*osurf); !at_end_of_polyg_list(opolyg,*osurf);
                             opolyg = opolyg->next)
                            memset((opolyg->private_data)._iflags, NO, sizeof(bool)*MAX_N_POLY_SIDE); 
                    }

                    for(osurf = fr->mesh->surfaces, surf = nfr->mesh->surfaces;
                        surf && *surf; surf++, osurf++)
                    {
                        for (polyg = first_polyg(*surf), opolyg = first_polyg(*osurf);
                             !at_end_of_polyg_list(polyg,*surf);
                             polyg = polyg->next, opolyg = opolyg->next)
                        {
                            adv_dual_cell_B_edge_DG(polyg,opolyg,dt,max_speed,
                                midsoln,dual_cell_midsoln,rk_iter,fr);
                        }
                    }
                    /**
                    if (!debugging("field_loop"))
                    {
                        merge_b_on_dual_edges_removing_rounding_error(nfr,dual_cell_midsoln,rk_iter+1);
                    }
                    **/
                    // printf("----After adv_dual_cell_B_edge_DG()\n");fflush(stdout);

                    /// Do update B on the edges of triangles in the buffer zone.
                    /*******
#if defined(__MPI__)
                    printf("ERROR: gFV_tri_vec(), implementing communication"
                           " for B on edges of triangles in the buffer zone\n");
                    clean_up(ERROR);
#else
                    if(debugging("mag_acc_vert") || debugging("rotor") || 
                       debugging("decay_alfven") || debugging("Orszag_T"))
                        update_edge_B_buffer(nfr,midsoln,rk_iter+1);
                    if(debugging("blast_MHD"))
                    {
                        update_center_and_edge_phys_buffer(fr,nfr,midsoln,rk_iter+1);
                    }
#endif // if defined(__MPI__)
                    ********/
                    ///END:::: Do update B on the edges of triangles in the buffer zone.
                }

                /// This is for evolving entropy
                if(rk_iter == 0)
                {
                    for(osurf = fr->mesh->surfaces, surf = nfr->mesh->surfaces;
                        surf && *surf; surf++, osurf++)
                    {
                        for (tri = first_tri(*surf), otri = first_tri(*osurf);
                             !at_end_of_tri_list(tri,*surf);
                             tri = tri->next, otri = otri->next)
                        {
                            if(otri->BC_type == SUBDOMAIN)
                                continue;
                            press = POLY_thermal_pressure_MHD(otri->st);
                            Gam = gruneisen_gamma(otri->st);
                            Ent(otri->st) = press/pow(Dens(otri->st), Gam);
                        }
                    }
                }

                // printf("----Before B_tri_mesh_2nd_reconstruction()\n");fflush(stdout);
                // B_tri_mesh_2nd_reconstruction(nfr,midsoln,rk_iter+1,limit_store);
                // printf("----After B_tri_mesh_2nd_reconstruction()\n");fflush(stdout);

                /////////////  communicate buffer to set up surface state
                // attach_cell_averages_in_buffer(nfr,midsoln);
                ///////////// END:  communicate buffer to set up surface state

                // printf("----Before adv_fw_MHD_DG()\n");fflush(stdout);
                for(osurf = fr->mesh->surfaces, surf = nfr->mesh->surfaces;
                    surf && *surf; surf++, osurf++)
                {
                    for (tri = first_tri(*surf), otri = first_tri(*osurf);
                         !at_end_of_tri_list(tri,*surf);
                         tri = tri->next, otri = otri->next)
                    {
                        // adv_fw_cell_avg(tri,otri,dt,max_speed,midsoln,rk_iter,fr,limit_store);
                        adv_fw_MHD_DG(tri,otri,dt,max_speed,midsoln,rk_iter,fr,
                                      limit_store,NO,NO,YES,dual_cell_midsoln);//NEW
                    }
                }
                /////////////  communicate buffer to set up surface state
                // attach_cell_averages_in_buffer(nfr,midsoln);
                ///////////// END:  communicate buffer to set up surface state

                // constrained_trans_B_reconstruction_DG(nfr,midsoln,rk_iter+1,limit_store);//NEW
            break; /// END::: case 1:

            case 5:
            /************** finite volume method *************/
                if(N_EQN == 8)
                {
                    for(osurf = fr->mesh->surfaces; osurf && *osurf; osurf++)
                    {
                        for (opolyg = first_polyg(*osurf); !at_end_of_polyg_list(opolyg,*osurf);
                             opolyg = opolyg->next)
                            memset((opolyg->private_data)._iflags, NO, sizeof(bool)*MAX_N_POLY_SIDE);
                    }

                    for(osurf = fr->mesh->surfaces, surf = nfr->mesh->surfaces;
                        surf && *surf; surf++, osurf++)
                    {
                        for (polyg = first_polyg(*surf), opolyg = first_polyg(*osurf);
                             !at_end_of_polyg_list(polyg,*surf);
                             polyg = polyg->next, opolyg = opolyg->next)
                        {
                            adv_dual_cell_B_edge_DG(polyg,opolyg,dt,max_speed,
                                midsoln,dual_cell_midsoln,rk_iter,fr);
                        }
                    }
                    if (!debugging("field_loop"))
                    {
                        merge_b_on_dual_edges_removing_rounding_error(nfr,dual_cell_midsoln,rk_iter+1);
                    }
                }
                ///END:::: if(N_EQN == 8){} for case 5:

                /// This is for evolving entropy
                if(rk_iter == 0)
                {
                    for(osurf = fr->mesh->surfaces, surf = nfr->mesh->surfaces;
                        surf && *surf; surf++, osurf++)
                    {
                        for (tri = first_tri(*surf), otri = first_tri(*osurf);
                             !at_end_of_tri_list(tri,*surf);
                             tri = tri->next, otri = otri->next)
                        {
                            if(otri->BC_type == SUBDOMAIN)
                                continue;
                            press = POLY_thermal_pressure_MHD(otri->st);
                            Gam = gruneisen_gamma(otri->st);
                            Ent(otri->st) = press/pow(Dens(otri->st), Gam);
                        }
                    }
                }

                // printf("Before adv_fw_cell_avg_MHD()\n"); fflush(stdout);

                for(osurf = fr->mesh->surfaces, surf = nfr->mesh->surfaces;
                    surf && *surf; surf++, osurf++)
                {
                    for (tri = first_tri(*surf), otri = first_tri(*osurf);
                         !at_end_of_tri_list(tri,*surf);
                         tri = tri->next, otri = otri->next)
                    {
                        // adv_fw_cell_avg(tri,otri,dt,max_speed,midsoln,rk_iter,fr,limit_store);
                        adv_fw_cell_avg_MHD(tri,otri,dt,max_speed,midsoln,rk_iter,fr,
                                            limit_store,NO,NO,YES,dual_cell_midsoln);
                        // adv_fw_MHD_DG(tri,otri,dt,max_speed,midsoln,rk_iter,fr,
                        //               limit_store,NO,NO,YES,dual_cell_midsoln);//NEW
                    }
                }
                /** 03/04/2014. For central scheme, this is not needed
                /////////////  communicate buffer to set up average.
                attach_cell_averages_in_buffer(nfr,midsoln); // ?? Do we need to do it here? 
                ///////////// END:  communicate buffer to set up average.
                **/

                /*** reconstruct polynomial on zone center from avg. for HR limiting ***/
                /*** For WENO FV, only do WENO reconstruction for the B_x, B_y field using 
                     the constraint transport approach. 
                 /// 03/04/2014. For Central Scheme, this is not needed. 
                 WENO_constrained_trans_B_reconstruction(nfr,midsoln,rk_iter+1,limit_store);
                ***/

                /* WENO finite volume does not need to do this step. */
                /** 03/04/2014. For Central Scheme, this step should not be done here
                if(YES == USE_HR)
                {
                    preliminary_reconstruction_zone_center(nfr,midsoln,rk_iter+1,limit_store);
                }
                **/
            break; //// END::: case 5:
            } /// END: switch(case)

            ////////////////////////////////
            /// clean up save edge flux flag
            ////////////////////////////////
            for(osurf = fr->mesh->surfaces, surf = nfr->mesh->surfaces;
                surf && *surf; surf++, osurf++)
            {
                for (tri = first_tri(*surf), otri = first_tri(*osurf);
                     !at_end_of_tri_list(tri,*surf);
                     tri = tri->next, otri = otri->next)
                {
                    fg_sf_flag(otri)[0] = fg_sf_flag(otri)[1] = fg_sf_flag(otri)[2] = NO;
                    fg_sf_flag(tri)[0] = fg_sf_flag(tri)[1] = fg_sf_flag(tri)[2] = NO;
                    
                    fg_sf_flag_adj(otri)[0] = fg_sf_flag_adj(otri)[1] = fg_sf_flag_adj(otri)[2] = NO;
                    fg_sf_flag_adj(tri)[0] = fg_sf_flag_adj(tri)[1] = fg_sf_flag_adj(tri)[2] = NO;
                }
            }
            if(N_EQN == 4)
                attach_cell_averages_in_buffer_at_step(nfr,rk_iter+1,midsoln);
	    
            if(debugging("Sod") || debugging("Lax"))
            {
                // update_buffer_x_peri(nfr,midsoln,rk_iter+1,limit_store); 
                update_buffer_x_ref(nfr,midsoln,rk_iter+1,limit_store); 
            }
            else if(debugging("shock_vort"))
            {
                update_buffer_x_ref(nfr,midsoln,rk_iter+1,limit_store);
            }
            else if(debugging("v_evo")|| debugging("acc_vert") || debugging("mag_acc_vert") || 
                    debugging("rotor")|| debugging("decay_alfven") || debugging("Orszag_T") ||
                    debugging("blast_MHD") || debugging("field_loop")) // 01-28-2015, allow parallel comm for blast_MHD
            {
#if defined(__MPI__)
                // printf("ITeration %d, before update_db_Mach_buffer\n\n", rk_iter); fflush(stdout);
                update_db_Mach_buffer(nfr,midsoln,rk_iter+1,limit_store);
                parallel_update_dual_cell_B_buffer(nfr, dual_cell_midsoln, rk_iter+1); //01-28-15 add for parallel
                                                                                   // This may not be needed,
                                                                                   // since all closed dual cells are computed.
                if(debugging("mag_acc_vert"))
                    impose_mag_acc_vert_BC(nfr,midsoln,rk_iter+1,current_time);
                else if(debugging("decay_alfven") || debugging("field_loop"))
                {
                    // impose_decay_alfven_BC(nfr,midsoln,rk_iter+1,current_time); // for exact BC of decay_alfven on tris
                    NULL;
                }
                else if(debugging("rotor") || debugging("Orszag_T"))
                {
                    NULL;
                }
                else if(debugging("blast_MHD"))
                {
                    // printf("ERROR: need to implement parallel BC in gFV_tri_vec() for blast_MHD"); 
                    // clean_up(ERROR);
                    // update_center_and_edge_phys_buffer(fr,nfr,midsoln,rk_iter+1); // need to think if 
                                                                   // need to update B on tri edges. 
                    impose_blast_MHD_tri_cell_phys_buffer(nfr,midsoln,rk_iter+1,current_time);
                }
                else
                {
                    printf("ERROR: need to implement parallel BC in gFV_tri_vec()"); 
                    clean_up(ERROR);
                }
#else
                /*** OLD 01-28-2015 for adding decay_alfven
                if(debugging("mag_acc_vert"))
                   impose_mag_acc_vert_BC(nfr,midsoln,rk_iter+1,current_time);
                else
                    update_buffer(nfr,midsoln,rk_iter+1,limit_store);
                ***/
                if(debugging("mag_acc_vert"))
                    impose_mag_acc_vert_BC(nfr,midsoln,rk_iter+1,current_time);
                else if(debugging("decay_alfven")) //01-28-2015 added for simulating decay_alfven
                {
                    // impose_decay_alfven_BC(nfr,midsoln,rk_iter+1,current_time);
                    update_buffer(nfr,midsoln,rk_iter+1,limit_store); //01-28-2015, test periodic BC
                }
                else if(debugging("field_loop"))
                {
                    update_buffer(nfr,midsoln,rk_iter+1,limit_store); //periodic BC
                }
                else
                    update_buffer(nfr,midsoln,rk_iter+1,limit_store);
#endif // if defined(__MPI__)
            }
            else if(debugging("blast_MHD"))
                impose_blast_MHD_tri_cell_phys_buffer(nfr,midsoln,rk_iter+1,current_time);

            if(use_limiter == YES)
            {
                if(YES == USE_HR)
                {
                    for(j = 0; j < 1; j++)
                    {
                        if(j == 0) check_quadrature = YES;
                        else check_quadrature = NO; 
                        // Subcell_limiting_soln_with_buffer_tris_multiple_times_rearrange_order(nfr,midsoln,limit_store,
                        //             rk_iter+1,HR_times,check_quadrature,total_tri);  /// locate for HR //TMP
                        Subcell_limiting_soln_with_buffer_tris_multiple_times_local_HR_MHD(nfr,midsoln,
                             limit_store,rk_iter+1,HR_times,check_quadrature,total_tri);
                        // Subcell_limiting_soln_with_buffer_tris_multiple_times_local_HR(nfr,midsoln,limit_store,
                        //             rk_iter+1,HR_times,check_quadrature,total_tri);
                    }
                }
                else
                {
                    WENO_FV_limiting_soln(nfr,midsoln,limit_store,rk_iter+1,HR_times,YES,total_tri);
                }

                if(debugging("Sod") || debugging("Lax"))
                {
                    // update_buffer_x_peri(nfr,midsoln,rk_iter+1,limit_store);
                    update_buffer_x_ref(nfr,midsoln,rk_iter+1,limit_store);
                }
                else if(debugging("shock_vort"))
                {
                    update_buffer_x_ref(nfr,midsoln,rk_iter+1,limit_store);
                }
                else if(debugging("v_evo") || debugging("acc_vert") || debugging("mag_acc_vert") || 
                        debugging("rotor") || debugging("decay_alfven") || debugging("Orszag_T") ||
                        debugging("blast_MHD") || debugging("field_loop")) // 01-28-2015, add parallel comm for blast_MHD
                {
#if defined(__MPI__)
                    update_db_Mach_buffer(nfr,midsoln,rk_iter+1,limit_store);
                    if(debugging("mag_acc_vert"))
                        impose_mag_acc_vert_BC(nfr,midsoln,rk_iter+1,current_time);
#else // if defined(__MPI__)
                    if(debugging("mag_acc_vert"))
                        impose_mag_acc_vert_BC(nfr,midsoln,rk_iter+1,current_time);
                    else if(debugging("blast_MHD")) // //01-28-2015, added
                    {
                        impose_blast_MHD_tri_cell_phys_buffer(nfr,midsoln,rk_iter+1,current_time);
                    }
                    else
                        update_buffer(nfr,midsoln,rk_iter+1,limit_store);
#endif // if defined(__MPI__)
                }
            } /***** END if(use_limiter == YES)   *****/
    
            ////// /// impose continuity upto the order of the base DG scheme.
            /****
            B_edge_reconstruction(nfr,midsoln,rk_iter+1,limit_store);
                /// from zxu2: /home/zxu2/MHD_P-1-2-3_FV/FronTier/src/gas

            /// Do update B on the edges of triangles in the buffer zone.
#if defined(__MPI__)
            printf("ERROR: gFV_tri_vec(), implementing communication for "
                   "B on edges of triangles in the buffer zone\n");
            clean_up(ERROR);
#else
            if(debugging("mag_acc_vert") || debugging("rotor") || 
               debugging("decay_alfven") || debugging("Orszag_T"))
                update_edge_B_buffer(nfr,midsoln,rk_iter+1);
            if(debugging("blast_MHD"))
            {
                update_center_and_edge_phys_buffer(fr,nfr,midsoln,rk_iter+1);
            }
#endif // if defined(__MPI__)
            ///END:::: Do update B on the edges of triangles in the buffer zone.

            B_tri_mesh_2nd_reconstruction(nfr,midsoln,rk_iter+1,limit_store);
                 /// from zxu2: /home/zxu2/MHD_P-1-2-3_FV/FronTier/src/gas
            // constrained_trans_B_reconstruction_DG(nfr,midsoln,rk_iter+1,limit_store);//Huijing
            ***/
            // if(N_EQN == 8)
            //     attach_cell_averages_in_buffer_at_step(nfr,rk_iter+1,midsoln);

            if(use_limiter == YES)
            {
                Dual_cell_edge_B_reconstruction_P1(nfr,midsoln,dual_cell_midsoln,rk_iter+1,current_time);
#if defined(__MPI__)
                parallel_update_dual_cell_B_buffer(nfr, dual_cell_midsoln, rk_iter+1); //01-28-15 add for parallel
#endif // if defined(__MPI__)
            }

            /// NEED NEW function to check positivity of pressure and density at quadrature point
            /// for next step. This is for local divergence-free B field. Bn could have jump
            /// across triangle cell interface.
            /// MHD_fix_neg_dens_press(nfr,midsoln,limit_store,rk_iter+1,total_tri);

            /***********************
            Construct B field on both dual and tri cells
            ************************/
            Construct_Mag_field_on_both_grids(nfr,midsoln,dual_cell_midsoln, 
                                             rk_iter+1, total_polyg, current_time,use_limiter);

            if(debugging("rotor")|| debugging("decay_alfven") || debugging("Orszag_T") ||
               debugging("mag_acc_vert") || debugging("blast_MHD") || debugging("field_loop")) //01-28-2015 added parallel comm for blast_MHD
            {
#if defined(__MPI__)
                update_db_Mach_buffer(nfr,midsoln,rk_iter+1,limit_store);
#else
                if(debugging("mag_acc_vert"))
                    impose_mag_acc_vert_BC(nfr,midsoln,rk_iter+1,current_time);
                else if(debugging("decay_alfven"))
                {
                    // impose_decay_alfven_BC(nfr,midsoln,rk_iter+1,current_time); //01-28-2015, added
                    update_buffer(nfr,midsoln,rk_iter+1,limit_store); // 01-28-2015, test periodic BC
                }
                else if(debugging("field_loop"))
                {
                    update_buffer(nfr,midsoln,rk_iter+1,limit_store); // periodic BC
                }
                else if(debugging("blast_MHD")) // //01-28-2015, added
                {
                    impose_blast_MHD_tri_cell_phys_buffer(nfr,midsoln,rk_iter+1,current_time);
                }
                else
                    update_buffer(nfr,midsoln,rk_iter+1,limit_store);
#endif // if defined(__MPI__)
            }

            if(use_limiter == NO && (rk_iter + 1) == RK_STEP)
                local_find_time_step_on_tris(nfr);

#if defined(__MPI__)
            if((rk_iter+1) == RK_STEP)
            {
                double newdts[2500];
                int    numnodes;

                numnodes = pp_numnodes();
                pp_all_gather((POINTER)&newdt,sizeof(float),
                      (POINTER)newdts,sizeof(float));
                for(i = 0; i < numnodes; i++)
                {
                    if (newdts[i] < newdt)
                        newdt = newdts[i];
                }
            }
#endif // if defined(__MPI__)
        } // END::::: for(rk_iter = 0; rk_iter < RK_STEP; rk_iter++)

        if(debugging("decay_alfven") && (fr->step%100 == 0))
        {
            double max_vz = -1.0e10, max_Bz = -1.0e10;
            // TRI *max_Bz_tri;
            for(surf = nfr->mesh->surfaces; surf && *surf; surf++)
            {
                for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf); tri = tri->next)
                {
                    if(tri->BC_type == SUBDOMAIN) continue;
                    if(fabs( Mag(tri->st)[2]) >= max_Bz)
                    {
                        max_Bz = fabs( Mag(tri->st)[2]);
                        // max_Bz_tri = tri;
                    }
                    if(fabs( Mom(tri->st)[2]/Dens(tri->st)) >= max_vz)
                        max_vz = fabs(Mom(tri->st)[2]/Dens(tri->st));
                }
            }  
            // printf("At_time= %g max_Vz= %14.12g max_Bz= %14.12g log10_max_Vz= %g, before pp_, tri %d\n",
            //               fr->time, max_vz, max_Bz, log10(max_vz), max_Bz_tri->id);
            // print_tri_crds(max_Bz_tri);
            pp_global_max(&max_vz, 1);
            pp_global_max(&max_Bz, 1);
            printf("At_time= %g max_Vz= %14.12g max_Bz= %14.12g log10_max_Vz= %g\n",
                          fr->time, max_vz, max_Bz, log10(max_vz));
        }

        if(debugging("field_loop") && (fr->step%200 == 0))
        {
            double max_vz = 0.0, max_Bz = 0.0;
            // TRI *max_Bz_tri;
            for(surf = nfr->mesh->surfaces; surf && *surf; surf++)
            {
                for (tri = first_tri(*surf); !at_end_of_tri_list(tri,*surf); tri = tri->next)
                {
                    if(tri->BC_type == SUBDOMAIN) continue;
                    if(fabs( Mag(tri->st)[2]) >= max_Bz)
                    {
                        max_Bz = fabs( Mag(tri->st)[2]);
                        // max_Bz_tri = tri;
                    }
                    if(fabs( Mom(tri->st)[2]/Dens(tri->st)) >= max_vz)
                        max_vz = fabs(Mom(tri->st)[2]/Dens(tri->st));
                }
            }  
            // printf("At_time= %g max_Vz= %14.12g max_Bz= %14.12g log10_max_Vz= %g, before pp_, tri %d\n",
            //               fr->time, max_vz, max_Bz, log10(max_vz), max_Bz_tri->id);
            // print_tri_crds(max_Bz_tri);
            pp_global_max(&max_vz, 1);
            pp_global_max(&max_Bz, 1);
            printf("At_time= %g max_Vz= %14.12g max_Bz= %14.12g log10_max_Vz= %g\n",
                          fr->time, max_vz, max_Bz, log10(max_vz));
        }

        // nfr->time = dt; 
        // Burgers_L1(nfr);
        // printf("\n\nEXIT in gDG_tri_vec, dt = %g\n", dt);
        // clean_up(0);
        // print_bottom_tris_state(nfr);
}

/*
*                       MHD_fix_neg_dens_press():
*
*       Walks every triangle of fr->mesh whose BC_type is not SUBDOMAIN,
*       sets tri->redo_limiting from unphysical_st_at_quadrature() (the
*       test is only made when N_EQN is 4 or 8; otherwise the flag is set
*       to NO) and stores the triangle pointers in rows of N_alloc entries.
*       When N_EQN == 8 every flagged triangle is afterwards handed to
*       fix_unphysical_st_cont_Bn_edge().  For N_EQN == 4 triangles are
*       flagged but nothing further is done with the flag here.
*
*       fr           front whose mesh surfaces are scanned
*       midsoln      intermediate stage solutions, indexed by tri->id
*       limit_store  not used by this routine (kept for the interface)
*       rk_step      stage to inspect: RK_STEP selects tri->st, any other
*                    value selects midsoln[tri->id].st[rk_step]
*       total_tri    not used by this routine (kept for the interface)
*
*       Calls clean_up(ERROR) if more than Row_alloc rows would be needed.
*/
LOCAL void MHD_fix_neg_dens_press(
        Front           *fr,
        Mid_soln        *midsoln,
        Limiting_store  **limit_store,
        int             rk_step,
        int             total_tri)
{
        TRI       *tri;
        SURFACE   **surf;
        Locstate  st;
        TRI       **limit_tris, **row_limit_tris[1500];
        int       N_alloc = 800, Row_alloc = 1500;
        int       N_row, N_use = 0, N;
        int       i, j;

        ////// Start: collect all tris.
        vector(&limit_tris, N_alloc, sizeof(TRI*));
        row_limit_tris[0] = limit_tris;
        N_row = 1;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
             !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                /* Placeholder kept from the original code: boundary
                 * triangles currently receive no special treatment. */
                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    NULL;
                }

                /* State of the requested Runge-Kutta stage. */
                if(rk_step == RK_STEP)
                    st = tri->st;
                else
                    st = midsoln[tri->id].st[rk_step];

                if((N_EQN == 4 || N_EQN == 8) && YES == unphysical_st_at_quadrature(tri, st))
                   tri->redo_limiting = YES;
                else
                   tri->redo_limiting = NO;

                limit_tris[N_use] = tri;
                N_use++;

                /* Current row is full: open a fresh one. */
                if(N_use == N_alloc)
                {
                    if(N_row +1 >= Row_alloc)
                    {
                        printf("ERROR: MHD_fix_neg_dens_press, exceed alloc. limit\n");
                        clean_up(ERROR);
                    }
                    vector(&limit_tris, N_alloc, sizeof(TRI*));
                    row_limit_tris[N_row] = limit_tris;
                    N_row++;
                    N_use = 0;
                }
            }
        }
        ////// END: collect all tris.

        if(N_EQN == 8)
        {
            /// For MHD, we still try to maintain continuity of Bn if
            /// after WENO reconstruction, there is negative pressure or density.
            for(i = 0; i < N_row; i++)
            {
                /* Only the last row may be partially filled. */
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(row_limit_tris[i][j]->redo_limiting == YES)
                    {
                        printf("TRI[%d] used fix_unphysical_st_cont_Bn_edge()\n", row_limit_tris[i][j]->id);
                        fix_unphysical_st_cont_Bn_edge(row_limit_tris[i][j],midsoln,rk_step,fr);
                    }
                }
            }
        }

        for(i = 0; i < N_row; i++)
            free(row_limit_tris[i]);
}


/*
*                       entropy_fix_for_blast_MHD():
*
*       Rebuilds the total energy of st from its entropy.  The steps are:
*       read Ent(st) (abort if negative), form a pressure as
*       entropy*Dens^Gamma with Gamma = gruneisen_gamma(st), call
*       tri_B_P0_polynomial_reconstruction() and FV_P0() for the triangle,
*       then store pressure/Gamma + kinetic energy + 0.5*|B|^2 in both
*       Energy(st) and dg_Energy(st)[0].  Finally the thermal pressure
*       returned by POLY_thermal_pressure_MHD() is checked; a negative
*       value is reported and clean_up(ERROR) is called.
*
*       st       state to be repaired (modified in place)
*       tri      triangle owning the state
*       midsoln  intermediate stage solutions, forwarded to the helpers
*       rk_step  stage index, forwarded to the helpers
*       outB0    passed to tri_B_P0_polynomial_reconstruction()
*/
LOCAL void entropy_fix_for_blast_MHD(
         Locstate  st,
         TRI       *tri,
         Mid_soln  *midsoln,
         int      rk_step,
         double    *outB0)
{
        const int n_comp = 3;
        double    entropy, press, gam;
        double    vel, kinetic, mag_sqr, total_engy;
        int       k;

        entropy = Ent(st);
        if(entropy < 0.0)
        {
            printf("ERROR: entropy_fix_for_blast_MHD---after 80 loops\n");
            printf("TRI[%d] has negative entropy\n", tri->id);
            verbose_print_state("fixed state",st);
            clean_up(ERROR);
        }

        /* Pressure implied by the entropy, taken before the P0 rebuild. */
        gam = gruneisen_gamma(st);
        press = entropy*(pow(Dens(st), gam));

        /* Drop to piecewise-constant data; the sums below are formed
         * from st only after these two calls. */
        tri_B_P0_polynomial_reconstruction(tri, midsoln,rk_step, outB0);
        FV_P0(tri, NULL, midsoln, NULL, rk_step, NULL);

        /* Kinetic energy: 0.5*rho*|u|^2 with u = Mom/Dens. */
        kinetic = 0.0;
        for(k = 0; k < n_comp; k++)
        {
            vel = Mom(st)[k]/Dens(st);
            kinetic += sqr(vel);
        }
        kinetic *= 0.5*Dens(st);

        /* Squared magnitude of the magnetic field. */
        mag_sqr = 0.0;
        for(k = 0; k < 3; k++)
            mag_sqr += sqr(Mag(st)[k]);

        total_engy = press/gam + kinetic + 0.5*mag_sqr;
        Energy(st) = total_engy;
        dg_Energy(st)[0] = total_engy;

        /* Verify the repaired state. */
        press = POLY_thermal_pressure_MHD(st);
        if(press < 0.0)
        {
            printf("ERROR: entropy_fix_for_blast_MHD, entropy = %g, p = %g, den %g\n", entropy, press, Dens(st));
            printf("TRI[%d] enter entropy_fix_for_blast_MHD\n", tri->id);
            print_tri_crds(tri);
            verbose_print_state("fixed state",st);
            clean_up(ERROR);
        }
}


/*
*                       compress_Bn_on_edge():
*
*       Multiplies the non-constant edge coefficients of the normal
*       magnetic field on all three sides of tri by compress_f.
*       Coefficient 1 is scaled when MAX_N_COEF == 3, coefficients 1 and 2
*       when MAX_N_COEF == 6; any other value is reported and
*       clean_up(ERROR) is called.
*
*       tri         triangle whose edge data is scaled
*       midsoln     intermediate stage solutions, indexed by tri->id
*       rk_step     RK_STEP scales fg_side_dgB(tri); any other value scales
*                   midsoln[tri->id].edge_dgBn[..][..][rk_step]
*       compress_f  multiplicative factor
*       fr          not used by this routine
*/
LOCAL void compress_Bn_on_edge(
         TRI       *tri,
         Mid_soln  *midsoln,
         int       rk_step,
         double    compress_f,
         Front     *fr)
{
         int  side, k, n_high;

         /* Number of higher-order coefficients stored per edge. */
         if(MAX_N_COEF == 3)
             n_high = 1;
         else if(MAX_N_COEF == 6)
             n_high = 2;
         else
         {
             printf("ERROR: compress_Bn_on_edge(), implement %d\n", MAX_N_COEF);
             clean_up(ERROR);
             return;
         }

         for(side = 0; side < 3; side++)
         {
             for(k = 1; k <= n_high; k++)
             {
                 if(rk_step == RK_STEP)
                     fg_side_dgB(tri)[side][k] *= compress_f;
                 else
                     midsoln[tri->id].edge_dgBn[side][k][rk_step] *= compress_f;
             }
         }
}


/*
*                   fix_unphysical_st_cont_Bn_edge():
*
*       Repairs a triangle whose state fails unphysical_st_at_quadrature().
*
*       1. While the state of tri is reported unphysical, the higher-order
*          edge coefficients of Bn are damped (compress_Bn_on_edge), the
*          cell magnetic field is rebuilt (P1 or P2 reconstruction chosen
*          by MAX_N_COEF) and the state is scaled with
*          scale_state_by_factor().  The 80th iteration prints diagnostics
*          and calls clean_up(ERROR).
*       2. For every neighbour that exists and is not of BC_type SUBDOMAIN,
*          the damped edge coefficients of tri are copied onto the matching
*          neighbour edge (coefficient 2 with opposite sign), the neighbour
*          field is rebuilt, and the neighbour is damped in the same way
*          while it is unphysical and its redo_limiting flag is NO.
*
*       tri      triangle to repair; tri->redo_limiting is reset to NO
*       midsoln  intermediate stage solutions, indexed by tri id
*       rk_iter  RK_STEP works on tri->st / fg_side_dgB, any other value
*                on midsoln[..].st[rk_iter] / midsoln[..].edge_dgBn
*       fr       only forwarded to compress_Bn_on_edge()
*/
LOCAL void fix_unphysical_st_cont_Bn_edge(
         TRI       *tri,
         Mid_soln  *midsoln,
         int       rk_iter, 
         Front     *fr)
{
         int    imax[8], imin[8], imax_side[8], imin_side[8], dim = 2;
         float  umin[8], umax[8];
         int    side, i, k, tmpside;
         TRI    *nbtri[3];
         float  a_max[10], a_min[10], u_avg[10], con_u[10], MB[2][16];
         float  st_min[10], st_max[10], *pcrds[3], qcrds[3], dx, dy, sum, xx1, yy1, *cent, B0[3];
         Locstate st, Tst, tmpst;
         int    debug_flag = NO, loop, enter_loop = NO;
         /* Damping factor applied on every repair iteration. */
         static float compress_factor = 0.6;
         float  Bn[3], bb[3], len[3], A[2][2], ans[2], ddx[3], ddy[3], coef[2][2], neighb_p_gas[3];
         float  p_gas_max, p_gas_min;

         tri->redo_limiting = NO;

         /* Select the state of the requested Runge-Kutta stage. */
         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             /// for(i = 0; i < 3; i++)
             ///     Bn[i] = fg_side_B(tri)[i];
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             /// for(i = 0; i < 3; i++)
             ///     Bn[i] = midsoln[tri->id].edge_Bn[i][rk_iter];
         }


         /* Stage 1: damp tri itself until its state passes the check. */
         loop = 0;
         while(unphysical_st_at_quadrature(tri, st) == YES)
         {
             enter_loop = YES;

             compress_Bn_on_edge(tri, midsoln, rk_iter, compress_factor, fr);
             if(MAX_N_COEF == 3)
                 tri_B_P1_polynomial_2nd_reconstruction(tri, midsoln,NULL,rk_iter);
             else if(MAX_N_COEF == 6)
                 tri_B_P2_polynomial_2nd_reconstruction(tri, midsoln,NULL,rk_iter);
             scale_state_by_factor(st, compress_factor);

             loop++;

             if(loop == 80)
             {
                 // verbose_print_state("scaled state",st);
                 /* NOTE(review): the message announces an entropy fix, but
                  * the call below is commented out and the code goes on to
                  * report an error and call clean_up(ERROR). */
                 printf("WARNING: press of tri[%d] can not be fixed by adjusting variations,"
                         " use entropy fix\n", tri->id);

                 if(debugging("blast_MHD"))
                 {
                     // entropy_fix_for_blast_MHD(st,tri,midsoln,rk_iter,B0); 01-28-2015
                     // printf("WARNING: fix_unphysical_st, loop = %d, entropy = %g, p = %g, den %g\n", loop, entr, p, Dens(st));
                     // printf("TRI[%d] enter fix_unphysical_st---\n", tri->id);
                     // print_tri_crds(tri);
                     // verbose_print_state("fixed state",st);
                     // clean_up(ERROR);
                     // break;
                 } ///// END::: if(debugging("blast_MHD"))
                 printf("ERROR: UNphysical_st_at_quadrature, loop = %d\n", loop);
                 printf("TRI[%d] enter fix_unphysical_st---\n", tri->id);
                 print_tri_crds(tri);

                 verbose_print_state("fixed state",st);
                 clean_up(ERROR);

                 /* NOTE(review): everything from here to the second
                  * clean_up(ERROR) runs only if the clean_up() call above
                  * returns -- presumably it does not; confirm. */
                 cent = fg_centroid(tri);
                 for(i = 0; i < 3; i++)
                     pcrds[i] = Coords(Point_of_tri(tri)[i]);
                 // (*Params(st)->_alloc_state)(&Tst,Params(st)->sizest);
                 // assign(Tst, st, Params(st)->sizest);

                 /* Print pressure, density and energy of the three
                  * neighbours (no NULL check on Tri_on_side here). */
                 for(side = 0; side < 3; side++)
                 {
                      if(rk_iter == RK_STEP)
                          Tst = Tri_on_side(tri,side)->st;
                      else
                          Tst = midsoln[Tri_on_side(tri,side)->id].st[rk_iter];
                      printf("Thermal pressure on side[%d] = %g, density %g, Total energy %g\n",
                                        side, POLY_thermal_pressure_MHD(Tst), Dens(Tst), Energy(Tst));
                      // verbose_print_state("neighb cell state",Tst);
                 }

                 verbose_print_state("scaled state",st);
                 // show_states_at_edge_quadrature(tri,st);

                 /* Copy the x and y field coefficients for verify_edge_B(). */
                 for(k = 0; k < MAX_N_COEF; k++)
                 {
                     MB[0][k] = dg_B(st)[0][k];
                     MB[1][k] = dg_B(st)[1][k];
                 }

                 verify_edge_B(tri,rk_iter, midsoln, MB[0], MB[1]);

                 tri_B_P0_polynomial_reconstruction(tri, midsoln,rk_iter, B0);
                 printf("Constant reconstruction B gives [%g %g]\n", B0[0], B0[1]);
                 verbose_print_state("B const state",st);

                 clean_up(ERROR);
             } /// END: if(loop == 80)
         } //// END:::: while(unphysical_st_at_quadrature(tri, st) == YES)

         /// NOW scale B in neighboring tris to make sure 
         /// continuity of Bn across cell edge.
         for(side = 0; side < 3; side++)
         {
             nbtri[side] = Tri_on_side(tri, side);
             if(nbtri[side] == NULL) continue;
             if(nbtri[side]->BC_type == SUBDOMAIN) continue;
             /* Find which side of the neighbour faces tri.
              * NOTE(review): if no side matches, tmpside ends at 3 and is
              * still used as an index below -- confirm the mesh guarantees
              * a match. */
             for(tmpside = 0; tmpside < 3; tmpside++)
             {
                 if(Tri_on_side(nbtri[side], tmpside) == tri)
                     break;
             }

             /* Copy the edge coefficients of tri onto the neighbour edge;
              * coefficient 2 is stored with the opposite sign. */
             if(rk_iter == RK_STEP)
             {
                 if(MAX_N_COEF == 3)
                     fg_side_dgB(nbtri[side])[tmpside][1] = fg_side_dgB(tri)[side][1];
                 else if(MAX_N_COEF == 6)
                 {
                     fg_side_dgB(nbtri[side])[tmpside][1] = fg_side_dgB(tri)[side][1];
                     fg_side_dgB(nbtri[side])[tmpside][2] = -fg_side_dgB(tri)[side][2];
                 }
             }
             else
             {
                 if(MAX_N_COEF == 3)
                     midsoln[nbtri[side]->id].edge_dgBn[tmpside][1][rk_iter] =
                              midsoln[tri->id].edge_dgBn[side][1][rk_iter];
                 else if(MAX_N_COEF == 6)
                 {
                     midsoln[nbtri[side]->id].edge_dgBn[tmpside][1][rk_iter] =
                              midsoln[tri->id].edge_dgBn[side][1][rk_iter];
                     midsoln[nbtri[side]->id].edge_dgBn[tmpside][2][rk_iter] =
                              -midsoln[tri->id].edge_dgBn[side][2][rk_iter];
                 }
             }
             /* Rebuild the neighbour's cell field from its edge data. */
             if(MAX_N_COEF == 3)
                 tri_B_P1_polynomial_2nd_reconstruction(nbtri[side], midsoln,NULL,rk_iter);
             else if(MAX_N_COEF == 6)
                 tri_B_P2_polynomial_2nd_reconstruction(nbtri[side], midsoln,NULL,rk_iter);
             /// Sometimes, this change may lead to negative pressure. 
             if(rk_iter == RK_STEP)
                 tmpst = nbtri[side]->st;
             else
                 tmpst = midsoln[nbtri[side]->id].st[rk_iter];

             loop = 0;

             while(unphysical_st_at_quadrature(nbtri[side], tmpst) == YES && nbtri[side]->redo_limiting == NO)
             {
                 /// redo_limiting = NO indicates that this cell (nbtri[side]) has already been handled by
                 /// fixing negative pressure. Now bad state appears again because of the change in the 
                 /// magnetic field in the neighbor (tri) and the attempt to keep continuity of Bn.
                 /// Here we do a simple treatment to handle this case. We simply fix the state on nbtri[side]
                 /// without concerning about continuity of Bn anymore.
                 compress_Bn_on_edge(nbtri[side], midsoln, rk_iter, compress_factor, fr);
                 if(MAX_N_COEF == 3)
                     tri_B_P1_polynomial_2nd_reconstruction(nbtri[side], midsoln,NULL,rk_iter);
                 else if(MAX_N_COEF == 6)
                     tri_B_P2_polynomial_2nd_reconstruction(nbtri[side], midsoln,NULL,rk_iter);
                 scale_state_by_factor(tmpst, compress_factor);
                 loop++;
                 /* NOTE(review): there is no break after the entropy fix;
                  * the loop keeps damping for as long as its condition
                  * holds -- confirm this is intended. */
                 if(loop == 80)
                 {
                     printf("WARNING: fix_unphysical_st_cont_Bn_edge()\n");
                     printf("WARNING: press of nbtri[%d] side %d of (tri %d) can not"
                            " be fixed by adjusting variations, use entropy fix\n",
                              nbtri[side]->id, side, tri->id);
                     entropy_fix_for_blast_MHD(tmpst,nbtri[side],midsoln, rk_iter,B0);
                 }
             }
         }

         ///// TMP
         /* Diagnostic output only: debug_flag is initialised to NO and is
          * not changed anywhere in this function, so this block is
          * currently never executed. */
         if(debug_flag == YES)
         {
             cent = fg_centroid(tri);

             for(side =0; side < 3; side++)
             {
                 pcrds[0] = Coords(Point_of_tri(tri)[side]);
                 pcrds[1] = Coords(Point_of_tri(tri)[ (side +1)%3 ]);

                 /* Edge start point relative to the centroid. */
                 xx1 = pcrds[0][0] - cent[0];
                 yy1 = pcrds[0][1] - cent[1];

                 /* Edge vector and edge length. */
                 dy = pcrds[1][1] - pcrds[0][1];
                 dx = pcrds[1][0] - pcrds[0][0];
                 len[0] = fg_length_side(tri)[side];

                 sum = dy*dg_B(st)[0][0] + (0.5*dy*dx + dy*xx1)*dg_B(st)[0][1] +
                                           (dy*(yy1 + 0.5*dy))*dg_B(st)[0][2] +
                             (dy*(sqr(xx1) + xx1*dx + sqr(dx)/3.0))*dg_B(st)[0][3] +
                             (dy*(xx1*yy1 + 0.5*xx1*dy +0.5*yy1*dx + dx*dy/3.0))*dg_B(st)[0][4] +
                             (dy*(sqr(yy1) + yy1*dy + sqr(dy)/3.0))*dg_B(st)[0][5] +
                       -dx*dg_B(st)[1][0] + (-dx*(xx1 + 0.5*dx))*dg_B(st)[1][1] +
                          (dx*(yy1 + 0.5*dy))*dg_B(st)[0][1] +
                         (-dx*(sqr(xx1) + xx1*dx + sqr(dx)/3.0))*dg_B(st)[1][3] +
                          (2.0*dx*(xx1*yy1 +0.5*xx1*dy + 0.5*yy1*dx +  dx*dy/3.0))*dg_B(st)[0][3] +
                          (0.5*dx*(sqr(yy1) + yy1*dy + sqr(dy)/3.0))*dg_B(st)[0][4];
                 sum /= len[0];
                 printf("Side[%d], cell average recoved Bn = %13.12g\n",
                       side, sum);
            }
            // printf("\n Looped %d times to fix unphysical states\n", loop);
            // verbose_print_state("scaled state",st);
            // printf("state at edge quadratures after scaling\n");
            // show_states_at_edge_quadrature(tri,st);
         } //// END:::: if(debug_flag == YES)

}

/*
*               single_Dual_cell_edge_B_reconstruction_P1():
*
*       Limits the linear (index 1) edge coefficient of Bn on every side
*       of one dual cell.  The current slope is read from the polygon
*       (rk_step == RK_STEP) or from dual_cell_midsoln, two candidate
*       slopes per side are obtained from dual_cell_B_edge_from_tri_DG_P1(),
*       and they are blended with weights 1/(eps + slope^2), normalised to
*       sum to one.  The blend replaces the current slope only when the
*       current slope exceeds both TVB_M*(side length)^2 and the blend in
*       absolute value.  The result is written back to where it was read.
*
*       polyg              dual cell to process
*       midsoln            triangle stage solutions (forwarded)
*       dual_cell_midsoln  dual-cell stage solutions, indexed by polyg->id
*       rk_step            stage selector, see above
*       enable_debug       YES turns on printing for polygon ids 410 and 436
*
*       Only RK_STEP == 2 is supported; otherwise clean_up(ERROR) is called.
*/
EXPORT void single_Dual_cell_edge_B_reconstruction_P1(
        POLYGON            *polyg,  // new dual cell is passed in
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step,
        int                enable_debug)
{
        double       Bn_from_tri[MAX_N_POLY_SIDE][N_COEF_EDGE];
        double       partBn_from_tri[MAX_N_POLY_SIDE][2][N_COEF_EDGE];
        double       slope[MAX_N_POLY_SIDE];
        double       eps = 1.0e-8;
        double       TVB_M = 0.02013; // use to be 0.1.
        double       weight[2], wsum, blended, tvb_bound;
        int          side, k, rk_iter, n_sides;
        int          final_step = (RK_STEP == rk_step) ? YES : NO;
        int          debug = NO;

        if(enable_debug == YES && (polyg->id == 410 || polyg->id == 436))
            debug = YES;

        if(RK_STEP == 2)
            rk_iter = (final_step == YES) ? 0 : 1;
        else
        {
            printf("ERROR: single_Dual_cell_edge_B_reconstruction_P1()\n");
            printf("Need to adjust rk_iter for properly using "
                  " dual_cell_B_edge_from_tri_DG_P1() \n");
            clean_up(ERROR);
        }

        n_sides = polyg->n_sides;

        /* Phase 1: fetch the current slope of every side before the
         * triangle-based candidates are computed. */
        for(side = 0; side < n_sides; side++)
        {
            if(final_step == YES)
                slope[side] = polyg_side_dgB(polyg)[side][1];
            else
                slope[side] = dual_cell_midsoln[polyg->id].edge_dgBn[rk_step][side][1];
        }

        /* Phase 2: candidate slopes from the underlying triangles. */
        dual_cell_B_edge_from_tri_DG_P1(NULL,polyg,midsoln,dual_cell_midsoln,
                              rk_iter,NULL,Bn_from_tri, partBn_from_tri, debug);

        /* Phase 3: limit side by side. */
        for(side = 0; side < n_sides; side++)
        {
            /* Weights favour the candidate with the smaller slope. */
            wsum = 0.0;
            for(k = 0; k < 2; k++)
            {
                weight[k] = 1.0/(eps + sqr(partBn_from_tri[side][k][1]) );
                wsum += weight[k];
            }
            for(k = 0; k < 2; k++)
                weight[k]/=wsum;

            ///// TMP
            if(YES == debug)
            {
                printf("dual(%d) side[%d], origin slope %g, slopes from beneath tris (%g, %g),",
                                    polyg->id, side, slope[side],
                                partBn_from_tri[side][0][1], partBn_from_tri[side][1][1]);
            }
            ///// END::: TMP

            blended = weight[0]*partBn_from_tri[side][0][1] +
                      weight[1]*partBn_from_tri[side][1][1];
            tvb_bound = TVB_M*sqr(polyg_length_side(polyg)[side]);

            if(fabs(slope[side]) > tvb_bound &&
               fabs(slope[side]) > fabs(blended))
                slope[side] = blended;

            ///// TMP
            if(YES == debug)
            {
                printf("\n----- limited slope %g, TVB bound = %g, weights(%g, %g)\n\n",
                        slope[side], tvb_bound, weight[0], weight[1]);
            }
            ///// END::: TMP
        }//// END::: for(side = 0; side < n_sides; side++)

        /* Phase 4: store the limited slopes where they came from. */
        for(side = 0; side < n_sides; side++)
        {
            if(final_step == YES)
                polyg_side_dgB(polyg)[side][1] = slope[side];
            else
                dual_cell_midsoln[polyg->id].edge_dgBn[rk_step][side][1] = slope[side];
        }
}

/// Limit the slope (coefficient index 1) of the edge-normal magnetic field
/// stored on every side of every closed dual cell, using a TVB-type test.
///
/// For each closed polygon found on fr->mesh->surfaces:
///   1. the current slope of each side is read from polyg_side_dgB() when
///      rk_step == RK_STEP, otherwise from
///      dual_cell_midsoln[polyg->id].edge_dgBn[rk_step];
///   2. dual_cell_B_edge_from_tri_DG_P1() fills partBn_from_tri, which holds
///      two candidate slopes per side;
///   3. unless _FV_CENTRAL_ is defined, the two candidates are blended with
///      weights proportional to 1/(eps + slope^2); the stored slope is replaced
///      by the blend only if its magnitude exceeds both
///      TVB_M*(side length)^2 and the magnitude of the blend.  When the
///      "field_loop" debugging switch is on, or when _FV_CENTRAL_ is defined,
///      the plain average of the two candidates is used instead;
///   4. the resulting slope is written back to where it was read from.
/// Only coefficient index 1 is modified; other edge coefficients are untouched.
///
/// fr                 front whose mesh surfaces are traversed (the new front).
/// midsoln            forwarded unchanged to dual_cell_B_edge_from_tri_DG_P1().
/// dual_cell_midsoln  per-polygon intermediate storage, indexed by polyg->id.
/// rk_step            Runge-Kutta stage selector; RK_STEP selects the data
///                    stored on the polygon itself.
/// time               not used by this routine.
///
/// The routine only supports RK_STEP == 2; any other value prints an error
/// and calls clean_up(ERROR).
EXPORT void Dual_cell_edge_B_reconstruction_P1(
        Front              *fr, // the new front is passed in
        Mid_soln           *midsoln,
        Dual_cell_Mid_soln *dual_cell_midsoln,
        int                rk_step,
        double             time)
{
        int          side, rk_iter = 0;
        SURFACE      **surf;
        POLYGON      *polyg;
        int          debug = NO, N_sides = 0; // debug is only forwarded to the callee
        double       Bn_from_tri[MAX_N_POLY_SIDE][N_COEF_EDGE];
        double       partBn_from_tri[MAX_N_POLY_SIDE][2][N_COEF_EDGE];
        double       slope[MAX_N_POLY_SIDE];
        double       s0, s1;
#if !defined(_FV_CENTRAL_)
        double       eps = 1.0e-8; // old eps = 1.0e-8
                       /// Also need to see function single_Dual_cell_edge_B_reconstruction_P1()
        double       TVB_M = 0.02013; // use to be 0.1, 0.01 seems to compromise accuracy.
                                      // 0.02013 to avoid comparing floating point
        double       w0, w1, wsum, blend, tvb_bound;
        int          plain_average;
#endif /* #if !defined(_FV_CENTRAL_) */

        if(RK_STEP == 2)
        {
            rk_iter = (RK_STEP == rk_step) ? 0 : 1;
        }
        else
        {
            printf("ERROR: Dual_cell_edge_B_reconstruction_P1()\n");
            printf("Need to adjust rk_iter for properly using " 
                  " dual_cell_B_edge_from_tri_DG_P1() \n");
            clean_up(ERROR);
        }

#if !defined(_FV_CENTRAL_)
        // The switch does not depend on the polygon or side, so query it once
        // instead of once per side.
        // NOTE(review): assumes debugging() is a side-effect-free lookup -- confirm.
        plain_average = debugging("field_loop") ? YES : NO;
#endif /* #if !defined(_FV_CENTRAL_) */

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (polyg = first_polyg(*surf); !at_end_of_polyg_list(polyg,*surf);
                 polyg = polyg->next )
            {
                if(polyg->closed != YES) continue;

                N_sides = polyg->n_sides;

                // Read the current slopes before calling the helper below, in
                // the same order as the original implementation.
                if(RK_STEP == rk_step)
                {
                    for(side = 0; side < N_sides; side++)
                        slope[side] = polyg_side_dgB(polyg)[side][1];
                }
                else
                {
                    for(side = 0; side < N_sides; side++)
                        slope[side] = dual_cell_midsoln[polyg->id].edge_dgBn[rk_step][side][1];
                }

                dual_cell_B_edge_from_tri_DG_P1(NULL,polyg,midsoln,dual_cell_midsoln,
                                   rk_iter,fr,Bn_from_tri, partBn_from_tri,debug);

                for(side = 0; side < N_sides; side++)
                {
                    s0 = partBn_from_tri[side][0][1];
                    s1 = partBn_from_tri[side][1][1];
#if defined(_FV_CENTRAL_)
                    slope[side] = 0.5*s0 + 0.5*s1;
#else /* #if defined(_FV_CENTRAL_) */
                    // Normalized weights favoring the candidate with the
                    // smaller magnitude; eps keeps the division finite.
                    w0 = 1.0/(eps + sqr(s0));
                    w1 = 1.0/(eps + sqr(s1));
                    wsum = w0 + w1;
                    w0 /= wsum;
                    w1 /= wsum;
                    blend = w0*s0 + w1*s1;

                    tvb_bound = TVB_M*sqr(polyg_length_side(polyg)[side]);
                    if(fabs(slope[side]) > tvb_bound &&
                       fabs(slope[side]) > fabs(blend))
                    {
                        slope[side] = blend;
                    }

                    if(YES == plain_average)
                        slope[side] = 0.5*s0 + 0.5*s1;
#endif /* #if defined(_FV_CENTRAL_) */
                }

                if(RK_STEP == rk_step)
                {
                    for(side = 0; side < N_sides; side++)
                        polyg_side_dgB(polyg)[side][1] = slope[side];
                }
                else
                {
                    for(side = 0; side < N_sides; side++)
                        dual_cell_midsoln[polyg->id].edge_dgBn[rk_step][side][1] = slope[side];
                }
            }/// END::: for (polyg = first_polyg(*surf);...
        }/// END::: for(surf = fr->mesh->surfaces;...
}

/// Run the edge magnetic-field reconstruction on every triangle of
/// fr->mesh->surfaces whose BC_type is not SUBDOMAIN.
///
/// The per-triangle routine is chosen by MAX_N_COEF:
///   3 -> edge_B_P1_polynomial_reconstruction()
///   6 -> edge_B_P2_polynomial_reconstruction()
/// Any other value prints an error and calls clean_up(ERROR).
///
/// fr           front whose mesh is traversed.
/// midsoln      intermediate-stage storage, forwarded to the per-triangle routine.
/// rk_step      Runge-Kutta stage selector, forwarded to the per-triangle routine.
/// limit_store  forwarded to the per-triangle routine.
LOCAL void B_edge_reconstruction(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        Limiting_store **limit_store)
{
        TRI       *tri;
        SURFACE   **surf;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                if(MAX_N_COEF == 3)
                {
                    edge_B_P1_polynomial_reconstruction(tri, midsoln,limit_store,rk_step);
                }
                else if(MAX_N_COEF == 6)
                {
                    edge_B_P2_polynomial_reconstruction(tri, midsoln,limit_store,rk_step);
                }
                else
                {
                    printf("ERROR: implement: B_edge_reconstruction(), case MAX_N_COEF = %d\n", MAX_N_COEF);
                    clean_up(ERROR);
                }
            }
        }
}

//// Limit the P2 edge representation of the normal magnetic field on the
//// three sides of tri.
////
//// For each side:
////   - the neighbor across the side is found with Tri_on_side(), together
////     with the local index of the shared side as seen from the neighbor;
////   - from_cell_norB_along_edge_P2() is evaluated twice, once with the
////     polynomial of tri and once with the polynomial of the neighbor,
////     giving two sets of edge coefficients Moment[0] and Moment[1];
////   - the stored edge coefficients are overwritten with
////       [0] the stored edge value of Bn (fg_side_B / midsoln edge_Bn),
////       [1] minmod of the two first-order coefficients,
////       [2] minmod of the two second-order coefficients.
//// Data is read from / written to the triangle itself when
//// rk_step == RK_STEP, otherwise to midsoln[tri->id] at stage rk_step.
////
//// limit_store is not used by this routine.
////
//// NOTE(review): the search for the shared side leaves nb_side == 3 when the
//// neighbor does not point back to tri, and a missing neighbor is
//// dereferenced; both cases are unguarded here, as in the original code.
LOCAL void edge_B_P2_polynomial_reconstruction(
         TRI       *tri,
         Mid_soln *midsoln,
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *nbtri;
        int       side, nb_side;
        double    Moment[2][MAX_N_COEF];
        double    Bn_edge, new_slop, snd_mom_vari;

        for(side = 0; side < 3; side++)
        {
            nbtri = Tri_on_side(tri,side);

            // local index, in the neighbor, of the side shared with tri
            for(nb_side = 0; nb_side < 3; nb_side++)
            {
                if(tri == Tri_on_side(nbtri,nb_side))
                    break;
            }

            if(rk_step == RK_STEP)
                Bn_edge = fg_side_B(tri)[side];
            else
                Bn_edge = midsoln[tri->id].edge_Bn[side][rk_step];

            // edge coefficients seen from tri (YES) and from the neighbor (NO)
            from_cell_norB_along_edge_P2(tri,midsoln,rk_step,side,YES,Moment[0]);
            from_cell_norB_along_edge_P2(nbtri,midsoln,rk_step,nb_side,NO,Moment[1]);

            new_slop = minmod(Moment[0][1], Moment[1][1]);
            snd_mom_vari = minmod(Moment[0][2], Moment[1][2]);

            if(rk_step == RK_STEP)
            {
                fg_side_dgB(tri)[side][0] = Bn_edge;
                fg_side_dgB(tri)[side][1] = new_slop;
                fg_side_dgB(tri)[side][2] = snd_mom_vari;
            }
            else
            {
                midsoln[tri->id].edge_dgBn[side][0][rk_step] = Bn_edge;
                midsoln[tri->id].edge_dgBn[side][1][rk_step] = new_slop;
                midsoln[tri->id].edge_dgBn[side][2][rk_step] = snd_mom_vari;
            }
        }
}


//// Limit the P1 edge representation of the normal magnetic field on the
//// three sides of tri.
////
//// For each side:
////   - the neighbor across the side is found with Tri_on_side(), together
////     with the local index of the shared side as seen from the neighbor;
////   - from_cell_norB_along_edge_P1() is evaluated twice, once with the
////     polynomial of tri and once with the polynomial of the neighbor,
////     giving edge coefficients Moment[0] and Moment[1];
////   - the stored slope (coefficient 1) is overwritten with the minmod of
////     the two one-sided slopes.
//// Data is read from / written to the triangle itself when
//// rk_step == RK_STEP, otherwise to midsoln[tri->id] at stage rk_step.
////
//// limit_store is not used by this routine.
////
//// NOTE(review): the search for the shared side leaves nb_side == 3 when the
//// neighbor does not point back to tri, and a missing neighbor is
//// dereferenced; both cases are unguarded here, as in the original code.
LOCAL void edge_B_P1_polynomial_reconstruction(
         TRI       *tri,
         Mid_soln *midsoln,
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *nbtri;
        int       side, nb_side;
        double    Moment[2][MAX_N_COEF];
        double    Bn_edge, new_slop;

        for(side = 0; side < 3; side++)
        {
            nbtri = Tri_on_side(tri,side);

            // local index, in the neighbor, of the side shared with tri
            for(nb_side = 0; nb_side < 3; nb_side++)
            {
                if(tri == Tri_on_side(nbtri,nb_side))
                    break;
            }

            // TMP in the original source: the mean is taken from the DG edge
            // coefficient rather than from fg_side_B / edge_Bn, so writing it
            // back below is currently an identity store.
            if(rk_step == RK_STEP)
                Bn_edge = fg_side_dgB(tri)[side][0];
            else
                Bn_edge = midsoln[tri->id].edge_dgBn[side][0][rk_step];

            // edge coefficients seen from tri (YES) and from the neighbor (NO)
            from_cell_norB_along_edge_P1(tri,midsoln,rk_step,side,YES,Moment[0]);
            from_cell_norB_along_edge_P1(nbtri,midsoln,rk_step,nb_side,NO,Moment[1]);

            new_slop = minmod(Moment[0][1], Moment[1][1]);

            if(rk_step == RK_STEP)
            {
                fg_side_dgB(tri)[side][0] = Bn_edge;
                fg_side_dgB(tri)[side][1] = new_slop;
            }
            else
            {
                midsoln[tri->id].edge_dgBn[side][0][rk_step] = Bn_edge;
                midsoln[tri->id].edge_dgBn[side][1][rk_step] = new_slop;
            }
        }
}


// Restrict the linear cell polynomial of B on tri to one of its edges and
// return the coefficients of the resulting normal component.
//
// The edge is parametrized as P(t) = (P1 + P0)/2 + (P1 - P0)/2*t, t in [-1, 1],
// and the normal field is written as Bn(t) = Moment[0] + Moment[1]*t.
//
// is_counter_clock == YES: P0 = vertex `side`, P1 = vertex (side+1)%3 and the
//                          normal is ( t_y, -t_x) with t = fg_side_vector(tri)[side].
// otherwise              : the end points are swapped and the normal is
//                          (-t_y,  t_x).
// The cell state is tri->st when rk_step == RK_STEP, else
// midsoln[tri->id].st[rk_step].  Moment must have room for two values.
LOCAL void from_cell_norB_along_edge_P1(
        TRI       *tri,
        Mid_soln  *midsoln,
        int       rk_step,
        int       side,
        int       is_counter_clock,
        double    *Moment)
{
        float      *ctr = fg_centroid(tri);
        Locstate   cell_st;
        double     *tail, *head;       /* P0 and P1 of the parametrization */
        double     tx, ty, nx, ny;
        double     x_off, y_off, ex, ey;
        double     comp[2][2];         /* comp[c][k]: k-th edge coef. of B_c */
        int        c, k;

        if(rk_step != RK_STEP)
            cell_st = midsoln[tri->id].st[rk_step];
        else
            cell_st = tri->st;

        tx = fg_side_vector(tri)[side][0];
        ty = fg_side_vector(tri)[side][1];

        if(YES == is_counter_clock)
        {
            tail = Coords(Point_of_tri(tri)[side]);
            head = Coords(Point_of_tri(tri)[(side+1)%3]);
            nx = ty;
            ny = -tx;
        }
        else
        {
            head = Coords(Point_of_tri(tri)[side]);
            tail = Coords(Point_of_tri(tri)[(side+1)%3]);
            nx = -ty;
            ny = tx;
        }

        /* edge midpoint relative to the centroid, and full edge extent */
        x_off = 0.5*(tail[0] + head[0]) - ctr[0];
        y_off = 0.5*(tail[1] + head[1]) - ctr[1];
        ex = head[0] - tail[0];
        ey = head[1] - tail[1];

        for(c = 0; c < 2; c++)
        {
            comp[c][0] = dg_B(cell_st)[c][0] + dg_B(cell_st)[c][1]*x_off +
                         dg_B(cell_st)[c][2]*y_off;
            comp[c][1] = 0.5*(dg_B(cell_st)[c][1]*ex + dg_B(cell_st)[c][2]*ey);
        }

        for(k = 0; k < 2; k++)
            Moment[k] = comp[0][k]*nx + comp[1][k]*ny;
}


// Restrict the quadratic cell polynomial of B on tri to one of its edges and
// return the coefficients of the resulting normal component.
//
// The edge is parametrized as P(t) = (P1 + P0)/2 + (P1 - P0)/2*t, t in [-1, 1],
// and the normal field is written in the basis {1, t, (3t^2 - 1)/2}:
//   Bn(t) = Moment[0] + Moment[1]*t + Moment[2]*(3t^2 - 1)/2.
//
// is_counter_clock == YES: P0 = vertex `side`, P1 = vertex (side+1)%3 and the
//                          normal is ( t_y, -t_x) with t = fg_side_vector(tri)[side].
// otherwise              : the end points are swapped and the normal is
//                          (-t_y,  t_x).
// The cell state is tri->st when rk_step == RK_STEP, else
// midsoln[tri->id].st[rk_step].  Moment must have room for three values.
LOCAL void from_cell_norB_along_edge_P2(
        TRI       *tri,
        Mid_soln  *midsoln,
        int       rk_step,
        int       side,
        int       is_counter_clock,
        double    *Moment)
{
        float      *ctr = fg_centroid(tri);
        Locstate   cell_st;
        double     *tail, *head;       /* P0 and P1 of the parametrization */
        double     tx, ty, nx, ny;
        double     xav, yav, dx, dy, X0, Y0, quad;
        double     comp[2][3];         /* comp[c][k]: k-th edge coef. of B_c */
        int        c, k;

        if(rk_step != RK_STEP)
            cell_st = midsoln[tri->id].st[rk_step];
        else
            cell_st = tri->st;

        tx = fg_side_vector(tri)[side][0];
        ty = fg_side_vector(tri)[side][1];

        if(YES == is_counter_clock)
        {
            tail = Coords(Point_of_tri(tri)[side]);
            head = Coords(Point_of_tri(tri)[(side+1)%3]);
            nx = ty;
            ny = -tx;
        }
        else
        {
            head = Coords(Point_of_tri(tri)[side]);
            tail = Coords(Point_of_tri(tri)[(side+1)%3]);
            nx = -ty;
            ny = tx;
        }

        /* edge midpoint, half extent, and midpoint relative to the centroid */
        xav = 0.5*(tail[0] + head[0]);
        yav = 0.5*(tail[1] + head[1]);
        dx =  0.5*(head[0] - tail[0]);
        dy =  0.5*(head[1] - tail[1]);
        X0 = xav - ctr[0];
        Y0 = yav - ctr[1];

        for(c = 0; c < 2; c++)
        {
            /* coefficients of 1, t and t^2 after substituting the edge line */
            comp[c][0] = dg_B(cell_st)[c][0] + dg_B(cell_st)[c][1]*X0 +
                                               dg_B(cell_st)[c][2]*Y0 +
                         dg_B(cell_st)[c][3]*sqr(X0) + dg_B(cell_st)[c][4]*X0*Y0 +
                         dg_B(cell_st)[c][5]*sqr(Y0);
            comp[c][1] = dg_B(cell_st)[c][1]*dx + dg_B(cell_st)[c][2]*dy +
                         2.0*dg_B(cell_st)[c][3]*X0*dx +
                         dg_B(cell_st)[c][4]*(X0*dy + Y0*dx) +
                         2.0*dg_B(cell_st)[c][5]*Y0*dy;
            quad = dg_B(cell_st)[c][3]*sqr(dx) + dg_B(cell_st)[c][4]*dx*dy +
                   dg_B(cell_st)[c][5]*sqr(dy);

            /* change basis: t^2 = 1/3 + (2/3)*(3t^2 - 1)/2 */
            quad /= 3.0;
            comp[c][0] += quad;
            comp[c][2] = quad*2.0;
        }

        for(k = 0; k < 3; k++)
            Moment[k] = comp[0][k]*nx + comp[1][k]*ny;
}


/// Overwrite the edge normal-field data of buffer triangles with values
/// obtained from ff_accurate_magnetic_blast().
///
/// Only triangles with BC_type == SUBDOMAIN whose centroid is NOT strictly
/// inside the box (gr->GL, gr->GU) are processed.  For each side of such a
/// triangle the field returned by ff_accurate_magnetic_blast() is sampled at
/// the 7 Gauss-Lobatto points of the edge, dotted with the vector
/// nor = (t_y, -t_x), t = fg_side_vector(tri)[side], and integrated to give
///   - soln_Bn[side]         : 0.5 * sum_k w_k * Bn_k (edge average), and
///   - soln_dgBn[side][indx] : (2*indx+1)/2 * sum_k w_k * Bn_k * vh_val_ver2(q_k, indx)
///     (vh_val_ver2 is presumably the indx-th edge basis function -- confirm).
/// The results are stored in fg_side_B / fg_side_dgB when rk_step == RK_STEP,
/// otherwise in mid_soln[tri->id].edge_Bn / edge_dgBn at stage rk_step.
///
/// fr        front whose mesh and rect_grid are used.
/// mid_soln  intermediate-stage storage indexed by tri->id.
/// rk_step   Runge-Kutta stage selector.
/// time      not used by this routine.
EXPORT void impose_blast_MHD_tri_edge_B_buffer(
        Front     *fr,
        Mid_soln  *mid_soln,
        int       rk_step,
        double    time)
{
        /* 7-point Gauss-Lobatto nodes and weights on [-1, 1] */
        static const int    loc_Lobatto_N = 7;
        static const double loc_q[7] = {
            -1.0,
            -0.83022389627856692987,
            -0.46884879347071421380,
             0.0,
             0.46884879347071421380,
             0.83022389627856692987,
             1.0
        };
        static const double loc_qw[7] = {
            0.04761904761904761905,
            0.27682604736156594801,
            0.43174538120986262342,
            0.48761904761904761905,
            0.43174538120986262342,
            0.27682604736156594801,
            0.04761904761904761905
        };

        TRI       *tri;
        SURFACE   **surf;
        RECT_GRID *gr = fr->rect_grid;
        int       i, j, k, indx, side;
        double    *pcrds[3], *cent;
        double    qcrds[3] = {0.0, 0.0, 0.0}; /* third entry is never set below */
        double    v2[3], nor[3], Bn_k;
        double    v_conU[3][8];  /* assumes N_EQN <= 8 -- TODO confirm */
        double    soln_Bn[3], soln_dgBn[3][N_COEF_EDGE];

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type != SUBDOMAIN)
                    continue;

                cent = fg_centroid(tri);

                /* 01-28-2015 added: skip triangles strictly inside (GL, GU) */
                if(cent[0] > gr->GL[0] && cent[0] < gr->GU[0] &&
                   cent[1] > gr->GL[1] && cent[1] < gr->GU[1])
                    continue;
                /* END::: 01-28-2015 added*/

                for(i = 0; i < 3; i++)
                    pcrds[i] = Coords(Point_of_tri(tri)[i]);

                for(side = 0; side < 3; side++)
                {
                    for(i = 0; i < 2; i++)
                        v2[i] = fg_side_vector(tri)[side][i];
                    nor[0] = v2[1];
                    nor[1] = -v2[0];

                    soln_Bn[side] = 0.0;
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                        soln_dgBn[side][indx] = 0.0;

                    for(k = 0; k < loc_Lobatto_N; k++)
                    {
                        /* quadrature point on the edge side -> (side+1)%3 */
                        for(i = 0; i < 2; i++)
                            qcrds[i] = (pcrds[(side+1)%3][i] + pcrds[side][i])/2.0 +
                                       (pcrds[(side+1)%3][i] - pcrds[side][i])/2.0*loc_q[k];
                        for(j = 5; j < N_EQN; j++)
                            v_conU[side][j] = ff_accurate_magnetic_blast(NULL, qcrds,j,YES);

                        /* entries 5 and 6 are used as the two in-plane field components */
                        Bn_k = nor[0]*v_conU[side][5] + nor[1]*v_conU[side][6];

                        soln_Bn[side] += Bn_k*loc_qw[k];

                        for(indx = 0; indx < N_COEF_EDGE; indx++)
                            soln_dgBn[side][indx] += loc_qw[k]*Bn_k*vh_val_ver2(loc_q[k],indx);//rhs
                    }

                    /* the weights sum to 2, so 0.5 turns the sum into an average */
                    soln_Bn[side] *= 0.5;
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                        soln_dgBn[side][indx] = soln_dgBn[side][indx]*(2.0*indx+1.0)/2.0;
                } //// END::: for(side = 0; side < 3; side++)

                if(rk_step == RK_STEP)
                {
                    for(side = 0; side < 3; side++)
                    {
                        fg_side_B(tri)[side] = soln_Bn[side];
                        for(indx = 0; indx < N_COEF_EDGE; indx++)
                            fg_side_dgB(tri)[side][indx] = soln_dgBn[side][indx];
                    }
                }
                else
                {
                    for(side = 0; side < 3; side++)
                    {
                        mid_soln[tri->id].edge_Bn[side][rk_step] = soln_Bn[side];
                        for(indx = 0; indx < N_COEF_EDGE; indx++)
                            mid_soln[tri->id].edge_dgBn[side][indx][rk_step] = soln_dgBn[side][indx];
                    }
                }
            }/// END::: for (tri = first_tri(*surf);
        }
}

/*
*       impose_blast_MHD_tri_cell_phys_buffer():
*
*       Visits every triangle of fr->mesh with BC_type == SUBDOMAIN whose
*       centroid is NOT strictly inside the rectangle
*       (GL[0],GU[0]) x (GL[1],GU[1]) of fr->rect_grid and resets its cell
*       state through assign_buf_state_blast_MHD().  The edge flag passed to
*       that routine is NO, so only the cell state is rewritten here.
*
*       fr        front whose mesh triangles are visited
*       mid_soln  per-triangle storage indexed by tri->id; dereferenced in
*                 this routine only when rk_step != RK_STEP
*       rk_step   rk_step == RK_STEP selects tri->st as the target state,
*                 any other value selects mid_soln[tri->id].st[rk_step]
*       time      not used by this routine
*/
EXPORT void impose_blast_MHD_tri_cell_phys_buffer(
        Front     *fr,
        Mid_soln  *mid_soln,
        int       rk_step,
        double    time)
{
        RECT_GRID *gr = fr->rect_grid;
        TRI       *tri;
        SURFACE   **surf;
        double    *cent;
        Locstate  st;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type != SUBDOMAIN)
                    continue;

                /* Triangles centred strictly inside [GL, GU] are left alone. */
                cent = fg_centroid(tri);
                if(cent[0] > gr->GL[0] && cent[0] < gr->GU[0] &&
                   cent[1] > gr->GL[1] && cent[1] < gr->GU[1])
                    continue;

                /* Final stage writes the triangle state, other stages the
                 * intermediate slot of that stage. */
                if(rk_step == RK_STEP)
                    st = tri->st;
                else
                    st = mid_soln[tri->id].st[rk_step];

                assign_buf_state_blast_MHD(st, tri, mid_soln, rk_step, NO);
            }
        }
}

/*
*       update_center_and_edge_phys_buffer():
*
*       For every triangle of fr->mesh with BC_type == SUBDOMAIN whose
*       centroid is NOT strictly inside the rectangle
*       (GL[0],GU[0]) x (GL[1],GU[1]) of fr->rect_grid:
*         1. the target state is first overwritten with the state of the
*            matching triangle of oldfr (via assign()),
*         2. assign_buf_state_blast_MHD() is then called with the edge flag
*            YES, so cell state and edge data are both set,
*         3. when mid_soln is non-NULL the result is also stored in
*            mid_soln[tri->id].st[0].
*
*       The triangle lists of oldfr and fr are walked in lockstep;
*       NOTE(review): this assumes both meshes hold the same surfaces and
*       triangles in the same order - no check is made here.
*
*       oldfr     front supplying the states copied in step 1
*       fr        front whose triangles are updated
*       mid_soln  per-triangle storage indexed by tri->id; must be non-NULL
*                 whenever rk_step != RK_STEP
*       rk_step   rk_step == RK_STEP selects tri->st as the target state,
*                 any other value selects mid_soln[tri->id].st[rk_step]
*/
EXPORT void update_center_and_edge_phys_buffer(
        Front     *oldfr,
        Front     *fr,
        Mid_soln  *mid_soln,
        int       rk_step)
{
        RECT_GRID *gr = fr->rect_grid;
        TRI       *tri, *otri;
        SURFACE   **surf, **osurf;
        double    *cent;
        Locstate  st;

        for(osurf = oldfr->mesh->surfaces, surf = fr->mesh->surfaces; surf && *surf; surf++, osurf++)
        {
            for (tri = first_tri(*surf), otri = first_tri(*osurf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next, otri = otri->next)
            {
                if(tri->BC_type != SUBDOMAIN)
                    continue;

                /* Triangles centred strictly inside [GL, GU] are left alone. */
                cent = fg_centroid(tri);
                if(cent[0] > gr->GL[0] && cent[0] < gr->GU[0] &&
                   cent[1] > gr->GL[1] && cent[1] < gr->GU[1])
                    continue;

                if(rk_step == RK_STEP)
                    st = tri->st;
                else
                    st = mid_soln[tri->id].st[rk_step];

                /* Start from the old-front state, then impose the buffer
                 * values on top of it. */
                assign(st, otri->st, fr->sizest);
                assign_buf_state_blast_MHD(st, tri, mid_soln, rk_step, YES);

                if(mid_soln != NULL)
                    assign(mid_soln[tri->id].st[0], st, fr->sizest);
            }
        }
}

/*
*       assign_buf_state_blast_MHD():
*
*       Fills state (and optionally the edge normal field of tri) from
*       ff_accurate_magnetic_blast().
*
*       Cell part (always done):
*         - component j of the solution is evaluated once, with both
*           coordinate arguments set to the centroid of tri, i.e. the cell
*           value is the centroid point value, not a quadrature average;
*         - that value is stored both in the plain fields (Dens, Mom,
*           Energy, Mag) and in coefficient 0 of the dg_* arrays;
*         - dg coefficients 1 .. MAX_N_COEF-1 are set to zero.
*       Component order used: 0 density, 1-2 Mom[0..1], 3 energy,
*       4 Mom[2], 5-7 Mag[0..2].
*
*       Edge part (only when assign_edge_flag != NO and N_EQN == 8):
*         for each of the three sides the quantity
*             Bn = nor[0]*B[0] + nor[1]*B[1]
*         is integrated with the 7-point Lobatto rule on [-1,1] and halved.
*         The weights sum to 2, so the stored value is the mean of Bn along
*         the side.  nor is the side vector (x,y) of fg_side_vector()
*         turned into (y,-x).
*         The result goes to fg_side_B(tri)[side] when rk_step == RK_STEP,
*         otherwise to mid_soln[tri->id].edge_Bn[side][rk_step].
*
*       state             state to overwrite
*       tri               triangle supplying centroid, vertices, side vectors
*       mid_soln          per-triangle storage indexed by tri->id; only
*                         dereferenced in the edge part when
*                         rk_step != RK_STEP
*       rk_step           selects the edge storage target (see above)
*       assign_edge_flag  NO skips the edge part
*/
LOCAL void assign_buf_state_blast_MHD(
        Locstate    state,
        TRI         *tri,
        Mid_soln    *mid_soln,
        int         rk_step,
        int         assign_edge_flag)
{
        /// Lobatto abscissas and weights on [-1,1], ref:
        /// Abscissas and Weight Coefficients for Lobatto Quadrature.
        ///  H.H.Michels. 1963, Mathematics of Computation, Vol. 17, No. 83 (Jul., 1963), pp. 237-244.
        static const long double loc_q[] = {
            -1.0,
            -0.83022389627856692987,
            -0.46884879347071421380,
             0.0,
             0.46884879347071421380,
             0.83022389627856692987,
             1.0
        };
        static const long double loc_qw[] = {
             0.04761904761904761905,
             0.27682604736156594801,
             0.43174538120986262342,
             0.48761904761904761905,
             0.43174538120986262342,
             0.27682604736156594801,
             0.04761904761904761905
        };
        const int       loc_Lobatto_N = (int)(sizeof(loc_q)/sizeof(loc_q[0]));
        /* Zero-filled so that components >= N_EQN are defined when read. */
        double          avg[8] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
        double          edge_U[8] = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
        double          v2[2], nor[2];
        float           *pcrds[3];
        float           dbcent[MAXD] = {0.0};
        float           qcrds[3] = {0.0, 0.0, 0.0};
        long double     Bn;
        int             i, j, k, indx, side;

        for(i = 0; i < 3; i++)
            pcrds[i] = Coords(Point_of_tri(tri)[i]);

        for(i = 0; i < 2; i++)
            dbcent[i] = fg_centroid(tri)[i];

        /* Cell value = solution evaluated at the centroid. */
        for(j = 0; j < N_EQN; j++)
            avg[j] = ff_accurate_magnetic_blast(dbcent, dbcent,j,YES);

        /* Only coefficient 0 is kept; clear the higher dg coefficients. */
        for(indx = 1; indx < MAX_N_COEF; indx++)
        {
            dg_Dens(state)[indx] = 0.0;
            dg_Mom(state)[0][indx] = 0.0;
            dg_Mom(state)[1][indx] = 0.0;
            dg_Energy(state)[indx] = 0.0;
            dg_Mom(state)[2][indx] = 0.0;
            dg_B(state)[0][indx] =  0.0;
            dg_B(state)[1][indx] =  0.0;
            dg_B(state)[2][indx] =  0.0;
        }

        Dens(state) = dg_Dens(state)[0] = avg[0];
        Mom(state)[0] = dg_Mom(state)[0][0] = avg[1];
        Mom(state)[1] = dg_Mom(state)[1][0] = avg[2];
        Energy(state) = dg_Energy(state)[0] = avg[3]; // energy with |B|
        Mom(state)[2] = dg_Mom(state)[2][0] = avg[4];
        Mag(state)[0] = dg_B(state)[0][0] = avg[5];
        Mag(state)[1] = dg_B(state)[1][0] = avg[6];
        Mag(state)[2] = dg_B(state)[2][0] = avg[7];

        if(NO == assign_edge_flag) return;

        /// We also need to init edge B
        if(N_EQN != 8) return;

        for(side=0; side < 3; side++)
        {
            for(i = 0; i < 2; i++)
                v2[i] = fg_side_vector(tri)[side][i];
            nor[0] = v2[1];
            nor[1] = -v2[0];

            if(rk_step == RK_STEP)
                fg_side_B(tri)[side] = 0.0;
            else
                mid_soln[tri->id].edge_Bn[side][rk_step] = 0.0;

            for(k = 0; k < loc_Lobatto_N; k++)
            {
                /* Map the abscissa from [-1,1] onto the side
                 * pcrds[side] -> pcrds[(side+1)%3]. */
                for(i = 0; i < 2; i++)
                    qcrds[i] = (pcrds[(side+1)%3][i] + pcrds[side][i])/2.0 +
                           (pcrds[(side+1)%3][i] - pcrds[side][i])/2.0*loc_q[k];
                for(j = 5; j < N_EQN; j++)
                    edge_U[j] = ff_accurate_magnetic_blast(dbcent, qcrds,j,NO);

                Bn = nor[0]*edge_U[5] + nor[1]*edge_U[6];
                if(rk_step == RK_STEP)
                    fg_side_B(tri)[side] += Bn*loc_qw[k];
                else
                    mid_soln[tri->id].edge_Bn[side][rk_step] += Bn*loc_qw[k];
            }

            /* Weights sum to 2: halving turns the integral into a mean. */
            if(rk_step == RK_STEP)
                fg_side_B(tri)[side] *= 0.5;
            else
                mid_soln[tri->id].edge_Bn[side][rk_step] *= 0.5;
        }
}

/*
*       impose_mag_acc_vert_tri_edge_B_buffer():
*
*       For every triangle of fr->mesh with BC_type == SUBDOMAIN, recomputes
*       the edge normal-field data of all three sides from mag_vort_sol()
*       evaluated at the given time.
*
*       On each side, with s in [-1,1] the 7-point Lobatto abscissa and
*       Bn(s) = nor[0]*u[5] + nor[1]*u[6]  (u = vector filled by
*       mag_vort_sol(), nor = side vector (x,y) turned into (y,-x)):
*         soln_Bn         = 0.5 * sum_k w_k Bn(s_k)
*                           (weights sum to 2, so this is the mean of Bn)
*         soln_dgBn[indx] = (2*indx+1)/2 * sum_k w_k Bn(s_k) vh_val_ver2(s_k,indx)
*       NOTE(review): (2*indx+1)/2 is the inverse squared norm of the
*       Legendre polynomial of degree indx on [-1,1]; presumably
*       vh_val_ver2() is that basis - confirm.
*
*       Results are stored in fg_side_B / fg_side_dgB of the triangle when
*       rk_step == RK_STEP, otherwise in slot rk_step of
*       mid_soln[tri->id].edge_Bn / edge_dgBn.
*
*       fr        front whose mesh triangles are visited
*       mid_soln  per-triangle storage indexed by tri->id; dereferenced only
*                 when rk_step != RK_STEP
*       rk_step   selects the storage target (see above)
*       time      time passed on to mag_vort_sol()
*/
EXPORT void impose_mag_acc_vert_tri_edge_B_buffer(
        Front     *fr,
        Mid_soln  *mid_soln,
        int       rk_step,
        double    time)
{
        /* 7-point Lobatto abscissas and weights on [-1,1]. */
        static const double loc_q[] = {
            -1.0,
            -0.83022389627856692987,
            -0.46884879347071421380,
             0.0,
             0.46884879347071421380,
             0.83022389627856692987,
             1.0
        };
        static const double loc_qw[] = {
             0.04761904761904761905,
             0.27682604736156594801,
             0.43174538120986262342,
             0.48761904761904761905,
             0.43174538120986262342,
             0.27682604736156594801,
             0.04761904761904761905
        };
        const int    loc_Lobatto_N = (int)(sizeof(loc_q)/sizeof(loc_q[0]));
        TRI          *tri;
        SURFACE      **surf;
        int          i, k, indx, side;
        double       *pcrds[3];
        /* Third coordinate is never set below; keep it defined. */
        double       qcrds[3] = {0.0, 0.0, 0.0};
        double       nor[2], Bn;
        double       econ_u[10], soln_Bn[3], soln_dgBn[3][N_COEF_EDGE];

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type != SUBDOMAIN)
                    continue;

                for(i = 0; i < 3; i++)
                    pcrds[i] = Coords(Point_of_tri(tri)[i]);

                for(side = 0; side < 3; side++)
                {
                    nor[0] = fg_side_vector(tri)[side][1];
                    nor[1] = -fg_side_vector(tri)[side][0];

                    soln_Bn[side] = 0.0;
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                        soln_dgBn[side][indx] = 0.0;

                    for(k = 0; k < loc_Lobatto_N; k++)
                    {
                        /* Map the abscissa from [-1,1] onto the side
                         * pcrds[side] -> pcrds[(side+1)%3]. */
                        for(i = 0; i < 2; i++)
                            qcrds[i] = (pcrds[(side+1)%3][i] + pcrds[side][i])/2.0 +
                                       (pcrds[(side+1)%3][i] - pcrds[side][i])/2.0*loc_q[k];
                        mag_vort_sol(qcrds,time,econ_u);

                        Bn = nor[0]*econ_u[5] + nor[1]*econ_u[6];

                        soln_Bn[side] += Bn*loc_qw[k];
                        for(indx = 0; indx < N_COEF_EDGE; indx++)
                            soln_dgBn[side][indx] += loc_qw[k]*Bn*vh_val_ver2(loc_q[k],indx);//rhs
                    }

                    soln_Bn[side] *= 0.5;
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                        soln_dgBn[side][indx] = soln_dgBn[side][indx]*(2.0*indx+1.0)/2.0;
                } //// END::: for(side = 0; side < 3; side++)

                if(rk_step == RK_STEP)
                {
                    for(side = 0; side < 3; side++)
                    {
                        fg_side_B(tri)[side] = soln_Bn[side];
                        for(indx = 0; indx < N_COEF_EDGE; indx++)
                            fg_side_dgB(tri)[side][indx] = soln_dgBn[side][indx];
                    }
                }
                else
                {
                    for(side = 0; side < 3; side++)
                    {
                        mid_soln[tri->id].edge_Bn[side][rk_step] = soln_Bn[side];
                        for(indx = 0; indx < N_COEF_EDGE; indx++)
                            mid_soln[tri->id].edge_dgBn[side][indx][rk_step] = soln_dgBn[side][indx];
                    }
                }
            }/// END::: for (tri = first_tri(*surf);
        }
}

/*
*       impose_decay_alfven_tri_edge_B_buffer():
*
*       For every triangle of fr->mesh with BC_type == SUBDOMAIN, recomputes
*       the edge normal-field data of all three sides from
*       accurate_magnetic_decay_alfven_soln() evaluated at the given time.
*
*       On each side, with s in [-1,1] the 7-point Lobatto abscissa and
*       Bn(s) = nor[0]*u[5] + nor[1]*u[6]  (u = vector filled by
*       accurate_magnetic_decay_alfven_soln(), nor = side vector (x,y)
*       turned into (y,-x)):
*         soln_Bn         = 0.5 * sum_k w_k Bn(s_k)
*                           (weights sum to 2, so this is the mean of Bn)
*         soln_dgBn[indx] = (2*indx+1)/2 * sum_k w_k Bn(s_k) vh_val_ver2(s_k,indx)
*       NOTE(review): (2*indx+1)/2 is the inverse squared norm of the
*       Legendre polynomial of degree indx on [-1,1]; presumably
*       vh_val_ver2() is that basis - confirm.
*
*       Results are stored in fg_side_B / fg_side_dgB of the triangle when
*       rk_step == RK_STEP, otherwise in slot rk_step of
*       mid_soln[tri->id].edge_Bn / edge_dgBn.
*
*       fr        front whose mesh triangles are visited
*       mid_soln  per-triangle storage indexed by tri->id; dereferenced only
*                 when rk_step != RK_STEP
*       rk_step   selects the storage target (see above)
*       time      time passed on to accurate_magnetic_decay_alfven_soln()
*/
EXPORT void impose_decay_alfven_tri_edge_B_buffer(
        Front     *fr,
        Mid_soln  *mid_soln,
        int       rk_step,
        double    time)
{
        /* 7-point Lobatto abscissas and weights on [-1,1]. */
        static const double loc_q[] = {
            -1.0,
            -0.83022389627856692987,
            -0.46884879347071421380,
             0.0,
             0.46884879347071421380,
             0.83022389627856692987,
             1.0
        };
        static const double loc_qw[] = {
             0.04761904761904761905,
             0.27682604736156594801,
             0.43174538120986262342,
             0.48761904761904761905,
             0.43174538120986262342,
             0.27682604736156594801,
             0.04761904761904761905
        };
        const int    loc_Lobatto_N = (int)(sizeof(loc_q)/sizeof(loc_q[0]));
        TRI          *tri;
        SURFACE      **surf;
        int          i, k, indx, side;
        double       *pcrds[3];
        /* Third coordinate is never set below; keep it defined. */
        double       qcrds[3] = {0.0, 0.0, 0.0};
        double       nor[2], Bn;
        double       econ_u[10], soln_Bn[3], soln_dgBn[3][N_COEF_EDGE];

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type != SUBDOMAIN)
                    continue;

                for(i = 0; i < 3; i++)
                    pcrds[i] = Coords(Point_of_tri(tri)[i]);

                for(side = 0; side < 3; side++)
                {
                    nor[0] = fg_side_vector(tri)[side][1];
                    nor[1] = -fg_side_vector(tri)[side][0];

                    soln_Bn[side] = 0.0;
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                        soln_dgBn[side][indx] = 0.0;

                    for(k = 0; k < loc_Lobatto_N; k++)
                    {
                        /* Map the abscissa from [-1,1] onto the side
                         * pcrds[side] -> pcrds[(side+1)%3]. */
                        for(i = 0; i < 2; i++)
                            qcrds[i] = (pcrds[(side+1)%3][i] + pcrds[side][i])/2.0 +
                                       (pcrds[(side+1)%3][i] - pcrds[side][i])/2.0*loc_q[k];
                        accurate_magnetic_decay_alfven_soln(qcrds,time,econ_u);

                        Bn = nor[0]*econ_u[5] + nor[1]*econ_u[6];

                        soln_Bn[side] += Bn*loc_qw[k];
                        for(indx = 0; indx < N_COEF_EDGE; indx++)
                            soln_dgBn[side][indx] += loc_qw[k]*Bn*vh_val_ver2(loc_q[k],indx);//rhs
                    }

                    soln_Bn[side] *= 0.5;
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                        soln_dgBn[side][indx] = soln_dgBn[side][indx]*(2.0*indx+1.0)/2.0;
                } //// END::: for(side = 0; side < 3; side++)

                if(rk_step == RK_STEP)
                {
                    for(side = 0; side < 3; side++)
                    {
                        fg_side_B(tri)[side] = soln_Bn[side];
                        for(indx = 0; indx < N_COEF_EDGE; indx++)
                            fg_side_dgB(tri)[side][indx] = soln_dgBn[side][indx];
                    }
                }
                else
                {
                    for(side = 0; side < 3; side++)
                    {
                        mid_soln[tri->id].edge_Bn[side][rk_step] = soln_Bn[side];
                        for(indx = 0; indx < N_COEF_EDGE; indx++)
                            mid_soln[tri->id].edge_dgBn[side][indx][rk_step] = soln_dgBn[side][indx];
                    }
                }
            }/// END::: for (tri = first_tri(*surf);
        }
}

// periodic BC on both sides
// Assume we use structured mesh. edges are one-to-one correspondent.
EXPORT void update_edge_B_buffer(
	Front     *fr,
	Mid_soln  *mid_soln,
        int       rk_step)
{
        RECT_GRID *gr = fr->rect_grid;
        float     *L = gr->L, *U = gr->U;
        float     crsp_cent[3], dbcent[2];
        TRI       *tri, *crsp_tri;
        SURFACE   **surf;
        BDRY_SIDE side;
        int       dim = 2, i, indx, in_buf;
        double    *cent;
        size_t    sizest = fr->sizest;
        float     nor[2] = {0.0, 1.0}, pt[2];
        double    *crds[2][3], len;
        int       debug = NO;

        // First update left and right side 
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type != SUBDOMAIN)
                    continue;
                /***
                /// TMP
                if(tri->id == 8 || tri->id == 7 || tri->id == 1127)
                // if(tri->id == 1127)
                {
                    printf("\ntri[%d] in update_edge_B_buffer() X-direction, L[%g, %g], U[%g, %g]\n\n",
                           tri->id, L[0], L[1], U[0], U[1]);
                    print_tri_crds(tri);
                    debug = YES;
                }
                else
                    debug = NO;
                /// END::: TMP
                ***/

                in_buf = NO;
                cent = fg_centroid(tri);
                if((L[0] > cent[0]) &&  (L[1] < cent[1]) && (U[1] > cent[1])
                  )
                {
                    in_buf = YES;
                    side = LEFT_BDRY;
                }
                if((U[0] < cent[0]) &&  (L[1] < cent[1]) && (U[1] > cent[1])
                  )
                {
                    in_buf = YES;
                    side = RIGHT_BDRY;
                }

                if(in_buf == YES)
                {
                    cent = fg_centroid(tri);
                    switch(side)
                    {
                    case LEFT_BDRY:
                        crsp_cent[0] = cent[0] + U[0]-L[0];
                        crsp_cent[1] = cent[1];
                    break;
                    case RIGHT_BDRY:
                        crsp_cent[0] = cent[0] - (U[0]-L[0]);
                        crsp_cent[1] = cent[1];
                    break;
                    }

                    crsp_tri = find_corres_tri(crsp_cent,fr->mesh);

                    if(YES == debug)
                    {
                        printf("\ntri[%d] receive from tri %d in X-direction\n\n", tri->id, crsp_tri->id);
                        print_tri_crds(crsp_tri);
                        for(i = 0; i < 3; i++)
                        {
                            printf(" side[%d] Bn [%14.12g, %14.12g]\n", i, 
                             fg_side_dgB(crsp_tri)[i][0], fg_side_dgB(crsp_tri)[i][1]);
                        }
                        printf("\n");
                    }

                    if(NULL == crsp_tri)
                    {
                        printf("ERROR: update_edge_B_buffer(), null tri found\n");
                        clean_up(ERROR);
                    }
                    // print_tri_crds();
                    // check consistency of labeling
                    for(i=0; i < 3; i++)
                    {
                        crds[0][i] = Coords(Point_of_tri(tri)[i]);
                        crds[1][i] = Coords(Point_of_tri(crsp_tri)[i]);
                    }
                    len = 0.0;
                    for(i=0; i < 3; i++)
                        len += distance_between_positions(crds[0][i],crds[1][i],2); 
                    if(fabs(len - 3.0*(U[0]-L[0])) > 1.0e-10)
                    {
                        printf("ERROR: update_edge_B_buffer(), edge correspondence is not consistent, x-direction\n");
                        printf("computed len = %g, domain length = %g\n", len, (U[0]-L[0]));
                        clean_up(ERROR);
                    }

                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < 3; i++)
                        {
                            fg_side_B(tri)[i] = fg_side_B(crsp_tri)[i];
                            for(indx = 0; indx < N_COEF_EDGE; indx++)
                                fg_side_dgB(tri)[i][indx] = fg_side_dgB(crsp_tri)[i][indx];
                        }
                    }
                    else
                    {
                        for(i = 0; i < 3; i++)
                        {
                            mid_soln[tri->id].edge_Bn[i][rk_step] = mid_soln[crsp_tri->id].edge_Bn[i][rk_step];
                            for(indx = 0; indx < N_COEF_EDGE; indx++)
                                mid_soln[tri->id].edge_dgBn[i][indx][rk_step] = mid_soln[crsp_tri->id].edge_dgBn[i][indx][rk_step];
                        }
                    }
                }
            }
        } // END: First update left and right side

        // update upper and lower side
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type != SUBDOMAIN)
                    continue;
                /// TMP
                /***
                if(tri->id == 178)
                {
                    printf("++++++++ tri[%d] entered update_edge_B_buffer() in update upper and lower side part\n", tri->id);
                    debug = YES;
                }
                else
                    debug = NO; 
                /// TMP
                if(tri->id == 8 || tri->id == 7 || tri->id == 1127)
                {
                    printf("\ntri[%d] in update_edge_B_buffer() Y-direction, L[1] = %g, U[1] = %g\n\n", 
                               tri->id, L[1], U[1]);
                    print_tri_crds(tri);
                    debug = YES;
                }   
                else
                    debug = NO;
                /// END::: TMP
                ***/

                in_buf = NO;
                cent = fg_centroid(tri);
                if((L[1] > cent[1])
                  )
                {
                    in_buf = YES;
                    side = LOWER_BDRY;
                }
                if((U[1] < cent[1])
                  )
                {
                    in_buf = YES;
                    side = UPPER_BDRY;
                }
 
                if(in_buf == YES)
                {
                    cent = fg_centroid(tri);
                    switch(side)
                    {
                    case LOWER_BDRY:
                        crsp_cent[0] = cent[0];
                        crsp_cent[1] = cent[1] + U[1]-L[1];
                    break;
                    case UPPER_BDRY:
                        crsp_cent[0] = cent[0];
                        crsp_cent[1] = cent[1] - (U[1]-L[1]);
                    break;
                    }
                    crsp_tri = find_corres_tri(crsp_cent,fr->mesh);

                    if(NULL == crsp_tri)
                    {
                        printf("ERROR: update_edge_B_buffer(), null tri found\n");
                        clean_up(ERROR);
                    }
                   
                    /// TMP
                    if(debug == YES)
                    {
                        printf("\ntri[%d] receive from tri %d in Y-direction\n\n", tri->id, crsp_tri->id);
                        for(i = 0; i < 3; i++)
                        {
                            printf(" side[%d] Bn [%14.12g, %14.12g]\n", i, 
                                     fg_side_dgB(crsp_tri)[i][0], fg_side_dgB(crsp_tri)[i][1]);
                        }
                        printf("\n");
                    }

                    // print_tri_crds();
                    // check consistency of labeling
                    for(i=0; i < 3; i++)
                    {
                        crds[0][i] = Coords(Point_of_tri(tri)[i]);
                        crds[1][i] = Coords(Point_of_tri(crsp_tri)[i]);
                    }
                    len = 0.0;
                    for(i=0; i < 3; i++)
                        len += distance_between_positions(crds[0][i],crds[1][i],2);
                    if(fabs(len - 3.0*(U[1]-L[1])) > 1.0e-10)
                    {
                        printf("ERROR: update_edge_B_buffer(), edge correspondence is not consistent\n");
                        clean_up(ERROR);
                    }

                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < 3; i++)
                        {
                            fg_side_B(tri)[i] = fg_side_B(crsp_tri)[i];
                            for(indx = 0; indx < N_COEF_EDGE; indx++)
                                fg_side_dgB(tri)[i][indx] = fg_side_dgB(crsp_tri)[i][indx];
                        }
                    }
                    else
                    {
                        for(i = 0; i < 3; i++)
                        {
                            mid_soln[tri->id].edge_Bn[i][rk_step] = mid_soln[crsp_tri->id].edge_Bn[i][rk_step];
                            for(indx = 0; indx < N_COEF_EDGE; indx++)
                                mid_soln[tri->id].edge_dgBn[i][indx][rk_step] = mid_soln[crsp_tri->id].edge_dgBn[i][indx][rk_step];
                        }
                    }

                    /// TMP
                    /***
                    if(debug == YES)
                    {
                        if(rk_step == RK_STEP)
                            printf("received Bn [%g, %g, %g]\n", fg_side_B(tri)[0], fg_side_B(tri)[1], fg_side_B(tri)[2]);
                        else
                            printf("received Bn [%g, %g, %g]\n", mid_soln[tri->id].edge_Bn[0][rk_step],
                                   mid_soln[tri->id].edge_Bn[1][rk_step],
                                   mid_soln[tri->id].edge_Bn[2][rk_step]);
                    }
                    ***/ 
                }
            }
        } 

}

/*
 * least_sqr_fit_linear_poly():
 *   Least-squares solution of the overdetermined system A x = b through the
 *   normal equations, x = (A'A)^(-1) A'b.  //Huijing
 *
 *   rhs      right-hand side b, size_N entries are read.
 *   ls_A     matrix A, size_N rows by 3 columns.
 *   size_N   number of rows (equations).
 *   lin_fit  output x; filled by the final 3x3 matrix-vector product
 *            (presumably 3 entries -- confirm against d_matrix_vec_mult()).
 *
 *   The work matrices live in function statics.  They are released and
 *   re-created only when size_N exceeds the row count they were last
 *   allocated for (or on the first call).
 */
LOCAL void least_sqr_fit_linear_poly(
        double  *rhs,
        double  ls_A[][3],
        int     size_N,
        double  *lin_fit)
{
        static double **mat = NULL, **mat_tran, **AA, **inv;
        static int    n_rows_alloc = 0;
        double        rside2[20];
        int           row, col;

        /* (Re)allocate the cached work space when it is missing or too small. */
        if(n_rows_alloc < size_N || n_rows_alloc == 0)
        {
            if(mat != NULL)
            {
                free(mat);
                free(mat_tran);
                free(AA);
                free(inv);
            }
            n_rows_alloc = size_N;
            matrix(&(mat), n_rows_alloc, 3, sizeof(double));
            matrix(&(mat_tran), 3, n_rows_alloc, sizeof(double));
            matrix(&(AA), 3, 3, sizeof(double));
            matrix(&(inv), 3, 3, sizeof(double));
        }

        /* Copy A into the pointer-to-pointer layout used by the matrix helpers. */
        for(row = 0; row < size_N; row++)
        {
            for(col = 0; col < 3; col++)
                mat[row][col] = ls_A[row][col];
        }

        /* A' */
        d_trans_matrix(mat, size_N, 3, mat_tran);
        /* A'A */
        matrix_matrix_mult_diff_size(mat_tran, mat,3, size_N, size_N, 3, AA);
        /* (A'A)^(-1) */
        inverse_matrix(AA,3,inv);
        /* A'b */
        d_matrix_vec_mult(mat_tran, rhs, 3, size_N, rside2);
        /* x = (A'A)^(-1) A'b */
        d_matrix_vec_mult(inv, rside2, 3, 3, lin_fit);
}

/*
 * adv_E_vertice_B_edge():
 *   For every vertex of otri that is not yet flagged through
 *   Index_of_point(), gather the triangles around the vertex, evaluate E_z
 *   on each incident edge with Elec_z_at_pt(), and combine the edge values
 *   into one vertex value that is stored in E_z_of_point() and
 *   E_z_of_tri(otri)[k].  The vertex is then flagged with Index_of_point().
 *   Finally adv_B_edge() is called for each of the three sides.
 *
 *   Combination rule at a vertex:
 *     - p_switch is set when the undivided L1 pressure gradient obtained
 *       from a least-squares linear fit exceeds beta * (min thermal
 *       pressure of the surrounding triangles).
 *     - v_switch is set when the undivided velocity divergence from the
 *       least-squares fits is below -delta * (min magnetosonic speed).
 *     - If either switch is set, edge values are weighted by the 4th power
 *       of the dot product between (t[1], -t[0]) of the edge's side vector
 *       and the normalized fitted pressure gradient (plus 1.0e-11).
 *     - Otherwise the edge values are averaged with equal weights.
 *
 *   tri       triangle passed on to bdry_tri_adv_fw() and adv_B_edge();
 *             its id is used in messages.
 *   otri      triangle whose vertices, states and sides are processed.
 *   dt        passed on to adv_B_edge().
 *   alpha     not used in this function.
 *   mid_soln  per-triangle intermediate states, indexed by triangle id;
 *             read when rk_iter != 0.
 *   rk_iter   Runge-Kutta stage; 0 selects tris[i]->st as the input state.
 *   fr        passed on to bdry_tri_adv_fw() and adv_B_edge().
 *
 *   Calls clean_up(ERROR) when more triangles than the local arrays hold
 *   are collected at a vertex, when the switched E_z is NaN, or when a side
 *   has an end vertex that was never flagged.
 */
LOCAL void adv_E_vertice_B_edge(
        TRI      *tri,
        TRI      *otri,
        float    dt,
        float    alpha,
        Mid_soln *mid_soln,
        int      rk_iter,
        Front    *fr)
{
        int      i, j, k, tri_N, side, N_side, dim = 2;
        TRI      *tris[50];
        double   tmpu[50], tmp_E_z, sum, wei[50];
        int      debug = NO;   /* set to YES by hand to trace a triangle */
        Locstate prev_sts[50];
        double   *cents[50], therm_press[50], ls_A[50][3], *pt_crds, lin_fit_press[4];
        double   diam, beta = 0.5, p_grad_L1, therm_p_min, therm_p_max, p_nor[3], ang_wei[50], t[3], nor[3];
        int      p_switch = NO, N_side2, v_switch = NO, p_min_I[3];
        double   vel_u[50], vel_v[50], mag_c[50], lin_fit_u[4], lin_fit_v[4], mag_c_min;
        double   vel_div, delta = 0.1, tmp, tmp_alpha[50], max_alpha = -HUGE_VAL, max_alpha_new = - HUGE_VAL;

        /* Triangles flagged SUBDOMAIN are not advanced here. */
        if(otri->BC_type == SUBDOMAIN)
            return;

        if(Boundary_tri(otri) || tri_on_phy_bdry(tri))
        {
            if (NO == bdry_tri_adv_fw(tri,otri,fr))
                return;
        }

        // incircle diam of the tri = 2.0*2.0*area/(sum of side length) //Huijing
        diam = fg_diam(otri);

        /*
         * First pass over the 3 vertices.
         * NOTE(review): p_switch/v_switch are sticky here (never reset to NO),
         * but every quantity computed in this pass is recomputed in the second
         * pass before it is used; the only consumer of this pass is the debug
         * print below.  Candidate for removal once confirmed that the helpers
         * called here have no side effects.
         */
        for(k = 0; k < 3; k++)
        {
            if(Index_of_point(Point_of_tri(otri)[k]) == YES && debug == NO)
                continue;

            (void) collect_tris_at_vert(otri, Point_of_tri(otri)[k], tris, &tri_N);
            /* Same capacity check as the second pass: tri_N indexes 50-entry arrays. */
            if(tri_N >= 50)
            {
                printf("ERROR: adv_E_vertice_B_edge(), exceed allocated %d\n", tri_N );
                clean_up(ERROR);
            }

            /* Input states: current states at stage 0, stage states otherwise. */
            if(rk_iter == 0)
            {
                for(i = 0; i < tri_N; i++)
                    prev_sts[i] = tris[i]->st;
            }
            else
            {
                for(i = 0; i < tri_N; i++)
                    prev_sts[i] = mid_soln[tris[i]->id].st[rk_iter];
            }

            pt_crds = Coords(Point_of_tri(otri)[k]);
            therm_p_min = HUGE_VAL; therm_p_max = -HUGE_VAL;
            mag_c_min = HUGE_VAL;
            for(i = 0; i < tri_N; i++)
            {
                cents[i] = fg_centroid(tris[i]);
                // gas pressure from equation of state
                therm_press[i] = POLY_thermal_pressure_MHD(prev_sts[i]);

                /* Row of the linear model 1 + dx + dy about the vertex. */
                ls_A[i][0] = 1.0;
                ls_A[i][1] = cents[i][0] - pt_crds[0];
                ls_A[i][2] = cents[i][1] - pt_crds[1];
                if(therm_press[i] < therm_p_min)
                {
                    therm_p_min = therm_press[i];
                    p_min_I[k] = i;
                }
                if(therm_press[i] > therm_p_max)
                    therm_p_max = therm_press[i];

                vel_u[i] = Mom(prev_sts[i])[0]/Dens(prev_sts[i]);
                vel_v[i] = Mom(prev_sts[i])[1]/Dens(prev_sts[i]);
                mag_c[i] = POLY_magnetosonic_speed_MHD(prev_sts[i]);
                if(mag_c[i] < mag_c_min)
                    mag_c_min = mag_c[i];
            }
            // Linear least squares: x = (A'A)^(-1)A'b //Huijing
            least_sqr_fit_linear_poly(therm_press, ls_A, tri_N, lin_fit_press);

            // SW1 //Huijing
            p_grad_L1 = diam*(fabs(lin_fit_press[1]) + fabs(lin_fit_press[2])); // undivided diff
            if(p_grad_L1 > (beta*therm_p_min))
                p_switch = YES;

            // SW2 //Huijing
            least_sqr_fit_linear_poly(vel_u, ls_A, tri_N, lin_fit_u);
            least_sqr_fit_linear_poly(vel_v, ls_A, tri_N, lin_fit_v);
            vel_div = diam*(lin_fit_u[1] + lin_fit_v[2]); // undivided diff to approx. divergence of velocity
            if((-delta*mag_c_min) > vel_div)
                v_switch = YES;

            if(debug == YES)
            {
                printf("adv_E_vertice_B_edge(), vel_div %g, mag_c_min = %g, p_grad_L1 %g, p_min = %g, p_max =%g "
                       "  on tri %d, p_switch = %d, v_switch = %d, p_min_I = %d\n",
                             vel_div, -delta*mag_c_min, p_grad_L1, therm_p_min, therm_p_max, otri->id, p_switch, v_switch, p_min_I[k]);
            }
        }

        /* Second pass over the 3 vertices: compute and store the vertex E_z. */
        for(k = 0; k < 3; k++)
        {
            if(Index_of_point(Point_of_tri(otri)[k]) == YES && debug == NO)
                continue;

            (void) collect_tris_at_vert(otri, Point_of_tri(otri)[k], tris, &tri_N);
            if(tri_N >= 50)
            {
                printf("ERROR: adv_E_vertice_B_edge(), exceed allocated %d\n", tri_N );
                clean_up(ERROR);
            }

            //////  Compute Pressure gradient by least square
            if(rk_iter == 0)
            {
                for(i = 0; i < tri_N; i++)
                    prev_sts[i] = tris[i]->st;
            }
            else
            {
                for(i = 0; i < tri_N; i++)
                    prev_sts[i] = mid_soln[tris[i]->id].st[rk_iter];
            }

            pt_crds = Coords(Point_of_tri(otri)[k]);
            therm_p_min = HUGE_VAL;
            mag_c_min = HUGE_VAL;
            for(i = 0; i < tri_N; i++)
            {
                cents[i] = fg_centroid(tris[i]);
                therm_press[i] = POLY_thermal_pressure_MHD(prev_sts[i]);

                ls_A[i][0] = 1.0;
                ls_A[i][1] = cents[i][0] - pt_crds[0];
                ls_A[i][2] = cents[i][1] - pt_crds[1];
                if(therm_press[i] < therm_p_min)
                    therm_p_min = therm_press[i];

                vel_u[i] = Mom(prev_sts[i])[0]/Dens(prev_sts[i]);
                vel_v[i] = Mom(prev_sts[i])[1]/Dens(prev_sts[i]);
                mag_c[i] = POLY_magnetosonic_speed_MHD(prev_sts[i]);
                if(mag_c[i] < mag_c_min)
                    mag_c_min = mag_c[i];
            }

            /* SW1: pressure-gradient switch, evaluated per vertex. */
            least_sqr_fit_linear_poly(therm_press, ls_A, tri_N, lin_fit_press);
            p_grad_L1 = diam*(fabs(lin_fit_press[1]) + fabs(lin_fit_press[2])); // undivided diff
            if(p_grad_L1 > (beta*therm_p_min))
                p_switch = YES;
            else
                p_switch = NO;

            /* Unit direction of the fitted pressure gradient. */
            sum = sqrt(sqr(lin_fit_press[1]) + sqr(lin_fit_press[2]));
            if(sum < 1.0e-12)
                sum = 1.0e-12; // to avoid division by zero.
            p_nor[0] = lin_fit_press[1]/sum;
            p_nor[1] = lin_fit_press[2]/sum;

            /* SW2: velocity-divergence switch, evaluated per vertex. */
            least_sqr_fit_linear_poly(vel_u, ls_A, tri_N, lin_fit_u);
            least_sqr_fit_linear_poly(vel_v, ls_A, tri_N, lin_fit_v);
            vel_div = diam*(lin_fit_u[1] + lin_fit_v[2]); // undivided diff to approx. divergence of velocity
            if((-delta*mag_c_min) > vel_div)
                v_switch = YES;
            else
                v_switch = NO;

            if(debug == YES)
                printf("tri_N collected = %d for vertex %d, coords[%g, %g]\n", tri_N, k,
                    Coords(Point_of_tri(otri)[k])[0],
                    Coords(Point_of_tri(otri)[k])[1]);

            /*
             * Max wave speed at the vertex: one value per (triangle, side)
             * pair whose side index matches this vertex and which has a
             * neighbor across that side.
             */
            N_side = 0;
            max_alpha = -HUGE_VAL;
            max_alpha_new = -HUGE_VAL;
            for(i = 0; i < tri_N; i++)
            {
                for(side = 0; side < 3; side++)
                {
                    if(Point_of_tri(tris[i])[side] == Point_of_tri(otri)[k])
                    {
                        if(NULL != Tri_on_side(tris[i],side))
                        {
                            tmp_alpha[N_side] = Max_wave_speed_at_pt(Point_of_tri(otri)[k], tris[i], side, mid_soln, rk_iter, NO);
                            N_side++;
                        }
                    }
                }
            }

            for(i = 0; i < N_side; i++)
            {
                if(tmp_alpha[i] > max_alpha)
                    max_alpha_new = max_alpha = tmp_alpha[i];
            }

            /*
             * E_z on each incident edge, using the common max_alpha.  Must be
             * a separate sweep because max_alpha needs all edges first.
             */
            N_side = 0;
            for(i = 0; i < tri_N; i++)
            {
                for(side = 0; side < 3; side++) // test which tris[i][side] is the point of interest
                {
                    if(Point_of_tri(tris[i])[side] == Point_of_tri(otri)[k])
                    {
                        if(NULL != Tri_on_side(tris[i],side))
                        {
                            tmpu[N_side] = Elec_z_at_pt(Point_of_tri(otri)[k], tris[i], side, mid_soln, rk_iter, max_alpha, debug);
                            N_side++;
                        }
                    }
                }
            }

            Index_of_point(Point_of_tri(otri)[k]) = YES;

            tmp_E_z = 0.0;
            sum = 0.0;
            if(p_switch == YES || v_switch == YES)
            {
                /*
                 * Directional weights.  The sweep visits the same
                 * (triangle, side) pairs in the same order as the sweeps
                 * above, so ang_wei[] lines up with tmpu[].
                 */
                N_side2 = 0;
                for(i = 0; i < tri_N; i++)
                {
                    for(side = 0; side < 3; side++)
                    {
                        if(Point_of_tri(tris[i])[side] == Point_of_tri(otri)[k])
                        {
                            if(NULL != Tri_on_side(tris[i],side))
                            {
                                for(j = 0; j < dim; j++)
                                    t[j] = fg_side_vector(tris[i])[side][j];
                                nor[0] = t[1];
                                nor[1] = -t[0];
                                tmp = nor[0]*p_nor[0] + nor[1]*p_nor[1]; //// use dot product.
                                ang_wei[N_side2] = sqr(sqr(tmp));
                                N_side2++;
                            }
                        }
                    }
                }

                for(i = 0; i < N_side; i++)
                {
                    /* Small offset keeps sum nonzero when every weight vanishes. */
                    wei[i] = (ang_wei[i]+1.0e-11);
                    sum += wei[i];
                }
                tmp_E_z = 0.0;
                for(i = 0; i < N_side; i++)
                    tmp_E_z += wei[i]/sum*tmpu[i];

                if(isnan(tmp_E_z))
                {
                    printf("ERROR: adv_E_vertice_B_edge(), tmp_E_z is Nan, tri[%d], sum %g\n",otri->id, sum);
                    printf("linear fit press %g %g %g\n", lin_fit_press[0], lin_fit_press[1], lin_fit_press[2]);
                    printf("surrounding press\n");
                    for(i = 0; i < tri_N; i++)
                        printf("press[%d] = %g\n", i, therm_press[i]);

                    clean_up(ERROR);
                }
            }
            else
            {
                /*
                 * Equal-weight average.  wei[] is not assigned on this path,
                 * so it must not be read here.
                 */
                if(MAX_N_COEF == 3 || MAX_N_COEF == 6)
                {
                    for(i = 0; i < N_side; i++)
                        tmp_E_z += tmpu[i];
                    tmp_E_z /= N_side;
                }
            }

            E_z_of_point(Point_of_tri(otri)[k]) = tmp_E_z;
            E_z_of_tri(otri)[k] = tmp_E_z;

            if(debug == YES)
            {
                printf("Ez at edges: ");
                for(i = 0; i < N_side; i++)
                    printf("%g; ", tmpu[i]);
                printf(" Weighted Ez = %g", tmp_E_z);
                printf("\n");

                printf("alpha at edges: ");
                for(i = 0; i < N_side; i++)
                    printf("%g; ", tmp_alpha[i]);
                printf("\n");

                printf("press of tris: ");
                for(i = 0; i < tri_N; i++)
                    printf("%g; ", therm_press[i]);
                printf("\n");

                printf("max_alpha_new = %g\n\n", max_alpha_new);

                if(isnan(tmp_E_z))
                {
                    printf("ERROR: adv_E_vertice_B_edge(), point E_z nan\n");
                    printf("point %g %g of tri[%d]\n", Coords(Point_of_tri(otri)[k])[0],
                                 Coords(Point_of_tri(otri)[k])[1], otri->id);
                    print_tri_crds(otri);
                    clean_up(ERROR);
                }
            }
        } /// END: for(k = 0; k < 3; k++)

        //// Now compute normal B on edge.
        for(side = 0; side < 3; side++)
        {
            /* Both end vertices of the side must already be flagged. */
            if(Index_of_point(Point_of_tri(otri)[side]) == YES &&
               Index_of_point(Point_of_tri(otri)[(side+1)%3]) == YES)
            {
                adv_B_edge(side, otri, tri, dt, rk_iter, mid_soln, fr);
            }
            else
            {
                printf("ERROR: adv_E_vertice_B_edge(), E_z of tri[%d] was not computed\n", tri->id);
                clean_up(ERROR);
            }
        }
        if(YES == debug)
        {
            printf("\n**************Tri %d left adv_E_vertice_B_edge()\n", tri->id);
        }
}

LOCAL void adv_dual_cell_B_edge_DG(
        POLYGON      *polyg,
        POLYGON      *opolyg,
        float        dt,
        float        alpha,
        Mid_soln     *mid_soln,
        Dual_cell_Mid_soln   *dual_cell_mid_soln,
        int          rk_iter,
        Front        *fr)
{
        int          side, indx, k, debug = NO;
        double       edge_Ez[MAX_N_POLY_SIDE][30], Ez_vert[MAX_N_POLY_SIDE][2];
        TRI          *prev_tri, *next_tri;
        POINT        *prev_pt,  *next_pt;
        double       length, vh_pt, Bn0;
        double       soln_Bn[MAX_N_POLY_SIDE][N_COEF_EDGE], 
                     prev_Bn[MAX_N_POLY_SIDE][N_COEF_EDGE], prev_Bn0[MAX_N_POLY_SIDE][N_COEF_EDGE];

        double       rhs[MAX_N_POLY_SIDE][N_COEF_EDGE];
        double       prev_Bn_from_tri[MAX_N_POLY_SIDE][N_COEF_EDGE];
        double       prev_Bn0_from_tri[MAX_N_POLY_SIDE][N_COEF_EDGE];
        double       prev_partBn_from_tri[MAX_N_POLY_SIDE][2][N_COEF_EDGE];
        double       prev_partBn0_from_tri[MAX_N_POLY_SIDE][2][N_COEF_EDGE];
        double       m_correct_partBn_from_tri[MAX_N_POLY_SIDE];
        double       m_correct_partBn0_from_tri[MAX_N_POLY_SIDE];
        double       ****mass_edge = opolyg->mass_edge, bound;
        double       org_avgBn, tri_trace_avgBn, org_Bn_1, org_Bn_2;
        double       tri_trace_Bn_1, tri_trace_Bn_2, tmp_len1, tmp_len2;
     
        static int   first = YES;
        int          debug_flag = NO, N_sides, prev_side, flag_correction[MAX_N_POLY_SIDE], flag_correct = NO; 
        int          found_edge, tmp_side;
        POLYGON      *neighb_cell;
        double       *GL = fr->rect_grid->GL, *GU = fr->rect_grid->GU;
        double       cent_theta = 0.5; // old 0.5

        if(opolyg->closed == NO) 
        {
            /**
            RECT_GRID *gr = fr->rect_grid;   
            if(polyg_centroid(opolyg)[0] < gr->U[0] &&
               polyg_centroid(opolyg)[0] > gr->L[0] &&
               polyg_centroid(opolyg)[1] < gr->U[1] &&
               polyg_centroid(opolyg)[1] > gr->L[1]
              )
            {
                printf("WARNING: dual cell %d in adv_dual_cell_B_edge_DG() not computed\n", opolyg->id);
                print_polyg_crds(opolyg);
            }
            **/
            return;
        }

        /***
        if(opolyg->id == 200 || opolyg->id == 240) // 150
        {
            printf("\n\n--------------------------------------------------\n");
            printf("dual cell %d in adv_dual_cell_B_edge_DG(), RK stage %d, step %d\n", 
                   opolyg->id, rk_iter, fr->step);   
            printf("-----------------\n\n");
            print_polyg_crds(opolyg);
            debug = YES;
        } 
        ***/
        /***
        if(opolyg->id == 2611 && pp_mynode() == 0) // 150
        {
            printf("\n\n--------------------------------------------------\n");
            printf("dual cell %d in adv_dual_cell_B_edge_DG(), RK stage %d, step %d\n", 
                   opolyg->id, rk_iter, fr->step);   
            printf("-----------------\n\n");
            print_polyg_crds(opolyg);
            debug = YES;
        } 
        else if(opolyg->id == 2511 && pp_mynode() == 1)
        {
            printf("\n\n------------------------------------------------\n");
            printf("dual cell %d in adv_dual_cell_B_edge_DG(), RK stage %d, step %d\n", 
                   opolyg->id, rk_iter, fr->step);   
            printf("-----------------\n\n");
            print_polyg_crds(opolyg);
            debug = YES;
        }
        else if(opolyg->id == 509 && pp_mynode() == 2)
        {
            printf("\n\n------------------------------------------------\n");
            printf("dual cell %d in adv_dual_cell_B_edge_DG(), RK stage %d, step %d\n", 
                   opolyg->id, rk_iter, fr->step);   
            printf("-----------------\n\n");
            print_polyg_crds(opolyg);
            debug = YES;
        }
        else if(opolyg->id == 441 && pp_mynode() == 3)
        {
            printf("\n\n------------------------------------------------\n");
            printf("dual cell %d in adv_dual_cell_B_edge_DG(), RK stage %d, step %d\n", 
                   opolyg->id, rk_iter, fr->step);   
            printf("-----------------\n\n");
            print_polyg_crds(opolyg);
            debug = YES;
        }
        fflush(stdout);

        if(opolyg->id == 66) 
        {
             printf("Dual cell %d entered adv_dual_cell_B_edge_DG() with flags %d %d %d %d %d %d\n",
                       opolyg->id, opolyg->private_data._iflags[0], 
                       opolyg->private_data._iflags[1], opolyg->private_data._iflags[2],
                       opolyg->private_data._iflags[3], opolyg->private_data._iflags[4], 
                       opolyg->private_data._iflags[5]);
        }
        ***/

        N_sides = opolyg->n_sides;

        //// Due to B in buffer is not made globally divergence-free
        //// A conservation correction may be needed for dual-grid edges located within the buffer
        /*** commented on 02122014
        for(side = 0; side < N_sides; side++)
        {
            flag_correction[side] = NO;
            if(tri_at_polyg_vert(opolyg)[side]->BC_type == SUBDOMAIN)
            {
                if(debug_flag == YES)
                    printf("Side[%d] will do mass correction\n", side); 

                flag_correction[side] = YES;
                flag_correct = YES;
            }
        }
        ***/
        for(side = 0; side < N_sides; side++)
        {
            double  *tmp_p0, *tmp_p1;
            int     p0_outside, p1_outside;

            /// TMP
            /**
            if(YES == debug && (side == 0 || side == 3))
                debug_flag = YES;
            // else if(YES == debug && side == 0 && opolyg->id == 407)
            //     debug_flag = YES;
            else 
                debug_flag = NO;
            **/
            /// END::: TMP

            flag_correction[side] = NO;
            if(!debugging("field_loop"))
            {
#if defined(__MPI__)
                tmp_p0 = Coords(Point_of_polyg(opolyg)[side]);
                tmp_p1 = Coords(Point_of_polyg(opolyg)[(side+1)%N_sides]);
                p0_outside = outside_point(tmp_p0, GL, GU, 2);
                p1_outside = outside_point(tmp_p1, GL, GU, 2);
                if(p0_outside != 0 && p1_outside != 0)
                {
                    // if(debug_flag == YES)
                    //     printf("Side[%d] will do mass correction\n", side);

                    flag_correction[side] = YES;
                    flag_correct = YES;
                }
#else
                tmp_p0 = Coords(Point_of_polyg(opolyg)[side]);
                tmp_p1 = Coords(Point_of_polyg(opolyg)[(side+1)%N_sides]);
                p0_outside = outside_point(tmp_p0, GL, GU, 2);
                p1_outside = outside_point(tmp_p1, GL, GU, 2);
                if(p0_outside != 0 && p1_outside != 0)
                // if(tri_at_polyg_vert(opolyg)[side]->BC_type == SUBDOMAIN &&
                //    tri_at_polyg_vert(opolyg)[(side+1)%N_sides]->BC_type == SUBDOMAIN)
                {
                    // if(debug_flag == YES)
                    // if(debug == YES)
                    //     printf("Side[%d] will do mass correction\n", side);

                    flag_correction[side] = YES;
                    flag_correct = YES;

                    // if(debugging("field_loop"))
                    //     flag_correct = NO;
                }
#endif /* if defined(__MPI__) */
            }
        } /// END::: for(side = 0; side < N_sides; side++)

        // if(debug == YES) printf("\n");

        dual_cell_B_edge_from_tri_DG_P1(polyg,opolyg,mid_soln,dual_cell_mid_soln,
                                   rk_iter,fr,prev_Bn_from_tri, prev_partBn_from_tri,NO);
        if(RK_STEP == 2 && rk_iter == 1)
        {
            dual_cell_B_edge_from_tri_DG_P1(polyg,opolyg,mid_soln,dual_cell_mid_soln,
                                        0,fr,prev_Bn0_from_tri, prev_partBn0_from_tri, NO);
            for(side = 0; side < N_sides; side++)
            {
                for(indx = 0; indx < N_COEF_EDGE; indx++)
                    prev_Bn0[side][indx] = polyg_side_dgB(opolyg)[side][indx];
            }
        }

        if(rk_iter == 0)
        {
            for(side = 0; side < N_sides; side++)
            {
                for(indx = 0; indx < N_COEF_EDGE; indx++)
                    prev_Bn[side][indx] = polyg_side_dgB(opolyg)[side][indx];
            }
        }
        else
        {
            for(side = 0; side < N_sides; side++)
            {
                for(indx = 0; indx < N_COEF_EDGE; indx++)
                    prev_Bn[side][indx] = dual_cell_mid_soln[opolyg->id].edge_dgBn[rk_iter][side][indx];
            }
        } 

        /////////////////////// 11/07/2012
        /// Only do mass correction for the edge of dual cells within the buffer zone,
        /// since B in buffers of tri-mesh is not made globally divergence-free.
        if(YES == flag_correct)
        {
            found_edge = NO;
            for(side = 0; side < N_sides; side++)
            {
                if(flag_correction[side] == YES && flag_correction[(side+1)%N_sides] == YES)
                {
                    /**
                    if(YES == debug_flag)
                    {
                        printf("edge(%d) of dual cell(%d) is corrected\n", side, opolyg->id);
                    }
                    **/

                    found_edge = YES;
                    org_Bn_2 = 0.0;
                    for(tmp_side = 0; tmp_side < N_sides; tmp_side++)
                    {
                        if(tmp_side == side) continue;
                        org_Bn_2 += (mass_edge[tmp_side][0][0][0]*prev_partBn_from_tri[tmp_side][0][0] +
                                     mass_edge[tmp_side][0][0][1]*prev_partBn_from_tri[tmp_side][0][1])*
                                         polyg_length_side(opolyg)[tmp_side]/2.0 +
                                    (mass_edge[tmp_side][1][0][0]*prev_partBn_from_tri[tmp_side][1][0] +
                                     mass_edge[tmp_side][1][0][1]*prev_partBn_from_tri[tmp_side][1][1])*
                                         polyg_length_side(opolyg)[tmp_side]/2.0;
                    }
                    org_Bn_2 *=-2.0/polyg_length_side(opolyg)[side];
                    tri_trace_Bn_1 = (prev_partBn_from_tri[side][0][0]*mass_edge[side][0][0][0] +
                                      prev_partBn_from_tri[side][0][1]*mass_edge[side][0][0][1]);
 
                    tri_trace_Bn_2 = (prev_partBn_from_tri[side][1][0]*mass_edge[side][1][0][0] +
                                      prev_partBn_from_tri[side][1][1]*mass_edge[side][1][0][1]);

                    m_correct_partBn_from_tri[side] = ((org_Bn_2) - (tri_trace_Bn_2 + tri_trace_Bn_1))/
                                                       (mass_edge[side][0][0][0] + mass_edge[side][1][0][0]);
                    prev_partBn_from_tri[side][0][0] += m_correct_partBn_from_tri[side];
                    prev_partBn_from_tri[side][1][0] += m_correct_partBn_from_tri[side];

                    // if(YES == debug)
                    //     printf("edge(%d) of dual cell(%d) is corrected with (%14.13g)\n", 
                    //       side, opolyg->id, m_correct_partBn_from_tri[side]);
                
                    break;
                }
            }

            if(found_edge == NO)
            {
                printf("ERROR: adv_dual_cell_B_edge_DG(), dual cell %d\n", opolyg->id);
                printf("Can not find edge for conservation correction\n");
                clean_up(ERROR);
            }
        }/// END::: if(YES == flag_correct)
        /////////////////////// END:::: 11/07/2012

        //// compute new \int Bn \times (basis function) d(\xi)
        for(side = 0; side < N_sides; side++)
        {
            /// from [-1, crs] to [crs, 1] \int Bn \times (1) d(\xi)
            prev_Bn_from_tri[side][0] = mass_edge[side][0][0][0]*prev_partBn_from_tri[side][0][0] + 
                                        mass_edge[side][1][0][0]*prev_partBn_from_tri[side][1][0] +
                                        mass_edge[side][0][0][1]*prev_partBn_from_tri[side][0][1] + 
                                        mass_edge[side][1][0][1]*prev_partBn_from_tri[side][1][1];

            /// from [-1, crs] to [crs, 1]. \int Bn \times (\xi) d(\xi)
            prev_Bn_from_tri[side][1] = mass_edge[side][0][1][0]*prev_partBn_from_tri[side][0][0] + 
                                        mass_edge[side][1][1][0]*prev_partBn_from_tri[side][1][0] +
                                        mass_edge[side][0][1][1]*prev_partBn_from_tri[side][0][1] + 
                                        mass_edge[side][1][1][1]*prev_partBn_from_tri[side][1][1];
            /// TMP
            /***
            if(YES == debug && (side == 0 || side == 3))
                debug_flag = YES;
            else 
                debug_flag = NO;
            if(YES == debug_flag)
                printf("side[%d], prev_Bn_from_tri(%14.13g, %14.13g)\n", side,
                     prev_Bn_from_tri[side][0], prev_Bn_from_tri[side][1]); 
            ***/
            /// END::: TMP

        }
        ////END::: compute new \int Bn \times (basis function) d(\xi)

        if(RK_STEP == 2 && rk_iter == 1)
        {
            /////////////////////// 11/07/2012 for RK2, stage 0 soln
            /// Only do mass correction for the edge of dual cells within the buffer zone,
            /// since B in buffers of tri-mesh is not made globally divergence-free.
            if(YES == flag_correct)
            {
                found_edge = NO;
                for(side = 0; side < N_sides; side++)
                {
                    if(flag_correction[side] == YES && flag_correction[(side+1)%N_sides] == YES)
                    {
                        found_edge = YES;
                        org_Bn_2 = 0.0;
                        for(tmp_side = 0; tmp_side < N_sides; tmp_side++)
                        {
                            if(tmp_side == side) continue;
                            org_Bn_2 += (mass_edge[tmp_side][0][0][0]*prev_partBn0_from_tri[tmp_side][0][0] +
                                         mass_edge[tmp_side][0][0][1]*prev_partBn0_from_tri[tmp_side][0][1])*
                                             polyg_length_side(opolyg)[tmp_side]/2.0 +
                                        (mass_edge[tmp_side][1][0][0]*prev_partBn0_from_tri[tmp_side][1][0] +
                                         mass_edge[tmp_side][1][0][1]*prev_partBn0_from_tri[tmp_side][1][1])*
                                             polyg_length_side(opolyg)[tmp_side]/2.0;
                        }
                        org_Bn_2 *=-2.0/polyg_length_side(opolyg)[side];
                        tri_trace_Bn_1 = (prev_partBn0_from_tri[side][0][0]*mass_edge[side][0][0][0] +
                                          prev_partBn0_from_tri[side][0][1]*mass_edge[side][0][0][1]);

                        tri_trace_Bn_2 = (prev_partBn0_from_tri[side][1][0]*mass_edge[side][1][0][0] +
                                          prev_partBn0_from_tri[side][1][1]*mass_edge[side][1][0][1]);

                        m_correct_partBn0_from_tri[side] = ((org_Bn_2) - (tri_trace_Bn_2 + tri_trace_Bn_1))/
                                                           (mass_edge[side][0][0][0] + mass_edge[side][1][0][0]);
                        prev_partBn0_from_tri[side][0][0] += m_correct_partBn0_from_tri[side];
                        prev_partBn0_from_tri[side][1][0] += m_correct_partBn0_from_tri[side];
                        break;
                    }
                }
            }/// END:: if(YES == flag_correct)
            /////////////////////// END:::: 11/07/2012 for RK2, stage 0 soln

            //// compute new \int Bn0 \times (basis function) d(\xi)
            for(side = 0; side < N_sides; side++)
            {
                /// from [-1, crs] to [crs, 1] \int Bn0 \times (1) d(\xi)
                prev_Bn0_from_tri[side][0] = mass_edge[side][0][0][0]*prev_partBn0_from_tri[side][0][0] +
                                             mass_edge[side][1][0][0]*prev_partBn0_from_tri[side][1][0] +
                                             mass_edge[side][0][0][1]*prev_partBn0_from_tri[side][0][1] +
                                             mass_edge[side][1][0][1]*prev_partBn0_from_tri[side][1][1];

                /// from [-1, crs] to [crs, 1]. \int Bn0 \times (\xi) d(\xi)
                prev_Bn0_from_tri[side][1] = mass_edge[side][0][1][0]*prev_partBn0_from_tri[side][0][0] +
                                             mass_edge[side][1][1][0]*prev_partBn0_from_tri[side][1][0] +
                                             mass_edge[side][0][1][1]*prev_partBn0_from_tri[side][0][1] +
                                             mass_edge[side][1][1][1]*prev_partBn0_from_tri[side][1][1];
                /// TMP
                /**
                if(YES == debug && (side == 3 || side == 0))
                    debug_flag = YES;
                else 
                    debug_flag = NO;
                if(YES == debug_flag)
                    printf("side[%d], prev_Bn0_from_tri(%14.13g, %14.13g)\n", side,
                         prev_Bn0_from_tri[side][0], prev_Bn0_from_tri[side][1]); 
                **/
                /// END:: TMP
            }
        }/// END::: if(RK_STEP == 2 && rk_iter == 1)

        /// TMP
        /***
        if(YES == debug)
        {
            
            printf("previous soln of Bn at RK %d, step %d\n\n", rk_iter,fr->step);

            for(side = 0; side < opolyg->n_sides; side++)
            {
                if(side == 3 && opolyg->id == 381)
                {
                    printf("side[%d] prev_Bn[%15.14g, %15.14g], len[%14.13g, %14.13g]\n", 
                       side, prev_Bn[side][0], prev_Bn[side][1], mass_edge[side][0][0][0], mass_edge[side][1][0][0]);  
                    printf("---Avg of trace from tri  [%14.13g]\n\n", prev_Bn_from_tri[side][0]/2.0);
                }
                else if(side == 0 && opolyg->id == 407)
                {
                    printf("side[%d] prev_Bn[%15.14g, %15.14g], len[%14.13g, %14.13g]\n", 
                       side, prev_Bn[side][0], prev_Bn[side][1], mass_edge[side][0][0][0], mass_edge[side][1][0][0]);  
                    printf("---Avg of trace from tri  [%14.13g]\n\n", prev_Bn_from_tri[side][0]/2.0);
                }
            }
            printf("-------- END:: previous soln of Bn----\n\n");

            org_Bn_2 = 0.0;
            tri_trace_Bn_2 = 0.0;
            tmp_len1 = tmp_len2 = 0.0;

            for(side = 0; side < N_sides; side++)
            {
                org_Bn_2 += prev_Bn[side][0]*(mass_edge[side][0][0][0]+mass_edge[side][1][0][0])
                                            *polyg_length_side(opolyg)[side]/2.0;
                tmp_len1 += polyg_length_side(opolyg)[side];

                tri_trace_Bn_2 += (mass_edge[side][0][0][0]*prev_partBn_from_tri[side][0][0] +
                                   mass_edge[side][0][0][1]*prev_partBn_from_tri[side][0][1])*
                                     polyg_length_side(opolyg)[side]/2.0 +
                                  (mass_edge[side][1][0][0]*prev_partBn_from_tri[side][1][0] +
                                   mass_edge[side][1][0][1]*prev_partBn_from_tri[side][1][1])*
                                     polyg_length_side(opolyg)[side]/2.0;

                 tmp_len2 += mass_edge[side][0][0][0]*polyg_length_side(opolyg)[side]/2.0 + 
                             mass_edge[side][1][0][0]*polyg_length_side(opolyg)[side]/2.0; 
            }
            printf("Original total Bn on edges = %14.13g\n", org_Bn_2);
            printf("Total Bn on trace of tris = %14.13g\n", tri_trace_Bn_2);
            printf("Total length of perimeter = %14.13g\n", tmp_len1);
            printf("Computed total length of perimeter = %12.10g\n\n", tmp_len2);

            // printf("WARNING: exit adv_dual_cell_B_edge_DG()\n");
            // clean_up(0);
        }
        ***/
        /// END::: TMP

        //// TMP:::: Consistency check
        tri_trace_Bn_2 = 0.0;
        for(side = 0; side < N_sides; side++)
        {
            tri_trace_Bn_2 += (mass_edge[side][0][0][0]*prev_partBn_from_tri[side][0][0] +
                               mass_edge[side][0][0][1]*prev_partBn_from_tri[side][0][1])*
                                     polyg_length_side(opolyg)[side]/2.0 +
                              (mass_edge[side][1][0][0]*prev_partBn_from_tri[side][1][0] +
                               mass_edge[side][1][0][1]*prev_partBn_from_tri[side][1][1])*
                                     polyg_length_side(opolyg)[side]/2.0;
        }
        if(fabs(tri_trace_Bn_2) > 1.0e-10)
        {
            printf("ERROR: adv_dual_cell_B_edge_DG() dual cell %d\n", opolyg->id);
            printf("The flux of projected Bn is not divergence-free = %14.12g\n", tri_trace_Bn_2);
            print_polyg_crds(opolyg);
            for(side = 0; side < N_sides; side++)
            {
                printf("side[%d], part_1: %12.11g, %12.11g, part_2: %12.11g, %12.11g\n",
                    side, prev_partBn_from_tri[side][0][0], prev_partBn_from_tri[side][0][1],
                    prev_partBn_from_tri[side][1][0], prev_partBn_from_tri[side][1][1]);
            }
            clean_up(ERROR);
        }
        //// END::: TMP:::: Consistency check

        // Compute flux of E. for each edge 
        for(side = 0; side < N_sides; side++)
        {
            /// TMP
            /***
            if(YES == debug && side == 3 && opolyg->id == 381)
                debug_flag = YES;
            else if(YES == debug && side == 0 && opolyg->id == 407)
                debug_flag = YES;
            else 
                debug_flag = NO;
            ***/
            /// END:: TMP
             
            if(YES == (opolyg->private_data)._iflags[side])
            {
                for(indx = 0; indx < N_COEF_EDGE; indx++)
                    rhs[side][indx] = opolyg->_DG_rhs[side][indx];
                /// TMP
                /***
                if(YES == debug_flag)
                {
                    printf("rsh of E by opposite: %15.14g, %15.14g\n", rhs[side][0], rhs[side][1]);     
                }
                ***/ 
                /// END:: TMP
                continue;
            }

            // adjacent cells do not have to set rhs for opolyg
            (opolyg->private_data)._iflags[side] = YES; 

            prev_tri = tri_at_polyg_vert(opolyg)[side];
            next_tri = tri_at_polyg_vert(opolyg)[(side+1)%opolyg->n_sides];

            prev_pt = Point_of_polyg(opolyg)[side];
            next_pt = Point_of_polyg(opolyg)[(side+1)%opolyg->n_sides];

            /*****
            //// TMP, sanity check
            if(pt_in_tri(Coords(prev_pt), prev_tri) != YES ||
               pt_in_tri(Coords(next_pt), next_tri) != YES)
            {
                printf("ERROR: adv_dual_cell_B_edge_DG()\n");
                printf("dual cell edge end points do not in tris\n");
                clean_up(ERROR);
            }
            //// END:::: TMP, sanity check
            *****/

            dual_cell_edge_Ez_quadrature(prev_tri,next_tri,prev_pt, next_pt, 
                              mid_soln,rk_iter,edge_Ez[side],Ez_vert[side], debug_flag);
                              // ((side == 0 && debug_flag == YES)? YES:NO));

            length = polyg_length_side(opolyg)[side];

            for(indx = 0; indx < N_COEF_EDGE; indx++)
            {
                rhs[side][indx] = 0.0;
                /**
                /// Start:: Gauss Lobatto
                for(k = 0; k < Lobatto_N; k++)
                {
                    vh_pt = vh_val_d_ver2(Lo_q[k],indx);
                    rhs[side][indx] += edge_Ez[side][k]*vh_pt*Lo_qw[k];
                }
                rhs[side][indx] -= edge_Ez[side][Lobatto_N-1] - pow(-1,indx)*edge_Ez[side][0];
                rhs[side][indx] *= (2.0*indx+1.0)/length;
                /// End:: Gauss Lobatto
                **/
                /// Start:: Gauss quadrature rule
                for(k = 0; k < Gauss_N; k++)
                {
                    vh_pt = vh_val_d_ver2(q[k],indx);
                    rhs[side][indx] += edge_Ez[side][k]*vh_pt*qw[k];
                }
                rhs[side][indx] -= (Ez_vert[side][1] - pow(-1,indx)*Ez_vert[side][0]);
                rhs[side][indx] *= (2.0*indx+1.0)/length;
                /// End:: Gauss quadrature rule
            }

            //// TMP
            /**
            if(YES == debug_flag)
            {
                printf("rsh of E: %15.14g, %15.14g\n", rhs[side][0], rhs[side][1]);     
            }
            **/ 
            //// END::: TMP
        }  /// END:::: for(side = 0; side < opolyg->n_sides; side++)
        //// END:::: Compute flux of E. for each edge

        if(RK_STEP == 1)
        {
            printf("ERROR: adv_dual_cell_B_edge_DG()\n");
            printf("Implement exact central DG \n");
            clean_up(ERROR);
            for(side = 0; side < N_sides; side++)
            {
                for(indx = 0; indx < N_COEF_EDGE; indx++)
                    soln_Bn[side][indx] = prev_Bn[side][indx] + dt*rhs[side][indx];
            }
        }
        else if(RK_STEP == 2)
        {
            if(rk_iter == 0)
            { 
                for(side = 0; side < N_sides; side++)
                {
                    // for(indx = 0; indx < N_COEF_EDGE; indx++)
                    //     soln_Bn[side][indx] = prev_Bn[side][indx] + dt*rhs[side][indx];

                    /* Original NT DG scheme */
                    /*
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                        soln_Bn[side][indx] = prev_Bn_from_tri[side][indx]*(2.0*indx+1.0)/2.0 + dt*rhs[side][indx];
                    */
                    /* END: Original NT DG scheme */

                    /* Liu's central DG scheme */
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                        soln_Bn[side][indx] = cent_theta*prev_Bn_from_tri[side][indx]*(2.0*indx+1.0)/2.0 
                                             +(1.0-cent_theta)* prev_Bn[side][indx] + dt*rhs[side][indx];
                    /// TMP
                    /**
                    if(YES == debug && (side == 0 || side == 3))
                        debug_flag = YES;
                    else 
                        debug_flag = NO;
                    if(YES == debug_flag)
                    {
                        printf("\ndata for soln_Bn[0] %15.14g, %15.14g, %15.14g\n", 
                           cent_theta*prev_Bn_from_tri[side][0]*(2.0*0+1.0)/2.0, 
                           (1.0-cent_theta)* prev_Bn[side][0], dt*rhs[side][0]);
                        printf("data for soln_Bn[1] %15.14g, %15.14g, %15.14g\n\n", 
                           cent_theta*prev_Bn_from_tri[side][1]*(2.0*1+1.0)/2.0, 
                           (1.0-cent_theta)* prev_Bn[side][1], dt*rhs[side][1]);
                    }
                    **/ 
                    /// END:: TMP
                    /* END: Liu's central DG scheme */
                }
            }
            else
            {
                for(side = 0; side < N_sides; side++)
                {
                    // for(indx = 0; indx < N_COEF_EDGE; indx++)
                    // {
                    //     Bn0 = polyg_side_dgB(opolyg)[side][indx];
                    //     soln_Bn[side][indx] = 0.5*(Bn0 + prev_Bn[side][indx]) + 0.5*dt*rhs[side][indx];
                    // }

                    /* Original NT DG scheme */
                    /**
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                    {
                        soln_Bn[side][indx] = 0.5*(prev_Bn0_from_tri[side][indx] + prev_Bn_from_tri[side][indx])*
                                                  (2.0*indx+1.0)/2.0 + 0.5*dt*rhs[side][indx];
                    }
                    **/
                    /* END: Original NT DG scheme */
                    /* Liu's central DG scheme */
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                    {
                        soln_Bn[side][indx] = 0.5*cent_theta*prev_Bn_from_tri[side][indx]*(2.0*indx+1.0)/2.0 
                                              + 0.5*polyg_side_dgB(opolyg)[side][indx] 
                                              + 0.5*(1.0-cent_theta)*prev_Bn[side][indx]
                                              + 0.5*dt*rhs[side][indx];
                    }
                    // dual_cell_mid_soln[polyg->id].edge_dgBn[rk_iter+1][side][indx]
                    /* END: Liu's central DG scheme */
                }
            }
        }
        else if(RK_STEP == 3)
        {
            printf("ERROR: adv_dual_cell_B_edge_DG()\n");
            printf("Implement exact central DG \n");
            clean_up(ERROR);

            if(rk_iter == 0)
            {
                for(side = 0; side < N_sides; side++)
                {
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                        soln_Bn[side][indx] = prev_Bn[side][indx] + dt*rhs[side][indx];
                }
            }
            else if(rk_iter == 1)
            {
                for(side = 0; side < N_sides; side++)
                {
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                    {
                        Bn0 = polyg_side_dgB(opolyg)[side][indx];
                        soln_Bn[side][indx] = 0.75*Bn0 + 0.25*prev_Bn[side][indx] + 0.25*dt*rhs[side][indx];
                    }
                }
            }
            else
            {
                for(side = 0; side < N_sides; side++)
                {
                    for(indx = 0; indx < N_COEF_EDGE; indx++)
                    {
                        Bn0 = polyg_side_dgB(opolyg)[side][indx];
                        soln_Bn[side][indx] = 1.0/3.0*Bn0 + 
                                 2.0/3.0*prev_Bn[side][indx] + 2.0/3.0*dt*rhs[side][indx];;
                    }
                }
            }
        }
        else
        {
            printf("ERROR: adv_dual_cell_B_edge_DG(), implement RK_STEP = %d\n", RK_STEP);
            clean_up(ERROR);
        }

        /// TMP
        /***
        if(YES == debug)
        {
            printf("---------Dual cell %d Soln of Bn at adv_dual_cell_B_edge_DG()-------\n\n", 
                    polyg->id);
            for(side = 0; side < opolyg->n_sides; side++)
            {
                if(polyg->id == 381 && side == 3)
                    printf("side[%d] curr_Bn[%14.12g, %14.12g]\n", side, soln_Bn[side][0], soln_Bn[side][1]);
                if(polyg->id == 407 && side == 0)
                    printf("side[%d] curr_Bn[%14.12g, %14.12g]\n", side, soln_Bn[side][0], soln_Bn[side][1]);
            }
            printf("------ End soln of Bn\n\n");
        }
        ***/
        /// END::: TMP

        /// save the rhs to common edge on adjacent cells.
        for(side = 0; side < N_sides; side++)
        {
            neighb_cell = Polyg_on_side(opolyg, side);
            for(tmp_side = 0;  tmp_side < neighb_cell->n_sides; tmp_side++)
            {
                if(Polyg_on_side(neighb_cell, tmp_side) == opolyg)
                {
                    if(NO == (neighb_cell->private_data)._iflags[tmp_side])
                    {
                        (neighb_cell->private_data)._iflags[tmp_side] = YES;

                        neighb_cell->_DG_rhs[tmp_side][0] = -1.0*rhs[side][0]; 
                        if(N_COEF_EDGE == 2)
                            neighb_cell->_DG_rhs[tmp_side][1] = rhs[side][1]; 
                    }
                    break;
                }   
            }/// END::: for(tmp_side = 0;  tmp_side < neighb_cell->n_sides; 
        }
        /// END:::: save the rhs to common edge on adjacent cells.


        //store soln_Bn
        if(rk_iter == RK_STEP-1)
        {
            for(side = 0; side < N_sides; side++)
            {
                for(indx = 0; indx < N_COEF_EDGE; indx++)
                    polyg_side_dgB(polyg)[side][indx] = soln_Bn[side][indx];
            }
        }
        else
        {
            for(side = 0; side < N_sides; side++)
            {
                for(indx = 0; indx < N_COEF_EDGE; indx++)
                    dual_cell_mid_soln[polyg->id].edge_dgBn[rk_iter+1][side][indx] = soln_Bn[side][indx];
            }
        }
        // END::: store soln_Bn
}

/// 1. Get the trace of Bn on the dual cell edges from the underlying tri-grid.
/// The trace is a piecewise polynomial defined on [-1, 1] in the Legendre basis.
/// On return, Bn[][] holds the integrals of the trace multiplied by 1 and by xi,
/// respectively, where 1 and xi are the test functions defined on [-1, 1].
/// WARNING: This function is called by both the time-stepping routine and the
/// subsequent reconstruction routine.

LOCAL void dual_cell_B_edge_from_tri_DG_P1(
        POLYGON      *polyg,
        POLYGON      *opolyg,
        Mid_soln     *mid_soln,
        Dual_cell_Mid_soln   *dual_cell_mid_soln,
        int          rk_iter,
        Front        *fr,
        double       Bn[][N_COEF_EDGE],
        double       partBn[][2][N_COEF_EDGE],
        int          debug)
{
	int          side, N_sides, i, dim = 2;
        TRI          *prev_tri, *next_tri;
        double       ****mass_edge = opolyg->mass_edge;
        Locstate     tri_sts[30]; 
        double       p_sqrt_area, n_sqrt_area;
        double       p_tri_Be[2][N_COEF_EDGE], n_tri_Be[2][N_COEF_EDGE];
        double       p_Bn[N_COEF_EDGE], n_Bn[N_COEF_EDGE]; 
        double       nor[3], t[3], dx, dy, x_bar, y_bar;
        double       *p_pt, *n_pt, bound;
        int          tri_comm_e, tmp_i;

        N_sides = opolyg->n_sides;
        if(rk_iter == 0) 
        {
            for(side = 0; side < N_sides; side++)
                tri_sts[side] = tri_at_polyg_vert(opolyg)[side]->st; 
        }
        else
        {
            for(side = 0; side < N_sides; side++)
                tri_sts[side] = mid_soln[tri_at_polyg_vert(opolyg)[side]->id].st[rk_iter];
        }

        /**
        if(debug == YES)
            printf("\n---START: Polygon %d in dual_cell_B_edge_from_tri_DG_P1()\n\n", opolyg->id);
        **/

        for(side = 0; side < N_sides; side++)
        {
            prev_tri = tri_at_polyg_vert(opolyg)[side];
            next_tri = tri_at_polyg_vert(opolyg)[(side+1)%N_sides];

            /// TMP
            /****
            if(debug == YES)
            {
                for(tri_comm_e = 0; tri_comm_e < 3; tri_comm_e++)
                {
                    if(Tri_on_side(prev_tri, tri_comm_e) == next_tri)
                        break;
                }
                for(tmp_i = 0; tmp_i < 3; tmp_i++)
                {
                    if(Tri_on_side(next_tri, tmp_i) == prev_tri)
                        break;
                }

                printf("\n-------side[%d], prev-tri(%p, id %d, %s) B soln:\n", 
                         side, prev_tri, prev_tri->id, print_TRI_BC_TYPE(prev_tri->BC_type));
                printf("Bx : %12.10g, %12.10g, %12.10g\n", dg_B(tri_sts[side])[0][0],
                                  dg_B(tri_sts[side])[0][1], dg_B(tri_sts[side])[0][2]);
                printf("By : %12.10g, %12.10g, %12.10g\n", dg_B(tri_sts[side])[1][0],
                                  dg_B(tri_sts[side])[1][1], dg_B(tri_sts[side])[1][2]);
                printf("On the common edge(%d) of prev-tri(%d): Bn %12.10g, %12.10g\n", tri_comm_e, prev_tri->id, 
                    fg_side_dgB(prev_tri)[tri_comm_e][0], fg_side_dgB(prev_tri)[tri_comm_e][1]);
                printf("On the common edge(%d) of next-tri(%d): Bn %12.10g, %12.10g\n", tmp_i, next_tri->id,
                    fg_side_dgB(next_tri)[tmp_i][0], fg_side_dgB(next_tri)[tmp_i][1]);
            }
            ****/
            /// END::: TMP

            p_sqrt_area = sqrt(fg_area(prev_tri));
            n_sqrt_area = sqrt(fg_area(next_tri));
            p_pt = Coords(Point_of_polyg(opolyg)[side]);   
            n_pt = Coords(Point_of_polyg(opolyg)[(side+1)%N_sides]);   
            
            for(i = 0; i < dim; i++)
                t[i] = polyg_side_vector(opolyg)[side][i];
            nor[0] = t[1]; nor[1] = -t[0];

            x_bar = 0.5*(p_pt[0] + n_pt[0]);
            y_bar = 0.5*(p_pt[1] + n_pt[1]);
            dx    = 0.5*(n_pt[0] - p_pt[0]);
            dy    = 0.5*(n_pt[1] - p_pt[1]);

            bound = mass_edge[side][0][0][0] - 1.0;

            // Bx from prev_tri
            p_tri_Be[0][0] = dg_B(tri_sts[side])[0][0] + 
                             dg_B(tri_sts[side])[0][1]/p_sqrt_area*(x_bar - p_pt[0]) +
                             dg_B(tri_sts[side])[0][2]/p_sqrt_area*(y_bar - p_pt[1]); 
           
            p_tri_Be[0][1] = dg_B(tri_sts[side])[0][1]/p_sqrt_area*dx + 
                             dg_B(tri_sts[side])[0][2]/p_sqrt_area*dy;

            // By from prev_tri
            p_tri_Be[1][0] = dg_B(tri_sts[side])[1][0] + 
                             dg_B(tri_sts[side])[1][1]/p_sqrt_area*(x_bar - p_pt[0]) +
                             dg_B(tri_sts[side])[1][2]/p_sqrt_area*(y_bar - p_pt[1]); 

            p_tri_Be[1][1] = dg_B(tri_sts[side])[1][1]/p_sqrt_area*dx + 
                             dg_B(tri_sts[side])[1][2]/p_sqrt_area*dy;

            p_Bn[0] = p_tri_Be[0][0]*nor[0] + p_tri_Be[1][0]*nor[1];
            p_Bn[1] = p_tri_Be[0][1]*nor[0] + p_tri_Be[1][1]*nor[1];

            // Bx from next_tri
            n_tri_Be[0][0] = dg_B(tri_sts[(side+1)%N_sides])[0][0] + 
                             dg_B(tri_sts[(side+1)%N_sides])[0][1]/n_sqrt_area*(x_bar - n_pt[0]) +
                             dg_B(tri_sts[(side+1)%N_sides])[0][2]/n_sqrt_area*(y_bar - n_pt[1]); 

            n_tri_Be[0][1] = dg_B(tri_sts[(side+1)%N_sides])[0][1]/n_sqrt_area*dx + 
                             dg_B(tri_sts[(side+1)%N_sides])[0][2]/n_sqrt_area*dy;

            // By from next_tri
            n_tri_Be[1][0] = dg_B(tri_sts[(side+1)%N_sides])[1][0] + 
                             dg_B(tri_sts[(side+1)%N_sides])[1][1]/n_sqrt_area*(x_bar - n_pt[0]) +
                             dg_B(tri_sts[(side+1)%N_sides])[1][2]/n_sqrt_area*(y_bar - n_pt[1]); 

            n_tri_Be[1][1] = dg_B(tri_sts[(side+1)%N_sides])[1][1]/n_sqrt_area*dx + 
                             dg_B(tri_sts[(side+1)%N_sides])[1][2]/n_sqrt_area*dy;

            n_Bn[0] = n_tri_Be[0][0]*nor[0] + n_tri_Be[1][0]*nor[1];
            n_Bn[1] = n_tri_Be[0][1]*nor[0] + n_tri_Be[1][1]*nor[1];

            /// on [-1, crs] the trace of Bn.             
            partBn[side][0][0] = p_Bn[0];
            partBn[side][0][1] = p_Bn[1];

            /// on [crs, 1] the trace of Bn.             
            partBn[side][1][0] = n_Bn[0];
            partBn[side][1][1] = n_Bn[1];

            /***
            /// from [-1, crs] to [crs, 1] \int Bn \times (1) d(\xi)
            Bn[side][0] = mass_edge[side][0][0][0]*p_Bn[0] + mass_edge[side][1][0][0]*n_Bn[0] + 
                          mass_edge[side][0][0][1]*p_Bn[1] + mass_edge[side][1][0][1]*n_Bn[1];
 
            /// from [-1, crs] to [crs, 1]. \int Bn \times (\xi) d(\xi)
            Bn[side][1] = mass_edge[side][0][1][0]*p_Bn[0] + mass_edge[side][1][1][0]*n_Bn[0] +
                          mass_edge[side][0][1][1]*p_Bn[1] + mass_edge[side][1][1][1]*n_Bn[1];
            ***/
        }

        // if(debug == YES)
        //     printf("\n---END: Polygon %d in dual_cell_B_edge_from_tri_DG_P1()\n\n", opolyg->id);
}

/// Sample Ez = -(u*By - v*Bx) along the dual-cell edge from prev_pt to next_pt.
///
/// The edge is mapped from [-1, 1] with the file-level quadrature abscissae q[0..Gauss_N-1].
/// At each quadrature point the conserved state is reconstructed (con_u_at_pt) from prev_tri
/// when the point tests inside prev_tri's three vertices, and from next_tri otherwise.
/// NOTE(review): con_u[0] is used as the divisor of con_u[1], con_u[2] (presumably density
/// and x/y momentum) and con_u[5], con_u[6] are presumably Bx, By -- confirm against
/// con_u_at_pt().
///
/// Parameters:
///   prev_tri, next_tri  triangles supplying the states on the two ends of the edge.
///   prev_pt,  next_pt   end points of the edge.
///   mid_soln            intermediate Runge-Kutta states indexed by triangle id; read only
///                       when rk_iter != 0.
///   rk_iter             0: use tri->st;  otherwise mid_soln[tri->id].st[rk_iter].
///   Ez                  output, Ez[k] for k = 0 .. Gauss_N-1.
///   Ez_vert             output, Ez_vert[0] at prev_pt using prev_tri's state and
///                       Ez_vert[1] at next_pt using next_tri's state.
///   debug               YES prints, per quadrature point, which triangle/state was used.
///
/// The scratch array holds 20 quadrature points, so this assumes Gauss_N <= 20.
///
/// Retired experiment hooks (no longer carried as commented-out code):
///   - overriding Ez / Ez_vert with values from mag_vort_sol(crds, RK_bdry_time, econ_u);
///   - forcing Ez[k] = 0 and Ez_vert[0] = Ez_vert[1] = 0 (zero electric flux);
///   - a Gauss-Lobatto variant using Lo_q[] / Lobatto_N whose first and last nodes also
///     filled Ez_vert[0] and Ez_vert[1].
LOCAL void dual_cell_edge_Ez_quadrature(
        TRI      *prev_tri,
        TRI      *next_tri, 
        POINT    *prev_pt,
        POINT    *next_pt,
        Mid_soln     *mid_soln,
        int          rk_iter,
        double       *Ez,
        double       *Ez_vert,
        int          debug)
{
        Locstate    st_prev, st_next, st_use;
        TRI         *tri_use;
        POINT       **prev_pts;
        double      *prev_cent, *next_cent, *cent_use;
        double      p_sqrt_area, n_sqrt_area, sqrt_area_use;
        double      qcrds[20][4], con_u[10];
        double      edge_mid[2], edge_half[2];
        double      vx, vy;
        int         k, d;

        /// pick the state set for this Runge-Kutta stage
        if(rk_iter != 0)
        {
            st_prev = mid_soln[prev_tri->id].st[rk_iter];
            st_next = mid_soln[next_tri->id].st[rk_iter];
        }
        else
        {
            st_prev = prev_tri->st;
            st_next = next_tri->st;
        }

        prev_cent   = fg_centroid(prev_tri);
        next_cent   = fg_centroid(next_tri);
        p_sqrt_area = sqrt(fg_area(prev_tri));
        n_sqrt_area = sqrt(fg_area(next_tri));
        prev_pts    = Point_of_tri(prev_tri);

        /// edge midpoint and half-vector; a quadrature point is edge_mid + edge_half*q[k]
        for(d = 0; d < 2; d++)
        {
            edge_mid[d]  = (Coords(next_pt)[d] + Coords(prev_pt)[d])/2.0;
            edge_half[d] = (Coords(next_pt)[d] - Coords(prev_pt)[d])/2.0;
        }

        /// Ez at the Gauss points
        for(k = 0; k < Gauss_N; k++)
        {
            for(d = 0; d < 2; d++)
                qcrds[k][d] = edge_mid[d] + edge_half[d]*q[k];

            /// choose the triangle whose polynomial is evaluated at this point
            if(Point_in_polygon_2d(prev_pts, 3, qcrds[k]) == YES)
            {
                tri_use       = prev_tri;
                st_use        = st_prev;
                cent_use      = prev_cent;
                sqrt_area_use = p_sqrt_area;
            }
            else
            {
                tri_use       = next_tri;
                st_use        = st_next;
                cent_use      = next_cent;
                sqrt_area_use = n_sqrt_area;
            }

            con_u_at_pt(st_use, qcrds[k], cent_use, sqrt_area_use, con_u);
            if(debug == YES)
            {
                printf("quadrature (%g, %g) compute E from tri %d, state %p\n", qcrds[k][0], qcrds[k][1],
                                tri_use->id, st_use);
            }

            vx = con_u[1]/con_u[0];
            vy = con_u[2]/con_u[0];
            Ez[k] = -(vx*con_u[6] - vy*con_u[5]);
        }

        /// Ez at the start vertex, always taken from prev_tri's state
        con_u_at_pt(st_prev, Coords(prev_pt), prev_cent, p_sqrt_area, con_u);
        vx = con_u[1]/con_u[0];
        vy = con_u[2]/con_u[0];
        Ez_vert[0] = -(vx*con_u[6] - vy*con_u[5]);

        /// Ez at the end vertex, always taken from next_tri's state
        con_u_at_pt(st_next, Coords(next_pt), next_cent, n_sqrt_area, con_u);
        vx = con_u[1]/con_u[0];
        vy = con_u[2]/con_u[0];
        Ez_vert[1] = -(vx*con_u[6] - vy*con_u[5]);
}

LOCAL void adv_E_vertice_B_edge_DG(        
	TRI      *tri,
        TRI      *otri,
        float    dt,
        float    alpha,
        Mid_soln *mid_soln,  
        int      rk_iter,
        Front    *fr)
{
        int      i, j, k, tri_N, closed, side, N_side, n_eqn, dim = 2, eN;
        TRI      *tris[50]; 
        double   conU[50][10], tmpu[50], tmp_E_z, up[8], tmpe, sum, eps = 1.0e-6, wei[50];
        int      debug = NO, debug_flag = NO;
        Locstate prev_sts[50];
        double   *cents[50], therm_press[50], ls_A[50][3], *pt_crds, lin_fit_press[4];
        double   diam, beta = 0.5, p_grad_L1, therm_p_min, therm_p_max, p_nor[3], ang_wei[50], t[3], nor[3];
        int      p_switch = NO, N_side2, v_switch = NO, p_min_I[3];
        double   vel_u[50], vel_v[50], mag_c[50], lin_fit_u[4], lin_fit_v[4], mag_c_min;
        double   vel_div, delta = 0.1, tmp, tmp_alpha[50], max_alpha = -HUGE_VAL, max_alpha_new = - HUGE_VAL;
        double   half_tan[50], mid_ray[50][3], single_st_tmp_alpha[50];
        double   LLF_U_star[8], pre_tri_pt[3],edge_Ez[3][20];
        double   new_alpha, v_nor[3];

        // printf("\n**************Tri %d entered adv_E_vertice_B_edge_DG(), on boundary; tri type %d\n",
        //         tri->id, tri->BC_type);fflush(stdout);

        if(otri->BC_type == SUBDOMAIN)
        {
            /***
            if(tri->id == 235 || tri->id == 178)
            {
                printf("\n**************Tri %d entered adv_E_vertice_B_edge_DG(), on boundary; tri type %d\n",
                          tri->id, tri->BC_type);
                print_tri_crds(tri);
                // verbose_print_state("old tri state",otri->st);
            }
            ***/
            return;
        }

        if(Boundary_tri(otri) || tri_on_phy_bdry(tri))
        {
            if (NO == bdry_tri_adv_fw(tri,otri,fr))
                return;
        }
        //printf("After returns, begin computation.\n");

        // if(tri->id == 1866 || tri->id == 2055)
        // if(tri->id == 2552 || tri->id == 2055)
        // if(tri->id == 3763 || tri->id ==844)
        // if(tri->id == 350 || tri->id == 405)
        // if(tri->id == 235 || tri->id == 1298)
        // if(tri->id == 16081)
        // if(tri->id == 17357)
        /****
        if(tri->id == 3067)
        {
            printf("\n**************Tri %d entered adv_E_vertice_B_edge_DG()\n", tri->id);
            // print_tri_crds(tri);
            // verbose_print_state("old tri state",otri->st);
            debug = YES;
        }
        ****/

        /***
        if(otri->id == 27087)
        {
            Locstate prev_st;
            printf("***************************\n");
            printf("Tri[%d] entered In adv_E_vertice_B_edge_DG(), step %d, iteration %d\n",
                         otri->id, fr->step, rk_iter);
            print_tri_crds(otri);
            if(rk_iter == 0)
                prev_st = otri->st;
            else
                prev_st = mid_soln[otri->id].st[rk_iter];
            verbose_print_state("state", prev_st);
        }
        ***/

        // incircle diam of the tri = 2.0*2.0*area/(sum of side length) //Huijing
        diam = fg_diam(otri);

        // for 3 points of tri
        for(k = 0; k < 3; k++)
        {
            if(Index_of_point(Point_of_tri(otri)[k]) == YES && debug == NO)
            // if(Index_of_point(Point_of_tri(otri)[k]) == YES)
                continue;

            // If the pt is fully surounded by tris, return YES; otherwise return NO;
            closed = collect_tris_at_vert(otri, Point_of_tri(otri)[k], tris, &tri_N);
            if(tri_N >= 50)
            {
                printf("ERROR: adv_E_vertice_B_edge_DG(), exceed allocated %d\n", tri_N );
                clean_up(ERROR);
            }

            //////  Compute Pressure gradient by least square
            if(rk_iter == 0)
            {
                for(i = 0; i < tri_N; i++)
                    prev_sts[i] = tris[i]->st;
            }
            else
            {
                for(i = 0; i < tri_N; i++)
                    prev_sts[i] = mid_soln[tris[i]->id].st[rk_iter];
            }

            pt_crds = Coords(Point_of_tri(otri)[k]);
            therm_p_min = HUGE_VAL; therm_p_max = -HUGE_VAL;
            mag_c_min = HUGE_VAL;
            for(i = 0; i < tri_N; i++) 
            {
                cents[i] = fg_centroid(tris[i]);
                // gas pressure from equation of state
                therm_press[i] = POLY_thermal_pressure_MHD(prev_sts[i]);

                ls_A[i][0] = 1.0;
                ls_A[i][1] = cents[i][0] - pt_crds[0];
                ls_A[i][2] = cents[i][1] - pt_crds[1];
                if(therm_press[i] < therm_p_min)
                {
                    therm_p_min = therm_press[i];
                    p_min_I[k] = i;
                }
                if(therm_press[i] > therm_p_max)
                    therm_p_max = therm_press[i];

                vel_u[i] = Mom(prev_sts[i])[0]/Dens(prev_sts[i]);
                vel_v[i] = Mom(prev_sts[i])[1]/Dens(prev_sts[i]);
                mag_c[i] = POLY_magnetosonic_speed_MHD(prev_sts[i]);
                if(mag_c[i] < mag_c_min)
                    mag_c_min = mag_c[i];
            }
            // Linear lease squares: x = (A'A)^(-1)A'b 
            least_sqr_fit_linear_poly(therm_press, ls_A, tri_N, lin_fit_press);

            // SW1 
            p_grad_L1 = diam*(fabs(lin_fit_press[1]) + fabs(lin_fit_press[2])); // undivided diff
            if(p_grad_L1 > (beta*therm_p_min))
                p_switch = YES;
            else
                p_switch = NO;

            sum = sqrt(sqr(lin_fit_press[1]) + sqr(lin_fit_press[2]));
            if(sum < 1.0e-12)
                sum = 1.0e-12; // to avoid division by zero.
            p_nor[0] = lin_fit_press[1]/sum; 
            p_nor[1] = lin_fit_press[2]/sum; 

            // SW2 
            least_sqr_fit_linear_poly(vel_u, ls_A, tri_N, lin_fit_u);
            least_sqr_fit_linear_poly(vel_v, ls_A, tri_N, lin_fit_v);
            vel_div = diam*(lin_fit_u[1] + lin_fit_v[2]); // undivided diff to approx. divergence of velocity
            if((-delta*mag_c_min) > vel_div)
            {
                v_switch = YES;
                sum = sqrt(sqr(lin_fit_u[0]) + sqr(lin_fit_v[0]));
                v_nor[0] = lin_fit_u[0]/sum;
                v_nor[1] = lin_fit_v[0]/sum;
            }
            else
                v_switch = NO;

            // printf("L4177\n");fflush(stdout);

            if(debug == YES)
            {
                printf("adv_E_vertice_B_edge_DG(), vel_div %g, mag_c_min = %g, p_grad_L1 %g, p_min = %g, p_max =%g "
                       "  on tri %d, p_switch = %d, v_switch = %d, p_min_I = %d\n",
                             vel_div, -delta*mag_c_min, p_grad_L1, therm_p_min, therm_p_max, otri->id, p_switch, v_switch, p_min_I[k]);
            }

            //// TMP
            if(debug == YES)
                printf("tri_N collected = %d for vertex %d, coords[%g, %g]\n", tri_N, k, 
                    Coords(Point_of_tri(otri)[k])[0],
                    Coords(Point_of_tri(otri)[k])[1]);

            /****
            N_side = 0;
            for(i = 0; i < tri_N; i++)
            {
                for(side = 0; side < 3; side++)
                {
                    if(Point_of_tri(tris[i])[side] == Point_of_tri(otri)[k])
                    {
                        if(NULL != Tri_on_side(tris[i],side))
                        {
                            half_tan[N_side] = tan_half_angle(Point_of_tri(otri)[k], tris[i], side,mid_ray[N_side]);
                            single_st_tmp_alpha[N_side] = Max_wave_speed_at_pt_single_state(Point_of_tri(otri)[k],tris[i], 
                                                         mid_soln, rk_iter, mid_ray[N_side], NO);
                            N_side++;
                        }
                    }
                }
            }
            ****/
            
            /// compute HLL FLUX or LLF flux at the point for each tri
            /// The pt is at the starting point of the edge.
            ///// Begin: compute max_wave_speed at vertex
            N_side = 0;
            max_alpha = -HUGE_VAL;
            max_alpha_new = -HUGE_VAL;

            for(i = 0; i < tri_N; i++)
            {
                for(side = 0; side < 3; side++)
                {
                    if(Point_of_tri(tris[i])[side] == Point_of_tri(otri)[k])
                    {
                        if(NULL != Tri_on_side(tris[i],side))
                        {
                            //get alpha from both tris of the same side
                            // tmp_alpha[N_side] = Max_wave_speed_at_pt(Point_of_tri(otri)[k], tris[i], side, mid_soln, rk_iter, NO);
                            tmp_alpha[N_side] = Max_wave_speed_at_pt(Point_of_tri(otri)[k], tris[i], side, mid_soln, rk_iter, debug);
                            N_side++;
                        }
                    } 
                }
            }

            //printf("L4234\n");fflush(stdout);

            // printf("L4234: N_side=%d tri_N=%d\n",N_side,tri_N);
            // return;

            for(i = 0; i < N_side; i++)
            {
                if(tmp_alpha[i] > max_alpha)
                    max_alpha_new = max_alpha = tmp_alpha[i];
                // if(single_st_tmp_alpha[i] > max_alpha_new)
                //     max_alpha_new = single_st_tmp_alpha[i];
            }
            ///// End: compute max_wave_speed at vertex


            //// Begin: Simple LLF wave model
            //// LLF_U_star_model(Coords(Point_of_tri(otri)[k]), mid_soln, rk_iter, tris, max_alpha_new, half_tan,mid_ray, tri_N, LLF_U_star,debug);
            //// END: Simple LLF wave model

            N_side = 0;
            for(i = 0; i < tri_N; i++)
            {
                for(side = 0; side < 3; side++) // test which tris[i][side] is the point of interest
                {
                    if(Point_of_tri(tris[i])[side] == Point_of_tri(otri)[k])
                    {
                        if(NULL != Tri_on_side(tris[i],side))
                        {
                            // HLL_soln_at_pt(Point_of_tri(otri)[k], tris[i], side, mid_soln, rk_iter, conU[N_side]);
                             
                            /***
                            if(tri->id == 0 && tris[i]->id == 152 && debug == YES)
                            {
                                printf("work with neighboring tri[%d], its crds\n", tris[i]->id);
                                print_tri_crds(tris[i]);
                                verbose_print_state("nghbr tri state",tris[i]->st);
                                debug_flag = YES;
                            }    
                            else
                                debug_flag = NO;
                            ***/
                            /**
                            if(debug == YES && 
                               ((tris[i]->id == 1299 && side == 2) || (tris[i]->id == 179 && side == 2)) 
                              )
                            {
                                printf("work with neighboring tri[%d], side[%d]\n", tris[i]->id, side);
                                print_tri_crds(tris[i]);
                                verbose_print_state("nghbr tri state",tris[i]->st);
                                debug_flag = YES;
                            }
                            else 
                                debug_flag = NO;
                            **/
                            // conU[N_side][0] = Elec_z_at_pt(Point_of_tri(otri)[k], tris[i], side, mid_soln, rk_iter, debug_flag);
                            tmpu[N_side] = Elec_z_at_pt(Point_of_tri(otri)[k], tris[i], side, mid_soln, rk_iter, max_alpha, debug);
                            N_side++;
                            //// TMP
                            /****
                            if(debug == YES && ( (tri->id == 235 && k == 0) || (tri->id == 235 && k == 1) 
                                              || (tri->id == 1298 && k == 1) || (tri->id == 1298 && k == 2)  ))
                            {
                                if((tris[i]->id == 1299 && side == 2) || (tris[i]->id == 179 && side == 2))
                                {
                                    printf("tri[%d], edge[%d] E_z = %g, for pt[%d] coords(%g %g)\n", 
                                            tris[i]->id, side, conU[N_side-1][0], k, 
                                        Coords(Point_of_tri(otri)[k])[0], Coords(Point_of_tri(otri)[k])[1] );
                                    printf("on side[%d] dir[%g %g]\n", side, fg_side_vector(tris[i])[side][0], fg_side_vector(tris[i])[side][1]);
                                }
                            }
                            ****/
                        }
                    }
                }
            }

            Index_of_point(Point_of_tri(otri)[k]) = YES;

            if(p_switch == NO && v_switch == YES)
            {   ///// use compressive velocity's direction instead
                p_nor[0] = v_nor[0];
                p_nor[1] = v_nor[1];
            }

            tmp_E_z = 0.0;
            sum = 0.0;
            // p_switch = YES;
            if(p_switch == YES || v_switch == YES)
            {
                // printf("P_switch enabled\n");
                // printf("adv_E_vertice_B_edge_DG(), vel_div %g, mag_c_min = %g, p_grad_L1 %g, p_min = %g   on tri %d\n",
                //                  vel_div, -delta*mag_c_min, p_grad_L1, beta*therm_p_min, otri->id);

                N_side2 = 0;
                for(i = 0; i < tri_N; i++)
                {
                    for(side = 0; side < 3; side++)
                    {
                        if(Point_of_tri(tris[i])[side] == Point_of_tri(otri)[k])
                        {
                            if(NULL != Tri_on_side(tris[i],side))
                            {
                                for(j = 0; j < dim; j++)
                                    t[j] = fg_side_vector(tris[i])[side][j];
                                nor[0] = t[1];
                                nor[1] = -t[0];
                                tmp = nor[0]*p_nor[0] + nor[1]*p_nor[1]; //// use dot product.
                                ang_wei[N_side2] = sqr(sqr(tmp)); 
                                N_side2++;
                            }
                        }
                    }
                }

                for(i = 0; i < N_side; i++)
                {
                    wei[i] = (ang_wei[i]+1.0e-11);
                    // wei[i] = 1.0;
                    sum += wei[i];
                    // wei[i] = (ang_wei[i])*fabs(tmpu[i]) + 1.0e-10;
                    // sum += wei[i];
                }
                tmp_E_z = 0.0; 
                for(i = 0; i < N_side; i++)
                    // tmp_E_z += (ang_wei[i]+1.0e-11)/sum*tmpu[i];
                    tmp_E_z += wei[i]/sum*tmpu[i];

                ///// test:::::::
                /***
                {
                    int    found_tri_id = -100;

                    if(p_switch == YES)
                    {
                        for(i = 0; i < dim; i++)
                            pre_tri_pt[i] = pt_crds[i] - p_nor[i]*diam*0.1;
                        for(i = 0; i < tri_N; i++)
                        {
                            if(pt_in_tri(pre_tri_pt, tris[i]) == YES)
                            {
                                found_tri_id = i;
                                break;
                            }
                        }

                        if(found_tri_id >= 0)
                        {
                            if(debug)
                            {
                                printf("found tri--[%d], press[%g]\n", found_tri_id, therm_press[found_tri_id]);
                            }
                            if(found_tri_id == 0)
                            {
                                if(fabs(tmpu[0]) < fabs(tmpu[N_side-1]))
                                    tmp_E_z = tmpu[0];
                                else
                                    tmp_E_z = tmpu[N_side-1];
                            }
                            else
                            {
                                if(fabs(tmpu[found_tri_id]) < fabs(tmpu[found_tri_id-1]))
                                    tmp_E_z = tmpu[found_tri_id];
                                else
                                    tmp_E_z = tmpu[found_tri_id-1];
                            }
                        }
                        else
                        {
                            if(p_min_I[k] == 0)
                            {
                                if(fabs(tmpu[0]) < fabs(tmpu[N_side-1]))
                                    tmp_E_z = tmpu[0];
                                else
                                    tmp_E_z = tmpu[N_side-1];
                            }
                            else
                            {
                                if(fabs(tmpu[p_min_I[k]]) < fabs(tmpu[p_min_I[k]-1]))
                                    tmp_E_z = tmpu[p_min_I[k]];
                                else
                                    tmp_E_z = tmpu[p_min_I[k]-1];
                            }
                        }
                    }
                    else
                    {
                        if(p_min_I[k] == 0)
                        {
                            if(fabs(tmpu[0]) < fabs(tmpu[N_side-1]))
                                tmp_E_z = tmpu[0];
                            else
                                tmp_E_z = tmpu[N_side-1];
                        }
                        else
                        {
                            if(fabs(tmpu[p_min_I[k]]) < fabs(tmpu[p_min_I[k]-1]))
                                tmp_E_z = tmpu[p_min_I[k]];
                            else
                                tmp_E_z = tmpu[p_min_I[k]-1];
                        }
                    }
                    if(debug)
                    {
                        printf("tri[%d], press[%g], Ez[%g, %g]\n", 0, therm_press[0], tmpu[0], tmpu[N_side-1]);
                        for(i = 1; i < N_side; i++)
                        {
                            printf("tri[%d], press[%g], Ez[%g, %g]\n", i, therm_press[i], tmpu[i], tmpu[i-1]);
                        }
                    }
                } 
                *****/
                ///// END::: test:::::::

                if(isnan(tmp_E_z))
                {
                    printf("ERROR: adv_E_vertice_B_edge_DG(), tmp_E_z is Nan, tri[%d], sum %g\n",otri->id, sum);
                    printf("linear fit press %g %g %g\n", lin_fit_press[0], lin_fit_press[1], lin_fit_press[2]);
                    printf("surrounding press\n");
                    for(i = 0; i < tri_N; i++)
                        printf("press[%d] = %g\n", i, therm_press[i]);
                   
                    clean_up(ERROR);
                }
            }
            else ///  if(p_switch == YES || v_switch == YES)
            {
                if(MAX_N_COEF == 3 || MAX_N_COEF == 1)
                {
                    for(i = 0; i < N_side; i++)
                    {
                        tmp_E_z += tmpu[i];
                        // wei[i] = 1.0/(eps + sqr(tmpu[i])); // for weighted combine
                        // wei[i] = 1.0/(eps + fabs(tmpu[i])); // for weighted combine
                        // wei[i] = (1.0e-10 + fabs(tmpu[i]));    // for weighted combine
                        // sum += wei[i];                           // for weighted combine
                        // printf("add[%d] = %g to sume of Ez\n", i, conU[i][0]); 
                    }
                    tmp_E_z /= N_side;

                    // tmp_E_z = biased_min_mod(tmpu,N_side,tmp_E_z);
                    //// weighted combine
                    /// tmp_E_z = 0.0;
                    /// for(i = 0; i < N_side; i++)
                    ///     tmp_E_z += wei[i]/sum*tmpu[i];
                    //// END: weighted combine
                }
                else if(MAX_N_COEF == 6)
                {
                    for(i = 0; i < N_side; i++)
                    {
                        tmp_E_z += tmpu[i];
                        // wei[i] = 1.0/(eps + fabs(tmpu[i])); // for weighted combine
                        // wei[i] = (1.0e-10 + fabs(tmpu[i]));    // for weighted combine
                        // sum += wei[i];                           // for weighted combine
                    }
                    tmp_E_z /= N_side;
    
                    // tmp_E_z = biased_min_mod(tmpu,N_side,tmp_E_z);
                    //// weighted combine
                    //// tmp_E_z = 0.0;
                    //// for(i = 0; i < N_side; i++)
                    ////     tmp_E_z += wei[i]/sum*tmpu[i];
                    //// END: weighted combine
                }
            }

            E_z_of_point(Point_of_tri(otri)[k]) = tmp_E_z;
            E_z_of_tri(otri)[k] = tmp_E_z;

            // printf("\ntri[%d], average point Ez = %13.12g, N_side = %d for point %d\n",
            //                  otri->id, tmp_E_z, N_side, k);

            if(debug == YES)
            {
                /***
                mag_vort_sol(Coords(Point_of_tri(otri)[k]),fr->time,tmpu);
                for(i = 0; i < 2; i++)
                    up[i+1] = tmpu[i+1]/tmpu[0];
                tmpe = -up[1]*tmpu[6] + up[2]*tmpu[5];

                printf("\ntri[%d], average point Ez = %13.12g, N_side = %d for point %d, analytic Ez = %g\n",
                             tri->id, tmp_E_z, N_side, k, tmpe);
                ***/
                printf("Ez at edges: ");
                for(i = 0; i < N_side; i++) printf("%g; ", tmpu[i]);
                printf(" Weighted Ez = %g", tmp_E_z);
                printf("\n");

                printf("alpha at edges: ");
                for(i = 0; i < N_side; i++) printf("%g; ", tmp_alpha[i]);
                printf("\n");
                
                // printf("Weight at edges: ");
                // for(i = 0; i < N_side; i++) //     printf("%g; ", wei[i]/sum); // printf("\n");

                printf("press of tris: ");
                for(i = 0; i < tri_N; i++)
                    printf("%g; ", therm_press[i]);
                printf("\n");
                
                // printf("LLF_U_star: rho= %g, mom[0]= %g, mom[1]= %g, Bx= %g, By= %g, E_z=%g\n", 
                //         LLF_U_star[0], LLF_U_star[1], LLF_U_star[2], LLF_U_star[5], LLF_U_star[6],
                //         LLF_U_star[2]/LLF_U_star[0]*LLF_U_star[5] - LLF_U_star[1]/LLF_U_star[0]*LLF_U_star[6]);
                printf("max_alpha_new = %g\n\n", max_alpha_new);
                // printf("fr->time = %g, dt = %g\n", fr->time, dt);
                // clean_up(0);

                if(isnan(tmp_E_z))
                {
                    printf("ERROR: adv_E_vertice_B_edge_DG(), point E_z nan\n");
                    printf("point %g %g of tri[%d]\n", Coords(Point_of_tri(otri)[k])[0], 
                                 Coords(Point_of_tri(otri)[k])[1], otri->id);
                    print_tri_crds(otri);
                    clean_up(ERROR);
                }
            } //// END::: if(debug == YES)
 
            /// Compute Ez at Gauss quadrature point on edge
            /// edge_Ez_quadrature(tri,otri,max_alpha,mid_soln,rk_iter,fr,k, edge_Ez[k]);
        } /// END: for(k = 0; k < 3; k++) // for 3 points of tri

        //// Now compute normal B on edge.
        // try to change it to DG 
        for(side = 0; side < 3; side++)
        {
            if(Index_of_point(Point_of_tri(otri)[side]) == YES &&
               Index_of_point(Point_of_tri(otri)[(side+1)%3]) == YES)
            {
                // adv_B_edge(side, otri, tri, dt, rk_iter, mid_soln, fr); //OLD for FV
                // printf("----!!Begin edge_Ez_quadrature()\n");
                new_alpha = edge_Ez_quadrature(tri,otri,max_alpha,mid_soln,rk_iter,fr,side, edge_Ez[side]);
                // printf("----!!After edge_Ez_quadrature()\n");
                // printf("----!!Before adv_B_edge_DG()\n");
                adv_B_edge_DG(side,tri,otri,dt,new_alpha,mid_soln,rk_iter,fr,NULL,YES,YES,YES,edge_Ez[side]);
                // printf("----!!After adv_B_edge_DG()\n");
            }
            else
            {
                printf("ERROR: adv_E_vertice_B_edge_DG(), E_z of tri[%d] was not computed\n", tri->id);
                clean_up(ERROR);  
            }
        }
        if(YES == debug)
        {
            printf("\n**************Tri %d left adv_E_vertice_B_edge_DG()\n", tri->id);fflush(stdout);
        }
}

/*
*                       edge_Ez_quadrature():
*
*       Fills Ez[0..Gauss_N-1] with the numerical E_z at the quadrature
*       points of edge `side` of otri and returns the largest alpha computed
*       by compute_alpha_of_LF_flux_MHD() over those points (0.0 when no
*       computed alpha is positive).
*
*       Each quadrature point is placed at the edge midpoint plus q[k] times
*       the half edge vector.  There the conserved variables are evaluated
*       twice with con_u_at_pt(): once from the state of otri and once from
*       the state of the tri across the edge.  The states are the current
*       ones when rk_iter == 0, otherwise the mid_soln stage states.
*
*       tri     not used
*       otri    tri whose edge is processed (geometry and own-side state)
*       alpha   not used; the alpha computed at each point is used instead
*       Ez      output, must have room for Gauss_N values
*
*       A NaN alpha or a NaN E_z is reported and followed by clean_up(ERROR).
*       NOTE(review): the neighbor across `side` is used unchecked, so
*       callers are presumably expected to pass only edges that have one.
*/
LOCAL double edge_Ez_quadrature(
        TRI      *tri,
        TRI      *otri,
        float    alpha,
        Mid_soln *mid_soln,
        int      rk_iter,
        Front    *fr,
        int      side,
        float    *Ez)
{
        double   *own_cent = fg_centroid(otri);
        static   Locstate wk_own = NULL, wk_nb;  /* scratch states, allocated once */
        TRI      *nb;
        Locstate own_st, nb_st;
        float    *tail, *head, qpt[4];
        double   *nb_cent, own_h, nb_h;
        double   own_U[10], nb_U[10], nor[3];
        double   a_loc, a_max = 0.0;
        int      d, g, dim = 2;

        if(wk_own == NULL)
        {
            g_alloc_state(&wk_own, fr->sizest);
            g_alloc_state(&wk_nb, fr->sizest);
            assign(wk_own, otri->st, fr->sizest);
            assign(wk_nb, otri->st, fr->sizest);
        }

        nb = Tri_on_side(otri, side);
        nb_cent = fg_centroid(nb);

        own_h = sqrt(fg_area(otri));
        nb_h = sqrt(fg_area(nb));

        /* stage 0 reads the tri states, later stages read the stored RK states */
        own_st = (rk_iter == 0) ? otri->st : mid_soln[otri->id].st[rk_iter];
        nb_st = (rk_iter == 0) ? nb->st : mid_soln[nb->id].st[rk_iter];
        assign(wk_own, own_st, fr->sizest);
        assign(wk_nb, nb_st, fr->sizest);

        /* end points of the edge: it runs from vertex side to vertex side+1 */
        tail = Coords(Point_of_tri(otri)[side]);
        head = Coords(Point_of_tri(otri)[(side+1)%3]);

        /* side vector turned by -90 degrees */
        nor[0] = fg_side_vector(otri)[side][1];
        nor[1] = -fg_side_vector(otri)[side][0];

        for(g = 0; g < Gauss_N; g++)
        {
            for(d = 0; d < dim; d++)
                qpt[d] = (head[d] + tail[d])/2.0 +
                         (head[d] - tail[d])/2.0*q[g];

            /* traces of both polynomial solutions at the quadrature point */
            con_u_at_pt(own_st, qpt, own_cent, own_h, own_U);
            con_u_at_pt(nb_st, qpt, nb_cent, nb_h, nb_U);

            con_u_to_state(own_U, dim, wk_own);
            con_u_to_state(nb_U, dim, wk_nb);

            /* alpha computed from the two one-sided states at this point */
            a_loc = compute_alpha_of_LF_flux_MHD(wk_own,wk_nb,nor);
            if(isnan(a_loc))
            {
                printf("new_alpha=%g\n",a_loc);
                clean_up(ERROR);
            }

            a_max = (a_loc > a_max) ? a_loc : a_max;

#if defined(Ez_USE_LF_FLUX)
            Ez[g] = LF_soln_ideal_MHD_Elec_z(own_U,nb_U,nor,wk_own,wk_nb,a_loc,NO);
#else // if defined(Ez_USE_LF_FLUX)
            Ez[g] = (HLL_soln_ideal_MHD_Elec_z(own_U,nb_U,nor,wk_own,wk_nb,NO));
#endif // if defined(Ez_USE_LF_FLUX)
            if(isnan(Ez[g]))
            {
                printf("Ez=%g new_alpha=%g\n",Ez[g],a_loc);fflush(stdout);
                clean_up(ERROR);
            }
        }

        return a_max;
}

/*
*               adv_E_vertice_B_edge_LLF_U_star():
*
*       Sets E_z at every vertex of otri that has not been handled yet
*       (Index_of_point != YES) and then advances the normal B component on
*       the three edges of the triangle with adv_B_edge().
*
*       For one vertex:
*         1. collect the tris around the vertex (at most 49 are accepted);
*         2. fit linear polynomials to thermal pressure and velocity over the
*            tri centroids and derive the p_switch / v_switch flags (these
*            are only reported in the trace output);
*         3. for every collected tri that has a neighbor across the edge
*            starting at the vertex, compute the tangent of the half angle,
*            the mid ray and two wave speed estimates;
*         4. call LLF_U_star_model() with the largest wave speed found and
*            set E_z = U*[2]/U*[0]*U*[5] - U*[1]/U*[0]*U*[6].
*
*       tri       tri whose id selects tracing; passed on to adv_B_edge()
*       otri      tri providing geometry, vertices and stage-0 state
*       dt        time step, passed on to adv_B_edge()
*       alpha     not used
*       mid_soln  Runge-Kutta stage storage, indexed by tri id
*       rk_iter   current stage; 0 reads tri states, otherwise mid_soln
*       fr        front, passed on to bdry_tri_adv_fw() and adv_B_edge()
*
*       An earlier variant that averaged per-edge E_z values (optionally
*       weighted by the pressure gradient direction) was disabled and is no
*       longer kept here.
*/
LOCAL void adv_E_vertice_B_edge_LLF_U_star(
        TRI      *tri,
        TRI      *otri,
        float    dt,
        float    alpha,
        Mid_soln *mid_soln,
        int      rk_iter,
        Front    *fr)
{
        int      i, k, side, tri_N, N_side;
        int      p_switch, v_switch, debug = NO;
        TRI      *tris[50];
        Locstate prev_sts[50];
        double   *cents[50], *pt_crds;
        double   therm_press[50], vel_u[50], vel_v[50], mag_c[50], ls_A[50][3];
        double   lin_fit_press[4], lin_fit_u[4], lin_fit_v[4];
        double   diam, p_grad_L1, vel_div, therm_p_min, mag_c_min;
        double   beta = 0.5, delta = 0.1;
        double   half_tan[50], mid_ray[50][3];
        double   tmp_alpha[50], single_st_tmp_alpha[50], max_alpha_new;
        double   LLF_U_star[8], tmp_E_z;

        /* SUBDOMAIN tris are left untouched by this routine. */
        if(otri->BC_type == SUBDOMAIN)
            return;

        if(Boundary_tri(otri) || tri_on_phy_bdry(tri))
        {
            if (NO == bdry_tri_adv_fw(tri,otri,fr))
                return;
        }

        /* Hard-wired trace switch for a single tri id. */
        if(tri->id == 16432)
        {
            printf("\n**************Tri %d entered adv_E_vertice_B_edge()\n", tri->id);
            debug = YES;
        }

        diam = fg_diam(otri);

        for(k = 0; k < 3; k++)
        {
            /* Vertices already done are skipped, unless tracing forces a recompute. */
            if(Index_of_point(Point_of_tri(otri)[k]) == YES && debug == NO)
                continue;

            (void) collect_tris_at_vert(otri, Point_of_tri(otri)[k], tris, &tri_N);

            /*
            * Check the count before it is used as a bound on the 50-entry
            * scratch arrays below (it used to be checked only after they
            * had been filled).
            */
            if(tri_N >= 50)
            {
                printf("ERROR: adv_E_vertice_B_edge(), exceed allocated %d\n", tri_N );
                clean_up(ERROR);
            }

            if(rk_iter == 0)
            {
                for(i = 0; i < tri_N; i++)
                    prev_sts[i] = tris[i]->st;
            }
            else
            {
                for(i = 0; i < tri_N; i++)
                    prev_sts[i] = mid_soln[tris[i]->id].st[rk_iter];
            }

            //////  Pressure and velocity gradients by least squares
            pt_crds = Coords(Point_of_tri(otri)[k]);
            therm_p_min = HUGE_VAL;
            mag_c_min = HUGE_VAL;
            for(i = 0; i < tri_N; i++)
            {
                cents[i] = fg_centroid(tris[i]);
                therm_press[i] = POLY_thermal_pressure_MHD(prev_sts[i]);

                /* row of the fit matrix: 1, dx, dy relative to the vertex */
                ls_A[i][0] = 1.0;
                ls_A[i][1] = cents[i][0] - pt_crds[0];
                ls_A[i][2] = cents[i][1] - pt_crds[1];
                if(therm_press[i] < therm_p_min)
                    therm_p_min = therm_press[i];

                vel_u[i] = Mom(prev_sts[i])[0]/Dens(prev_sts[i]);
                vel_v[i] = Mom(prev_sts[i])[1]/Dens(prev_sts[i]);
                mag_c[i] = POLY_magnetosonic_speed_MHD(prev_sts[i]);
                if(mag_c[i] < mag_c_min)
                    mag_c_min = mag_c[i];
            }
            least_sqr_fit_linear_poly(therm_press, ls_A, tri_N, lin_fit_press);

            p_grad_L1 = diam*(fabs(lin_fit_press[1]) + fabs(lin_fit_press[2])); // undivided diff
            p_switch = (p_grad_L1 > (beta*therm_p_min)) ? YES : NO;

            least_sqr_fit_linear_poly(vel_u, ls_A, tri_N, lin_fit_u);
            least_sqr_fit_linear_poly(vel_v, ls_A, tri_N, lin_fit_v);
            vel_div = diam*(lin_fit_u[1] + lin_fit_v[2]); // undivided diff to approx. divergence of velocity
            v_switch = ((-delta*mag_c_min) > vel_div) ? YES : NO;

            if(debug == YES)
            {
                printf("adv_E_vertice_B_edge(), vel_div %g, mag_c_min = %g, p_grad_L1 %g, p_min = %g "
                       "  on tri %d, p_switch = %d, v_switch = %d\n",
                             vel_div, -delta*mag_c_min, p_grad_L1, beta*therm_p_min, otri->id, p_switch, v_switch);
                printf("tri_N collected = %d for vertex %d, coords[%g, %g]\n", tri_N, k,
                    Coords(Point_of_tri(otri)[k])[0],
                    Coords(Point_of_tri(otri)[k])[1]);
            }

            /*
            * Half-angle tangent, mid ray and single-state wave speed for
            * every tri that has a neighbor across the edge starting at the
            * vertex.
            * NOTE(review): these arrays are packed by N_side, while
            * LLF_U_star_model() below reads half_tan[i] and mid_ray[i] for
            * i < tri_N next to tris[i].  The two indexings agree only when
            * no tri is skipped here -- confirm that this always holds.
            */
            N_side = 0;
            for(i = 0; i < tri_N; i++)
            {
                for(side = 0; side < 3; side++)
                {
                    if(Point_of_tri(tris[i])[side] != Point_of_tri(otri)[k])
                        continue;
                    if(NULL == Tri_on_side(tris[i],side))
                        continue;

                    half_tan[N_side] = tan_half_angle(Point_of_tri(otri)[k], tris[i], side,mid_ray[N_side]);
                    single_st_tmp_alpha[N_side] = Max_wave_speed_at_pt_single_state(Point_of_tri(otri)[k],tris[i],
                                                 mid_soln, rk_iter, mid_ray[N_side], NO);
                    N_side++;
                }
            }

            ///// Begin: compute max_wave_speed at vertex
            N_side = 0;
            for(i = 0; i < tri_N; i++)
            {
                for(side = 0; side < 3; side++)
                {
                    if(Point_of_tri(tris[i])[side] != Point_of_tri(otri)[k])
                        continue;
                    if(NULL == Tri_on_side(tris[i],side))
                        continue;

                    tmp_alpha[N_side] = Max_wave_speed_at_pt(Point_of_tri(otri)[k], tris[i], side, mid_soln, rk_iter, NO);
                    N_side++;
                }
            }

            /*
            * Largest value over both estimates.  A plain running maximum is
            * used so that an earlier, larger single-state speed is never
            * replaced by a smaller edge speed found later.
            */
            max_alpha_new = -HUGE_VAL;
            for(i = 0; i < N_side; i++)
            {
                if(tmp_alpha[i] > max_alpha_new)
                    max_alpha_new = tmp_alpha[i];
                if(single_st_tmp_alpha[i] > max_alpha_new)
                    max_alpha_new = single_st_tmp_alpha[i];
            }
            ///// End: compute max_wave_speed at vertex

            //// Begin: Simple LLF wave model
            LLF_U_star_model(Coords(Point_of_tri(otri)[k]), mid_soln, rk_iter, tris, max_alpha_new, half_tan,mid_ray, tri_N, LLF_U_star,debug);
            tmp_E_z = LLF_U_star[2]/LLF_U_star[0]*LLF_U_star[5] - LLF_U_star[1]/LLF_U_star[0]*LLF_U_star[6];

            /* Mark the vertex as done and store E_z on the point and on the tri. */
            Index_of_point(Point_of_tri(otri)[k]) = YES;
            E_z_of_point(Point_of_tri(otri)[k]) = tmp_E_z;
            E_z_of_tri(otri)[k] = tmp_E_z;
            //// END: Simple LLF wave model

            if(debug == YES)
            {
                printf("alpha at edges: ");
                for(i = 0; i < N_side; i++)
                    printf("%g; ", tmp_alpha[i]);
                printf("\n");

                printf("LLF_U_star: rho= %g, mom[0]= %g, mom[1]= %g, Bx= %g, By= %g, E_z=%g\n",
                        LLF_U_star[0], LLF_U_star[1], LLF_U_star[2], LLF_U_star[5], LLF_U_star[6],
                        LLF_U_star[2]/LLF_U_star[0]*LLF_U_star[5] - LLF_U_star[1]/LLF_U_star[0]*LLF_U_star[6]);
                printf("max_alpha_new = %g\n\n", max_alpha_new);

                if(isnan(tmp_E_z))
                {
                    printf("ERROR: adv_E_vertice_B_edge(), point E_z nan\n");
                    printf("point %g %g of tri[%d]\n", Coords(Point_of_tri(otri)[k])[0],
                                 Coords(Point_of_tri(otri)[k])[1], otri->id);
                    print_tri_crds(otri);
                    clean_up(ERROR);
                }
            }
        } /// END: for(k = 0; k < 3; k++)

        //// Now compute normal B on edge.
        for(side = 0; side < 3; side++)
        {
            if(Index_of_point(Point_of_tri(otri)[side]) == YES &&
               Index_of_point(Point_of_tri(otri)[(side+1)%3]) == YES)
            {
                adv_B_edge(side, otri, tri, dt, rk_iter, mid_soln, fr);
            }
            else
            {
                printf("ERROR: adv_E_vertice_B_edge(), E_z of tri[%d] was not computed\n", tri->id);
                clean_up(ERROR);
            }
        }
        if(YES == debug)
        {
            printf("\n**************Tri %d left adv_E_vertice_B_edge()\n", tri->id);
        }
}


/*
*                       LLF_U_star_model():
*
*       Builds one state U_star[0..N_EQN-1] at the point crds from the N_tri
*       tris given in tris[]:
*
*           U_star = ( sum_i half_tan[i]*U_i
*                      - 1/(5*max_alpha) * sum_i 2*half_tan[i]*(F_i . nor[i]) )
*                    / sum_i half_tan[i]
*
*       where U_i is the conserved vector of tri i evaluated at crds with
*       con_u_at_pt() and F_i = (fluxx, fluxy) is returned by flux_at_pt()
*       for it.  States are the tri states when rk_iter == 0, otherwise the
*       mid_soln stage states.
*
*       half_tan[i] and nor[i] are read for i = 0..N_tri-1, side by side with
*       tris[i].  N_tri must not exceed the 50 rows of the local table.
*       With debug == YES the evaluated U_i are printed.
*/
LOCAL void LLF_U_star_model(
        double    *crds,
        Mid_soln  *mid_soln,
        int       rk_iter,
        TRI       *tris[],
        double    max_alpha,
        double    *half_tan,
        double    nor[][3],
        int       N_tri,
        double    *U_star,
        int       debug)
{
        double    cell_U[50][10];
        double    fx[10], fy[10];
        double    wsum_U[10], wsum_flux[10];
        double    tan_total = 0.0, nflux, flux_coef;
        Locstate  cell_st = NULL;
        int       n, m;

        for(m = 0; m < N_EQN; m++)
            wsum_U[m] = wsum_flux[m] = 0.0;

        for(n = 0; n < N_tri; n++)
            tan_total += half_tan[n];

        for(n = 0; n < N_tri; n++)
        {
            cell_st = (rk_iter == 0) ? tris[n]->st
                                     : mid_soln[tris[n]->id].st[rk_iter];

            con_u_at_pt(cell_st, crds, fg_centroid(tris[n]), sqrt(fg_area(tris[n])), cell_U[n]);
            flux_at_pt(cell_U[n],cell_st,NULL,NULL,fx,fy);

            for(m = 0; m < N_EQN; m++)
            {
                wsum_U[m] += cell_U[n][m]*half_tan[n];

                /* flux along nor[n], weighted by twice the half-angle tangent */
                nflux = (fx[m]*nor[n][0]+fy[m]*nor[n][1])*(2.0*half_tan[n]);
                wsum_flux[m] += nflux;
            }
        }

        flux_coef = 1.0/(5.0*max_alpha);
        for(m = 0; m < N_EQN; m++)
            U_star[m] = (wsum_U[m] - flux_coef*wsum_flux[m])/tan_total;

        if(debug != YES)
            return;

        printf("IN LLF_U_star_model(), state around the point:\n");
        for(n = 0; n < N_tri; n++)
        {
            printf("tri[%d]: ", n);
            for(m = 0; m < N_EQN; m++)
                printf("%g, ", cell_U[n][m]);
            printf("\n");
        }
}

/*
*                       biased_min_mod():
*
*       Sign-aware selection among tmpu[0..N_side-1]:
*
*         - values of both signs present      -> 0.0
*         - at least one positive, none < 0   -> the smallest entry
*                                                (zero entries take part)
*         - otherwise (all entries <= 0)      -> the largest entry
*
*       Returns 0.0 when tmpu is NULL or N_side <= 0; reading tmpu[0] in
*       that case would be out of bounds.
*
*       avg is accepted for interface compatibility and is not used.
*/
EXPORT double biased_min_mod(
	double  *tmpu,
	int     N_side,
        double  avg)
{
        int     i;
        int     has_pos = 0, has_neg = 0;
        double  lo, hi;

        (void) avg;

        if(tmpu == NULL || N_side <= 0)
            return 0.0;

        lo = hi = tmpu[0];
        for(i = 0; i < N_side; i++)
        {
            if(tmpu[i] > 0.0)
                has_pos = 1;
            else if(tmpu[i] < 0.0)
                has_neg = 1;

            if(tmpu[i] < lo)
                lo = tmpu[i];
            if(tmpu[i] > hi)
                hi = tmpu[i];
        }

        if(has_pos && has_neg)
            return 0.0;

        return has_pos ? lo : hi;
}

/// advance normal B field on the edge

/*
*                       adv_B_edge():
*
*       One Runge-Kutta stage of the update of the normal B component kept
*       on edge `side`, driven by the difference of the vertex E_z values at
*       the two ends of the edge (vertex side+1 minus vertex side) scaled by
*       dt/length of the edge.
*
*       With Bn0 the value stored on otri and prev_Bn the value of the
*       previous stage (equal to Bn0 at stage 0), D = dt/length*(nx_E_z - E_z):
*
*         RK_STEP == 2:  stage 0:  prev_Bn - D
*                        stage 1:  0.5*(Bn0 + prev_Bn) - 0.5*D
*         RK_STEP == 3:  stage 0:  prev_Bn - D
*                        stage 1:  0.75*Bn0 + 0.25*prev_Bn - 0.25*D
*                        stage 2:  1/3*Bn0 + 2/3*prev_Bn - 2/3*D
*
*       The result of the last stage (rk_iter == RK_STEP-1) is written to
*       fg_side_B(tri)[side]; earlier stages write
*       mid_soln[otri->id].edge_Bn[side][rk_iter+1].  Any other RK_STEP is
*       reported and followed by clean_up(ERROR).
*
*       The cell states of otri and of the tri across the edge are not
*       needed, so the neighbor is not dereferenced here and an edge without
*       a neighbor does not fault.  fr is not used.
*/
LOCAL void adv_B_edge(
	int      side,
        TRI      *otri,
        TRI      *tri,
        double   dt,
        int      rk_iter,
        Mid_soln *mid_soln,
        Front    *fr)
{
        double    prev_Bn, Bn0;
        double    dt_area;      /* dt divided by the length of the edge */
        double    *soln_Bn;
        double    E_z, nx_E_z;
        int       debug = NO;   /* set to YES locally to trace the result */

        dt_area = dt/fg_length_side(otri)[side];

        /* edge value entering this stage */
        if(rk_iter == 0)
            prev_Bn = fg_side_B(otri)[side];
        else
            prev_Bn = mid_soln[otri->id].edge_Bn[side][rk_iter];

        /* E_z at the start and at the end vertex of the edge */
        E_z = E_z_of_point(Point_of_tri(otri)[side]);
        nx_E_z = E_z_of_point(Point_of_tri(otri)[(side+1)%3]);

        /* destination: final value on tri, intermediate values in mid_soln */
        if(rk_iter == RK_STEP-1)
            soln_Bn = &(fg_side_B(tri)[side]);
        else
            soln_Bn = &(mid_soln[otri->id].edge_Bn[side][rk_iter+1]);

        if(RK_STEP == 2)
        {
            if(rk_iter == 0)
            {
                *soln_Bn = prev_Bn - dt_area*(nx_E_z - E_z);
            }
            else
            {
                Bn0 = fg_side_B(otri)[side];
                *soln_Bn = 0.5*(Bn0 + prev_Bn) - 0.5*dt_area*(nx_E_z - E_z);
            }
        }
        else if(RK_STEP == 3)
        {
            if(rk_iter == 0)
            {
                *soln_Bn = prev_Bn - dt_area*(nx_E_z - E_z);
            }
            else if(rk_iter == 1)
            {
                Bn0 = fg_side_B(otri)[side];
                *soln_Bn = 0.75*Bn0 + 0.25*prev_Bn - 0.25*dt_area*(nx_E_z - E_z);
            }
            else
            {
                Bn0 = fg_side_B(otri)[side];
                *soln_Bn = 1.0/3.0*Bn0 + 2.0/3.0*prev_Bn - 2.0/3.0*dt_area*(nx_E_z - E_z);
            }
        }
        else
        {
            printf("ERROR: adv_B_edge(), implement RK_STEP = %d\n", RK_STEP);
            clean_up(ERROR);
        }

        if(debug == YES)
        {
            printf("Bn on side[%d] = %13.12g\n", side, *soln_Bn);
        }
}


/*
 * tan_half_angle():
 *
 * Two direction vectors are read from the side vectors stored on tri:
 *     d_out =  fg_side_vector(tri)[side]
 *     d_in  = -fg_side_vector(tri)[(side+2)%3]
 * Their dot product is used as cos(x), x being the angle between them
 * (assumes the stored side vectors have unit length -- TODO confirm).
 * The half-angle identities
 *     (sin(x/2))^2 = (1 - cos(x))/2
 *     (cos(x/2))^2 = (1 + cos(x))/2
 * give the returned value tan(x/2) = sin(x/2)/cos(x/2).
 *
 * mid_ray (output, first two entries are written) receives d_out + d_in
 * scaled to unit length.
 *
 * pt is not referenced.  There is no guard against a dot product outside
 * [-1,1], against cos(x/2) == 0, or against d_out + d_in == 0.
 */
LOCAL double tan_half_angle(
        POINT     *pt,
        TRI       *tri,  /// OLD tri
        int       side,
        double    *mid_ray)
{
        double    d_out[2], d_in[2];
        double    cos_x, sin_half, cos_half, ray_len;
        int       k;

        for(k = 0; k < 2; k++)
        {
            d_out[k] = fg_side_vector(tri)[side][k];
            d_in[k] = -fg_side_vector(tri)[(side+2)%3][k];
        }

        cos_x = d_out[0]*d_in[0] + d_out[1]*d_in[1];
        sin_half = sqrt((1.0 - cos_x)/2.0);
        cos_half = sqrt((1.0 + cos_x)/2.0);

        /* un-normalized sum of the two directions, then scale to unit length */
        mid_ray[0] = d_out[0] + d_in[0];
        mid_ray[1] = d_out[1] + d_in[1];
        ray_len = sqrt( sqr(mid_ray[0]) + sqr(mid_ray[1]) );
        mid_ray[0] /= ray_len;
        mid_ray[1] /= ray_len;

        return (sin_half/cos_half);
}

/*
 * Max_wave_speed_at_pt_single_state():
 *
 * Evaluates the solution of tri at point pt and returns the value of
 * compute_alpha_of_state_MHD() for that point state and direction nor.
 *
 *   pt        point whose coordinates are used for the evaluation
 *   tri       triangle supplying the state
 *   mid_soln  per-triangle intermediate solutions, indexed by tri->id;
 *             read only when rk_iter != 0
 *   rk_iter   0 selects tri->st, otherwise mid_soln[tri->id].st[rk_iter]
 *   nor       direction handed unchanged to compute_alpha_of_state_MHD()
 *   debug     not referenced
 *
 * A function-level static scratch state is allocated on the first call
 * (using the Params of that call's tri->st) and reused afterwards.
 * Without Ez_USE_LF_FLUX the function prints an error and calls
 * clean_up(ERROR).
 */
LOCAL double Max_wave_speed_at_pt_single_state(
        POINT     *pt,
        TRI       *tri,  /// OLD tri
        Mid_soln  *mid_soln,
        int       rk_iter,
        double    *nor,
        int       debug)
{
        static Locstate pt_state = NULL;
        Locstate  cell_state;
        double    *xy;
        double    cons_at_pt[10];
        double    speed;
        int       dim = 2;

        if(pt_state == NULL)
        {
            /* one-time allocation; seeded with a copy of tri->st */
            (Params(tri->st)->_alloc_state)(&pt_state,Params(tri->st)->sizest);
            assign(pt_state, tri->st, Params(tri->st)->sizest);
        }

        xy = Coords(pt);

        if(rk_iter != 0)
            cell_state = mid_soln[tri->id].st[rk_iter];
        else
            cell_state = tri->st;

        /* conservative variables of the cell solution evaluated at pt */
        con_u_at_pt(cell_state, xy, fg_centroid(tri), sqrt(fg_area(tri)), cons_at_pt);

        /* load them into the scratch state */
        con_u_to_state(cons_at_pt, dim, pt_state);

#if !defined(Ez_USE_LF_FLUX)
        printf("ERROR: Max_wave_speed_at_pt_single_state() implement Ez_USE_LF_FLUX\n");
        clean_up(ERROR);
#else // if !defined(Ez_USE_LF_FLUX)
        speed = compute_alpha_of_state_MHD(pt_state,nor);
#endif // if !defined(Ez_USE_LF_FLUX)
        return speed;
}

/*
 * Max_wave_speed_at_pt():
 *
 * Evaluates, at point pt, the solutions of tri and of its neighbour across
 * edge `side`, and returns compute_alpha_of_LF_flux_MHD() for that pair of
 * point states and the direction nor = (t[1], -t[0]), where t is
 * fg_side_vector(tri)[side].
 *
 *   pt        point whose coordinates are used for the evaluation
 *   tri       triangle on one side of the edge
 *   side      edge index of tri; the neighbour is Tri_on_side(tri,side) and
 *             is dereferenced without a NULL check
 *   mid_soln  per-triangle intermediate solutions, indexed by tri id;
 *             read only when rk_iter != 0
 *   rk_iter   0 selects the ->st states, otherwise mid_soln[id].st[rk_iter]
 *   debug     YES (together with side == 0) enables diagnostic printing
 *
 * Two function-level static scratch states are allocated on the first call
 * and reused afterwards.  Without Ez_USE_LF_FLUX the function prints an
 * error and calls clean_up(ERROR).
 */
LOCAL double Max_wave_speed_at_pt(
        POINT     *pt,
        TRI       *tri,  /// OLD tri
        int       side,
        Mid_soln  *mid_soln,
        int       rk_iter,
        int       debug)
{
        TRI       *nbtri;
        double    nor[3], t[3], *crds; 
        int       i, dim = 2;
        double    conUl[10], conUr[10];
        static Locstate Tstl = NULL, Tstr;
        Locstate stl = NULL, str;
        double    tmp_alpha = 0.0;  /* defined value even on the error path */
    
        if(Tstl == NULL)
        {
            /* one-time allocation of the scratch states, seeded from tri->st */
            (Params(tri->st)->_alloc_state)(&Tstl,Params(tri->st)->sizest);
            (Params(tri->st)->_alloc_state)(&Tstr,Params(tri->st)->sizest);
            assign(Tstl, tri->st, Params(tri->st)->sizest);
            assign(Tstr, tri->st, Params(tri->st)->sizest);
        }

        nbtri = Tri_on_side(tri,side);
        crds = Coords(pt);

        /* direction perpendicular to the stored side vector */
        for(i = 0; i < dim; i++)
            t[i] = fg_side_vector(tri)[side][i];
        nor[0] = t[1];
        nor[1] = -t[0];

        if(rk_iter == 0)
        {
            stl = tri->st;
            str = nbtri->st;
        }
        else
        {
            stl = mid_soln[tri->id].st[rk_iter];
            str = mid_soln[nbtri->id].st[rk_iter];
        }

        // conservative variables of both cell solutions evaluated at pt
        con_u_at_pt(stl, crds, fg_centroid(tri), sqrt(fg_area(tri)), conUl);
        con_u_at_pt(str, crds, fg_centroid(nbtri), sqrt(fg_area(nbtri)), conUr);

        if(debug == YES && side == 0)
           printf("tri[%d | %d] conU=(%g | %g) side = %d\n",tri->id,nbtri->id,conUl[0],conUr[0], side);

        // copy conservatives to the scratch states
        con_u_to_state(conUl, dim, Tstl);
        con_u_to_state(conUr, dim, Tstr);

        if(debug == YES && side == 0)
        {
            printf("tri[%d], nbtri[%d]\n", tri->id, nbtri->id);
            print_general_vector("tri soln",conUl,8,"\n");
            print_general_vector("nbtri soln",conUr,8,"\n");
            verbose_print_state("stl", stl);
            verbose_print_state("str", str);
            print_tri_crds(tri);
            print_tri_crds(nbtri);
        }

#if defined(Ez_USE_LF_FLUX) 
        // get alpha as an upper bound for the eigenvalues of the Jacobian 
        tmp_alpha = compute_alpha_of_LF_flux_MHD(Tstl,Tstr,nor);
#else // if defined(Ez_USE_LF_FLUX)
        // report this function's own name (message used to say Elec_z_at_pt())
        printf("ERROR: Max_wave_speed_at_pt() implement Ez_USE_LF_FLUX\n");
        clean_up(ERROR); 
#endif // if defined(Ez_USE_LF_FLUX)
        return tmp_alpha;
}


//// Elec_z_at_pt(): electric field component E_z at point pt, computed from
//// the two cell solutions that meet across edge `side` of tri.
////
//// Steps performed here:
////   1. take the state of tri and of Tri_on_side(tri,side) (the ->st states
////      when rk_iter == 0, otherwise mid_soln[id].st[rk_iter]);
////   2. evaluate the conservative variables of both at pt (con_u_at_pt) and
////      load them into two static scratch states (con_u_to_state);
////   3. hand both to LF_soln_ideal_MHD_Elec_z() when Ez_USE_LF_FLUX is
////      defined (ver_alpha is passed along), otherwise to
////      HLL_soln_ideal_MHD_Elec_z(), with the direction
////      nor = (t[1], -t[0]), t = fg_side_vector(tri)[side].
////
//// Original author notes (they describe the flux solver and its intended
//// use; not verifiable from this function alone):
////   - The 2.5D equations are solved.  E_z at the vertices of triangles is
////     used to update the normal component of B defined on the cell edges
////     for a constrained transport formulation.
////   - Double dissipation is introduced for the subsonic case.
////   - A one-dimensional Riemann problem is solved: coordinates are first
////     transformed so the equations are in (\rho, \rho u_{normal},
////     \rho u_{tangential}, ...) format, the problem is solved, and the
////     result is transformed back to x-y coordinates.
////   - The output solution is for (\rho, \rho u, \rho v, E, B_x, B_y) in the
////     normal direction.  Pay attention to the order of variables.
////
//// The neighbour triangle is dereferenced without a NULL check.
//// `debug` is not referenced.
LOCAL double Elec_z_at_pt(
        POINT     *pt,
        TRI       *tri,  /// OLD tri
        int       side,
        Mid_soln  *mid_soln,
        int       rk_iter,
        double    ver_alpha,
        int       debug)
{
        static Locstate left_pt_st = NULL, right_pt_st;
        TRI       *across;
        Locstate  left_cell_st, right_cell_st;
        double    left_con[10], right_con[10];
        double    edge_nor[3], *xy;
        int       dim = 2;

        if(left_pt_st == NULL)
        {
            /* one-time allocation of the scratch states, seeded from tri->st */
            (Params(tri->st)->_alloc_state)(&left_pt_st,Params(tri->st)->sizest);
            (Params(tri->st)->_alloc_state)(&right_pt_st,Params(tri->st)->sizest);
            assign(left_pt_st, tri->st, Params(tri->st)->sizest);
            assign(right_pt_st, tri->st, Params(tri->st)->sizest);
        }

        across = Tri_on_side(tri,side);
        xy = Coords(pt);

        /* direction perpendicular to the stored side vector */
        edge_nor[0] = fg_side_vector(tri)[side][1];
        edge_nor[1] = -fg_side_vector(tri)[side][0];

        if(rk_iter != 0)
        {
            left_cell_st = mid_soln[tri->id].st[rk_iter];
            right_cell_st = mid_soln[across->id].st[rk_iter];
        }
        else
        {
            left_cell_st = tri->st;
            right_cell_st = across->st;
        }

        /* both cell solutions evaluated at the same point */
        con_u_at_pt(left_cell_st, xy, fg_centroid(tri), sqrt(fg_area(tri)), left_con);
        con_u_at_pt(right_cell_st, xy, fg_centroid(across), sqrt(fg_area(across)), right_con);

        con_u_to_state(left_con, dim, left_pt_st);
        con_u_to_state(right_con, dim, right_pt_st);

#if defined(Ez_USE_LF_FLUX)  
        return LF_soln_ideal_MHD_Elec_z(left_con,right_con,edge_nor,left_pt_st,right_pt_st,ver_alpha,NO);
#else // if defined(Ez_USE_LF_FLUX)
        return (HLL_soln_ideal_MHD_Elec_z(left_con,right_con,edge_nor,left_pt_st,right_pt_st,NO));
#endif // if defined(Ez_USE_LF_FLUX)
}


//// HLL_soln_at_pt(): HLL solution at point pt for the two cell solutions
//// that meet across edge `side` of tri.  The result is written to `con`
//// by HLL_soln_ideal_MHD().
////
////   pt        point whose coordinates are used for the evaluation
////   tri       triangle on one side of the edge
////   side      edge index of tri; the neighbour Tri_on_side(tri,side) is
////             dereferenced without a NULL check
////   mid_soln  per-triangle intermediate solutions, indexed by tri id;
////             read only when rk_iter != 0
////   rk_iter   0 selects the ->st states, otherwise mid_soln[id].st[rk_iter]
////   con       output array filled by HLL_soln_ideal_MHD()
////
//// Original author notes (they describe the flux solver and its intended
//// use; not verifiable from this function alone):
////   - Primarily for computing E_z (2D) at the vertices of triangles, which
////     is then used to update the normal component of B defined on the cell
////     edges for a constrained transport formulation.
////   - Double dissipation is introduced for the subsonic case.
////   - A one-dimensional Riemann problem is solved: coordinates are first
////     transformed so the equations are in (\rho, \rho u_{normal},
////     \rho u_{tangential}, ...) format, the problem is solved, and the
////     result is transformed back to x-y coordinates.
////   - The output solution is for (\rho, \rho u, \rho v, E, B_x, B_y) in the
////     normal direction.  Pay attention to the order of variables.
LOCAL void HLL_soln_at_pt(
        POINT     *pt,
        TRI       *tri,  /// OLD tri
        int       side,
        Mid_soln  *mid_soln,
        int       rk_iter,
        double    *con)
{
        TRI       *nbtri;
        double    nor[3], t[3], *crds;
        int       i, dim = 2;
        double    conUl[10], conUr[10];
        static Locstate Tstl = NULL, Tstr; 
        Locstate stl = NULL, str;

        /* Allocate the static scratch states once.  The guard must test the
           static Tstl: testing the local stl (always NULL at this point)
           allocated and leaked two states on every call. */
        if(Tstl == NULL)
        {
            (Params(tri->st)->_alloc_state)(&Tstl,Params(tri->st)->sizest);
            (Params(tri->st)->_alloc_state)(&Tstr,Params(tri->st)->sizest);
            assign(Tstl, tri->st, Params(tri->st)->sizest);
            assign(Tstr, tri->st, Params(tri->st)->sizest);
        }

        nbtri = Tri_on_side(tri,side);
        crds = Coords(pt);

        /* direction perpendicular to the stored side vector */
        for(i = 0; i < dim; i++)
            t[i] = fg_side_vector(tri)[side][i];
        nor[0] = t[1];
        nor[1] = -t[0];

        if(rk_iter == 0)
        {
            stl = tri->st;
            str = nbtri->st;
        }
        else
        {
            stl = mid_soln[tri->id].st[rk_iter];
            str = mid_soln[nbtri->id].st[rk_iter];
        }

        /* conservative variables of both cell solutions evaluated at pt */
        con_u_at_pt(stl, crds, fg_centroid(tri), sqrt(fg_area(tri)), conUl);
        con_u_at_pt(str, crds, fg_centroid(nbtri), sqrt(fg_area(nbtri)), conUr);

        /* NOTE(review): the cell states stl/str are passed here, whereas
           Elec_z_at_pt() first loads conUl/conUr into the scratch states and
           passes those.  Tstl/Tstr are otherwise unused in this function --
           confirm which states HLL_soln_ideal_MHD() expects. */
        HLL_soln_ideal_MHD(conUl,conUr,nor,stl,str,con,NO);
}



/// Collect the edge neighbours of tri together with neighbours of those
/// neighbours into nntri[]; the number of stored entries is returned in
/// *nn_num (at most 9).
///
/// For an edge neighbour nb, let s be the side of nb that borders tri
/// (s == 3 if no side matches).  The original author calls the triangle
/// across side (s+2)%3 of nb its "upper" side tri and the one across side
/// (s+1)%3 its "lower" side tri.  Entries are appended in this order:
///     nbtri[0], upper(0), lower(1)*, nbtri[1], upper(1),
///     lower(2)*, nbtri[2], upper(2), lower(0)*
/// NULL upper/lower triangles are skipped.  Entries marked * are appended
/// only if not already present in the list; the others are appended without
/// a duplicate check.  The nbtri[] entries are stored unconditionally and
/// are dereferenced without a NULL check.
EXPORT void get_sten_neighbr(
        TRI       *nbtri[3],
        TRI       *tri,
        TRI       *nntri[],
        int       *nn_num)
{
        /* Append recipe, executed top to bottom.
             nb     : index into nbtri[]
             offset : 0 -> append nbtri[nb] itself,
                      otherwise append the tri across side (s+offset)%3
             unique : non-zero -> scan the list first and skip duplicates */
        static const struct { int nb, offset, unique; } recipe[9] = {
            {0, 0, 0},      // 1111
            {0, 2, 0},      // 2222
            {1, 1, 1},      // 3333
            {1, 0, 0},      // 4444
            {1, 2, 0},      // 5555
            {2, 1, 1},      // 6666
            {2, 0, 0},      // 7777
            {2, 2, 0},      // 8888
            {0, 1, 1}       // 9999
        };
        TRI       *edge_nb, *cand;
        int       step, s, m, already, count = 0;

        for(step = 0; step < 9; step++)
        {
            edge_nb = nbtri[recipe[step].nb];

            if(recipe[step].offset == 0)
            {
                nntri[count] = edge_nb;
                count++;
                continue;
            }

            /* side of the edge neighbour that borders tri */
            for(s = 0; s < 3; s++)
            {
                if(Tri_on_side(edge_nb,s) == tri)
                    break;
            }

            cand = Tri_on_side(edge_nb,(s+recipe[step].offset)%3);
            if(cand == NULL)
                continue;

            already = 0;
            if(recipe[step].unique)
            {
                for(m = 0; m < count; m++)
                {
                    if(nntri[m] == cand)
                    {
                        already = 1;
                        break;
                    }
                }
            }
            if(!already)
            {
                nntri[count] = cand;
                count++;
            }
        }

        *nn_num = count;
}

/* 
 Get the stencil for preliminary reconstruction on tri. 
 Tri's edge neighbors and one cell between each pair of 
  edge neighbors are selected.

 For each side of tri, nbtri[side] is appended to nntri[], followed by the
 middle entry (index N/2) of the N triangles that
 tris_between_edge_neighbrs() reports for that side.  When that call
 reports no triangle at all, nothing is appended for the pair: the
 temporary list is then unfilled, and reading its first entry would store
 an indeterminate pointer in the stencil.

   nbtri    the three edge neighbours of tri (stored as given)
   tri      triangle whose stencil is built
   nntri    output list; receives up to 6 entries
   nn_num   output: number of entries stored in nntri

 NOTE(review): the temporary list holds 20 triangles and
 tris_between_edge_neighbrs() does no bounds checking -- confirm that no
 walk can return more than 20 triangles.
*/
LOCAL void get_sten_consv_var_P2(
        TRI       *nbtri[3],
        TRI       *tri,
        TRI       *nntri[],
        int       *nn_num)
{
        TRI       *tmptris[20];
        int       side, N_tri = 0, N_between;

        for(side = 0; side < 3; side++)
        {
            nntri[N_tri] = nbtri[side];
            N_tri++;

            N_between = 0;
            tris_between_edge_neighbrs(tri, NULL, side, tmptris, &N_between);
            if(N_between <= 0)
                continue;   /* no cell lies between this pair of neighbours */

            nntri[N_tri] = tmptris[N_between/2];
            N_tri++;
        }
        *nn_num = N_tri; 
}


/*
 * tris_between_edge_neighbrs_ver2():
 *
 * Walks from the edge neighbour Tri_on_side(tri,pt_side) toward the edge
 * neighbour Tri_on_side(tri,(pt_side+1)%3) and stores every triangle
 * visited strictly between the two in tris[]; their number is returned
 * in *N.  The two edge neighbours themselves are not stored.
 *
 * At each triangle the walk leaves through side (s+2)%3, where s is the
 * side through which the triangle was entered (the original author labels
 * this the counter-clockwise direction; presumably the walk pivots about
 * the vertex shared by the two edges of tri -- confirm against the mesh
 * orientation convention).
 *
 *   tri      triangle whose two edge neighbours bound the walk
 *   pt       not referenced
 *   pt_side  side index of tri selecting the starting neighbour
 *   tris     output array; the caller must provide enough room, no bound
 *            is checked here
 *   N        output: number of triangles stored
 *
 * If the walk reaches a NULL neighbour before the target neighbour, an
 * error is printed and clean_up(ERROR) is called; *N is not set then.
 */
EXPORT void tris_between_edge_neighbrs_ver2(
        TRI      *tri,
        POINT    *pt,
        int      pt_side,
        TRI      *tris[],
        int      *N)
{
        TRI      *Nbtri[2], *curr_t, *next_t;
        int      tmp_side, side, tri_N = 0, met_next_Nbtri = NO;

        Nbtri[0] = Tri_on_side(tri,pt_side);
        Nbtri[1] = Tri_on_side(tri,(pt_side+1)%3);

        //// first do counter-clockwise direction
        curr_t = Nbtri[0];
        /* side of the starting neighbour that borders tri */
        for(side =0; side < 3; side++)
        {
            if(tri == Tri_on_side(curr_t,side))
                break;
        } 
        side = (side+2)%3;

        for(; ;)
        {
            next_t = Tri_on_side(curr_t,side);
            if(next_t == NULL || next_t == Nbtri[1])
            {
                if(next_t == Nbtri[1])
                    met_next_Nbtri = YES;
                break;
            }

            tris[tri_N] = next_t;
            tri_N++;

            /* side of next_t through which it was entered */
            for(tmp_side = 0; tmp_side < 3; tmp_side++)
            {
                if(curr_t == Tri_on_side(next_t,tmp_side))
                    break;
            }

            side = (tmp_side+2)%3;
            curr_t = next_t;
        }

        if(met_next_Nbtri == YES)
        {
            *N = tri_N; 
            return;
        }

        /* name this variant so the failure can be told apart from
           tris_between_edge_neighbrs() */
        printf("ERROR: tris_between_edge_neighbrs_ver2()\n");
        printf("Do not find all tris between Nbtri[0] and Nbtri[1]\n");
        clean_up(ERROR);
}


/*
 * tris_between_edge_neighbrs():
 *
 * Walks from the edge neighbour Tri_on_side(tri,pt_side) toward the edge
 * neighbour Tri_on_side(tri,(pt_side+2)%3) and stores every triangle
 * visited strictly between the two in tris[]; their number is returned
 * in *N.  The two edge neighbours themselves are not stored.
 *
 * At each triangle the walk leaves through side (s+1)%3, where s is the
 * side through which the triangle was entered (the original author labels
 * this the clockwise direction; presumably the walk pivots about the
 * vertex shared by the two edges of tri -- confirm against the mesh
 * orientation convention).
 *
 *   tri      triangle whose two edge neighbours bound the walk
 *   pt       not referenced
 *   pt_side  side index of tri selecting the starting neighbour
 *   tris     output array; the caller must provide enough room, no bound
 *            is checked here
 *   N        output: number of triangles stored
 *
 * If the walk reaches a NULL neighbour before the target neighbour, an
 * error is printed and clean_up(ERROR) is called; *N is not set then.
 */
EXPORT void tris_between_edge_neighbrs(
        TRI      *tri,
        POINT    *pt,
        int      pt_side,
        TRI      *tris[],
        int      *N)
{
        TRI      *target, *came_from, *here, *ahead;
        int      entry, found = 0;

        here = Tri_on_side(tri,pt_side);
        target = Tri_on_side(tri,(pt_side+2)%3);
        came_from = tri;

        while(1)
        {
            /* side of `here` through which it was entered */
            for(entry = 0; entry < 3; entry++)
            {
                if(Tri_on_side(here,entry) == came_from)
                    break;
            }

            ahead = Tri_on_side(here,(entry+1)%3);

            if(ahead == target)
            {
                /* reached the second edge neighbour: walk complete */
                *N = found;
                return;
            }
            if(ahead == NULL)
                break;      /* ran off the mesh before reaching the target */

            tris[found] = ahead;
            found++;

            came_from = here;
            here = ahead;
        }

        printf("ERROR: tris_between_edge_neighbrs()\n");
        printf("Do not find all tris between Nbtri[0] and Nbtri[1]\n");
        clean_up(ERROR);
}

/// Collect neighbors of nbtris.
/// (Original note: neighbors are in the edge advancing direction.)
///
/// For each edge neighbour nbtri[k], k = 0,1,2, let s be the side of
/// nbtri[k] that borders tri (s == 3 if no side matches).  The triangles
/// across sides (s+1)%3 ("next side") and (s+2)%3 ("next next side") of
/// nbtri[k] are appended to nntri[], in that order, skipping NULL ones.
/// A candidate already present in nntri[] is skipped, with one exception:
/// the "next next side" triangle of nbtri[1] is appended without a
/// duplicate check.  The edge neighbours themselves are not stored.
/// The number of stored entries (at most 6) is returned in *nn_num.
/// The nbtri[] entries are dereferenced without a NULL check.
EXPORT void get_sten_neighbr_B_field(
        TRI       *nbtri[3],
        TRI       *tri,
        TRI       *nntri[],
        int       *nn_num)
{
        TRI       *edge_nb, *cand;
        int       k, s, offset, m, already, count = 0;

        for(k = 0; k < 3; k++)
        {
            edge_nb = nbtri[k];

            /* side of this edge neighbour that borders tri */
            for(s = 0; s < 3; s++)
            {
                if(Tri_on_side(edge_nb,s) == tri)
                    break;
            }

            /* offset 1: "next side" tri, offset 2: "next next side" tri */
            for(offset = 1; offset <= 2; offset++)
            {
                cand = Tri_on_side(edge_nb,(s+offset)%3);
                if(cand == NULL)
                    continue;

                already = NO;
                /* nbtri[1]'s "next next side" tri is the one entry that is
                   stored without scanning for duplicates */
                if(!(k == 1 && offset == 2))
                {
                    for(m = 0; m < count; m++)
                    {
                        if(nntri[m] == cand)
                        {
                            already = YES;
                            break;
                        }
                    }
                }
                if(already == NO)
                {
                    nntri[count] = cand;
                    count++;
                }
            }
        }

        *nn_num = count;
}

//ok
/*
 * preliminary_reconstruction_zone_center():
 *
 * Visits every triangle of every surface in fr->mesh->surfaces, skipping
 * triangles whose BC_type is SUBDOMAIN, and calls the reconstruction
 * routine selected by MAX_N_COEF:
 *   MAX_N_COEF == 3 : init_tri_comput_P1_polynomials_from_avg_MHD() with
 *                     the three edge neighbours of the triangle;
 *   MAX_N_COEF == 6 : tri_comput_P2_polynomials_from_avg_MHD() with the
 *                     stencil from get_sten_neighbr(), which must contain
 *                     exactly 9 triangles, otherwise the run is aborted;
 *   anything else   : error message and clean_up(ERROR).
 *
 *   fr           front providing the mesh
 *   midsoln      passed through to the reconstruction routine
 *   rk_step      passed through to the reconstruction routine
 *   limit_store  passed through in the MAX_N_COEF == 6 case only
 */
LOCAL void preliminary_reconstruction_zone_center(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        Limiting_store **limit_store)
{
        SURFACE   **s;
        TRI       *t, *edge_nb[3], *stencil[50];
        int       e, n_stencil;

        for(s = fr->mesh->surfaces; s && *s; s++)
        {
            for (t = first_tri(*s); !at_end_of_tri_list(t,*s); t = t->next)
            {
                if(t->BC_type == SUBDOMAIN)
                    continue;

                for(e = 0; e < 3; e++)
                    edge_nb[e] = Tri_on_side(t,e);

                if(MAX_N_COEF == 3)
                {
                    init_tri_comput_P1_polynomials_from_avg_MHD(t,edge_nb,3, midsoln, rk_step, NULL);
                }
                else if(MAX_N_COEF == 6)
                {
                    get_sten_neighbr(edge_nb, t, stencil, &n_stencil);
                    if(n_stencil != 9)
                    {
                        printf("ERROR: preliminary_reconstruction_zone_center() failed to build stencil for P2, nn_num = %d\n", n_stencil); 
                        clean_up(ERROR);
                    }
                    tri_comput_P2_polynomials_from_avg_MHD(t, stencil, n_stencil, midsoln,limit_store,rk_step, NULL);
                }
                else
                {
                    printf("ERROR: implement preliminary_reconstruction_zone_center() for p%d case\n", MAX_N_COEF); 
                    clean_up(ERROR); 
                }
            }
        }
}

/*
 * B_tri_mesh_2nd_reconstruction():
 *
 * Visits every triangle of every surface in fr->mesh->surfaces, skipping
 * triangles whose BC_type is SUBDOMAIN, and reconstructs the magnetic
 * field polynomial on it:
 *   MAX_N_COEF == 3 : tri_B_P1_polynomial_reconstruction_ver2();
 *   MAX_N_COEF == 6 : prints an error (this case still has to be revised
 *                     because the basis functions for B have been scaled)
 *                     and calls clean_up(ERROR) before the call to
 *                     tri_B_P2_polynomial_2nd_reconstruction_ver2();
 *   anything else   : the triangle is left untouched.
 *
 *   fr           front providing the mesh
 *   midsoln      passed through to the reconstruction routine
 *   rk_step      passed through to the reconstruction routine
 *   limit_store  passed through to the reconstruction routine
 */
EXPORT void B_tri_mesh_2nd_reconstruction(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        Limiting_store **limit_store)
{
        SURFACE   **s;
        TRI       *t;

        for(s = fr->mesh->surfaces; s && *s; s++)
        {
            t = first_tri(*s);
            while(!at_end_of_tri_list(t,*s))
            {
                if(t->BC_type != SUBDOMAIN)
                {
                    if(MAX_N_COEF == 3)
                    {
                        /* alternative kept by the original author:
                           tri_B_P1_polynomial_2nd_reconstruction() */
                        tri_B_P1_polynomial_reconstruction_ver2(t, midsoln,limit_store,rk_step);
                    }
                    else if(MAX_N_COEF == 6)
                    {
                        printf("ERROR: B_tri_mesh_2nd_reconstruction(), need to revise MAX_N_COEF = 6 case\n");
                        printf("Basis functions for B have been scaled\n");
                        clean_up(ERROR);

                        /* alternative kept by the original author:
                           tri_B_P2_polynomial_2nd_reconstruction() */
                        tri_B_P2_polynomial_2nd_reconstruction_ver2(t, midsoln,limit_store,rk_step);//NEW
                    }
                }
                t = t->next;
            }
        }
}


//// Construct the P2 polynomial of the magnetic field on tri from the average of the normal component and its 1st and 2nd variations on each edge.
/* use condition: b_2 = -a_1
                  b_4 = -2 a_3
                  b_5 = -0.5*a_4
   to reduce unknowns to 9 variables. 
   They are: a0, a1, a2, a3, a4, a5, b0, b1, b3.
   Note that the solution is represented as:
   Bx = a0 + a1(x-x_c) + a2(y - y_c) ...
   By = b0 + b1(x-x_c) + b2(y - y_c) ...
   which is the Taylor expansion about tri center.
*/
LOCAL void tri_B_P2_polynomial_2nd_reconstruction(
         TRI       *tri,
         Mid_soln *midsoln,
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *Bnbtri[3], *tris[30], *nbtri;
        double    AA[300], BB[500], CC[300], DD[300], XX[8][300], work[900], tmp;
        static double   **A = NULL, eps = 1.0e-6, **B, **invB, **Iden;
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, indx, debug = NO, cv_indx;
        int       sten_indx, edge_indx[3][9], e_indx, N_cell, N_STEN;
        Locstate  st, nbst[3], st2;
        double    Bavg[30], *pcrds[3], len, cellavgB[2], xx1, yy1, sum, BBavg[20];
        int       side, tmp_side, Edge_side[30], N_edge, num_tris_vertex;
        double    **Lmass_matrix = tri->Lmass_matrix, *cent = fg_centroid(tri), **tmpA, **tmpB;
        double    OIx[8], OIy[8], alpha[8], wei[8], ansx[8][30], ansy[8][30], MB[2][12];
        int       use_central_sten = YES;
        double    nor[3], t[3];
        double    nx, ny, v0x, v1x, v0y, v1y, x0, y0, v0x2, v1x2, v0y2, v1y2;
        int       irow;
        double    xav, yav, dx, dy;
        double    Bx_p1[6], By_p1[6];

        if(A == NULL)
        {
            matrix(&(A), 2, 9, sizeof(double));
            matrix(&(B), 9, 9, sizeof(double));
            matrix(&(invB), 9, 9, sizeof(double));
            matrix(&(Iden), 9, 9, sizeof(double));
        }

        for(side = 0; side < 3; side++)
        {
            pcrds[side] = Coords(Point_of_tri(tri)[side]);
        }

        if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_set == NO)
        {
            alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_set = YES;
            matrix(&tmpA,2,9,sizeof(double));
            matrix(&tmpB,9,9,sizeof(double));
            alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_A = tmpA;
            alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_B = tmpB;
            /// 1) setup constraint equation for matching average
            for(i = 0; i < 2; i++)
            {
                dy = pcrds[(i+1)%3][1] - pcrds[i][1];
                dx = pcrds[(i+1)%3][0] - pcrds[i][0];
                xx1 = pcrds[i][0] - cent[0];
                yy1 = pcrds[i][1] - cent[1];

                len = fg_length_side(tri)[i];

                tmpB[i][0] = dy;
                tmpB[i][1] = dy*(xx1 + 0.5*dx);
                tmpB[i][2] = dy*(yy1 + 0.5*dy);
                tmpB[i][3] = dy*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);
                tmpB[i][4] = dy*(xx1*yy1 + 0.5*xx1*dy +0.5*yy1*dx + dx*dy/3.0);
                tmpB[i][5] = dy*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);

                tmpB[i][6] = -dx;    // b_0
                tmpB[i][7] = -dx*(xx1 + 0.5*dx);  // b_1
                tmpB[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                tmpB[i][8] = -dx*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);   // b_3
                tmpB[i][3] += 2.0*dx*(xx1*yy1 +0.5*xx1*dy + 0.5*yy1*dx +  dx*dy/3.0);  // b_4
                tmpB[i][4] += 0.5*dx*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);  // b_5

                for(j = 0; j < 9; j++)
                    tmpB[i][j] /= len;
            }
            if(rk_step == RK_STEP)
            {
                for(i = 0; i < 2; i++)
                    BBavg[i] = fg_side_dgB(tri)[i][0];
            }
            else
            {
                for(i = 0; i < 2; i++)
                    BBavg[i] = midsoln[tri->id].edge_dgBn[i][0][rk_step];
            }

            /// 2) setup constraint equation for matching 1st variation and quadratic variation
            for(side = 0; side < 3; side++)
            {
                for(i = 0; i < dim; i++)
                    t[i] = fg_side_vector(tri)[side][i];
                nor[0] = t[1];
                nor[1] = -t[0];

                xav = 0.5*(pcrds[side][0] + pcrds[(side+1)%3][0]);
                yav = 0.5*(pcrds[side][1] + pcrds[(side+1)%3][1]);
                dx =  0.5*(pcrds[(side+1)%3][0] - pcrds[side][0]);
                dy =  0.5*(pcrds[(side+1)%3][1] - pcrds[side][1]);

                /// linear part
                tmpB[side+2][0] = 0.0;         // a0
                tmpB[side+2][1] = (dx)*nor[0]; // a1
                tmpB[side+2][2] = (dy)*nor[0]; // a2
                tmpB[side+2][3] = 2.0*(xav - cent[0])*dx*nor[0]; // a3
                tmpB[side+2][4] = ((xav - cent[0])*dy + (yav - cent[1])*dx)*nor[0];   // a4
                tmpB[side+2][5] = 2.0*(yav - cent[1])*dy*nor[0];       // a5
                tmpB[side+2][6] = 0.0;                        // b0
                tmpB[side+2][7] = (dx)*nor[1];                // b1
                tmpB[side+2][1] -= (dy)*nor[1];                // b2; b_2 = -a_1
                tmpB[side+2][8] = 2.0*(xav - cent[0])*dx*nor[1];    // b3
                tmpB[side+2][3] += -2.0*((xav - cent[0])*dy + (yav - cent[1])*dx)*nor[1];    // b4; b_4 = -2*a_3
                tmpB[side+2][4] += (-(yav - cent[1])*dy*nor[1]);    // b5; b_5 = -0.5*a_4

                /// quadratic part
                tmpB[side+5][0] = 0.0;    // a0
                tmpB[side+5][1] = 0.0;    // a1
                tmpB[side+5][2] = 0.0;    // a2
                tmpB[side+5][3] = (sqr(dx)*nor[0])*2.0/3.0;    // a3
                tmpB[side+5][4] = (dx*dy*nor[0])*2.0/3.0;      // a4
                tmpB[side+5][5] = (sqr(dy)*nor[0])*2.0/3.0;    // a5
                tmpB[side+5][6] = 0.0;    // b0
                tmpB[side+5][7] = 0.0;    // b1
                                          // b2 = 0
                tmpB[side+5][8] = (sqr(dx)*nor[1])*2.0/3.0;             // b3
                tmpB[side+5][3] += (-2.0*dx*dy*nor[1])*2.0/3.0;         // b4; b_4 = -2*a_3
                tmpB[side+5][4] += (-0.5*sqr(dy)*nor[1])*2.0/3.0;       // b5; b_5 = -0.5*a_4
            }
            if(rk_step == RK_STEP)
            {
                for(side = 0; side < 3; side++)
                {
                    BBavg[side+2] = fg_side_dgB(tri)[side][1];
                    BBavg[side+5] = fg_side_dgB(tri)[side][2];
                }
            }
            else
            {
                for(side = 0; side < 3; side++)
                {
                    BBavg[side+2] = midsoln[tri->id].edge_dgBn[side][1][rk_step];
                    BBavg[side+5] = midsoln[tri->id].edge_dgBn[side][2][rk_step];
                }
            }

            /// set least square equation for minimizing a5 and b3
            //// a) \partial{smooth indicator Bx}{\partial a5} = 0
            tmpA[0][0] = 0.0;
            tmpA[0][1] = 0.0;
            tmpA[0][2] = 4.0*Lmass_matrix[0][2];
            tmpA[0][3] = 0.0;
            tmpA[0][4] = 4.0*Lmass_matrix[0][4];
            tmpA[0][5] = 8.0*Lmass_matrix[0][5];
            tmpA[0][6] = tmpA[0][7] = tmpA[0][8] = 0.0;
            /// TMP
            // for(j = 0; j < 9; j++)
            //     tmpB[8][j] = tmpA[0][j];
            // solve_by_gj(tmpB,9,BBavg,XX[2]);
            // inverse_matrix(tmpB, 9, invB);
            // matrix_matrix_mult(tmpB, invB, 9, 9, Iden);
            /// END:  TMP

            /// b) \partial{smooth indicator By}\{\partial b3} = 0
            tmpA[1][0] = tmpA[1][1] = tmpA[1][2] = tmpA[1][3] = tmpA[1][4] = tmpA[1][5] = 0.0;
            tmpA[1][6] = 0.0;    /// b0
            tmpA[1][7] = 4.0*Lmass_matrix[0][1];  // b1
            /// b2 coef = 0.0
            tmpA[1][8] = 8.0*Lmass_matrix[0][3];  /// b3
            /// b4 coef
            tmpA[1][3] += -2.0*(4.0*Lmass_matrix[0][4]); /// b4; b_4 = -2*a_3
            /// b5 coef = 0.0

            Bavg[0] = Bavg[1] = 0.0;

            //// Now constrained least square 
            M = 2; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
            // N: number of unknown, M: number of least square eqn.
            // P: number of constraint eqn.
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < M; i++)
                {
                    AA[l] = tmpA[i][j];
                    l++;
                }
            }
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < P; i++)
                {
                    BB[l] = tmpB[i][j];
                    l++;
                }
            }
            for(i = 0; i < M; i++)  // right side for the least square part
                CC[i] = Bavg[i];
            for(i = 0; i < P; i++)  // right side for the constrained part
                DD[i] = BBavg[i];
            FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                 BB, &LDB, CC, DD, XX[0], ///// double array B, int LDB, double array C, D, X,
                                 work, &LWORK, &INFO);
        }
        else if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_set == YES)
        {
            tmpA = alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_A;
            tmpB = alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_B;

            /// 1) setup constraint equation for matching average
            if(rk_step == RK_STEP)
            {
                for(i = 0; i < 2; i++)
                    BBavg[i] = fg_side_dgB(tri)[i][0];
            }
            else
            {
                for(i = 0; i < 2; i++)
                    BBavg[i] = midsoln[tri->id].edge_dgBn[i][0][rk_step];
            }
            /// 2) setup constraint equation for matching 1st variation and quadratic variation
            if(rk_step == RK_STEP)
            {
                for(side = 0; side < 3; side++)
                {
                    BBavg[side+2] = fg_side_dgB(tri)[side][1];
                    BBavg[side+5] = fg_side_dgB(tri)[side][2];
                }
            }
            else
            {
                for(side = 0; side < 3; side++)
                {
                    BBavg[side+2] = midsoln[tri->id].edge_dgBn[side][1][rk_step];
                    BBavg[side+5] = midsoln[tri->id].edge_dgBn[side][2][rk_step];
                }
            }
            /// set least square equation for minimizing a5 and b3
            Bavg[0] = Bavg[1] = 0.0;

            //// Now constrained least square 
            M = 2; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
            // N: number of unknown, M: number of least square eqn.
            // P: number of constraint eqn.
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < M; i++)
                {
                    AA[l] = tmpA[i][j];
                    l++;
                }
            }
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < P; i++)
                {
                    BB[l] = tmpB[i][j];
                    l++;
                }
            }
            for(i = 0; i < M; i++)  // right side for the least square part
                CC[i] = Bavg[i];
            for(i = 0; i < P; i++)  // right side for the constrained part
                DD[i] = BBavg[i];
            FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                 BB, &LDB, CC, DD, XX[0], ///// double array B, int LDB, double array C, D, X,
                                 work, &LWORK, &INFO);
        } /// END: else if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_set == YES)

        if(alltri_HR_sten == NULL)
        {
            /// 1) setup constraint equation for matching average
            for(i = 0; i < 2; i++)
            {
                dy = pcrds[(i+1)%3][1] - pcrds[i][1];
                dx = pcrds[(i+1)%3][0] - pcrds[i][0];
                xx1 = pcrds[i][0] - cent[0];
                yy1 = pcrds[i][1] - cent[1];

                len = fg_length_side(tri)[i];

                B[i][0] = dy;
                B[i][1] = dy*(xx1 + 0.5*dx);
                B[i][2] = dy*(yy1 + 0.5*dy);
                B[i][3] = dy*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);
                B[i][4] = dy*(xx1*yy1 + 0.5*xx1*dy +0.5*yy1*dx + dx*dy/3.0);
                B[i][5] = dy*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);

                B[i][6] = -dx;    // b_0
                B[i][7] = -dx*(xx1 + 0.5*dx);  // b_1
                B[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                B[i][8] = -dx*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);   // b_3
                B[i][3] += 2.0*dx*(xx1*yy1 +0.5*xx1*dy + 0.5*yy1*dx +  dx*dy/3.0);  // b_4
                B[i][4] += 0.5*dx*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);  // b_5

                for(j = 0; j < 9; j++)
                    B[i][j] /= len;
            }
            if(rk_step == RK_STEP)
            {
                for(i = 0; i < 2; i++)
                    BBavg[i] = fg_side_dgB(tri)[i][0];
            }
            else
            {
                for(i = 0; i < 2; i++)
                    BBavg[i] = midsoln[tri->id].edge_dgBn[i][0][rk_step];
            }

            /// 2) setup constraint equation for matching 1st variation and quadratic variation
            for(side = 0; side < 3; side++)
            {
                for(i = 0; i < dim; i++)
                    t[i] = fg_side_vector(tri)[side][i];
                nor[0] = t[1];
                nor[1] = -t[0];

                xav = 0.5*(pcrds[side][0] + pcrds[(side+1)%3][0]);
                yav = 0.5*(pcrds[side][1] + pcrds[(side+1)%3][1]);
                dx =  0.5*(pcrds[(side+1)%3][0] - pcrds[side][0]);
                dy =  0.5*(pcrds[(side+1)%3][1] - pcrds[side][1]);

                /// linear part
                B[side+2][0] = 0.0;         // a0
                B[side+2][1] = (dx)*nor[0]; // a1
                B[side+2][2] = (dy)*nor[0]; // a2
                B[side+2][3] = 2.0*(xav - cent[0])*dx*nor[0]; // a3
                B[side+2][4] = ((xav - cent[0])*dy + (yav - cent[1])*dx)*nor[0];   // a4
                B[side+2][5] = 2.0*(yav - cent[1])*dy*nor[0];       // a5
                B[side+2][6] = 0.0;                        // b0
                B[side+2][7] = (dx)*nor[1];                // b1
                B[side+2][1] -= (dy)*nor[1];                // b2; b_2 = -a_1
                B[side+2][8] = 2.0*(xav - cent[0])*dx*nor[1];    // b3
                B[side+2][3] += -2.0*((xav - cent[0])*dy + (yav - cent[1])*dx)*nor[1];    // b4; b_4 = -2*a_3
                B[side+2][4] += (-(yav - cent[1])*dy*nor[1]);    // b5; b_5 = -0.5*a_4

                /// quadratic part
                B[side+5][0] = 0.0;    // a0
                B[side+5][1] = 0.0;    // a1
                B[side+5][2] = 0.0;    // a2
                B[side+5][3] = (sqr(dx)*nor[0])*2.0/3.0;    // a3
                B[side+5][4] = (dx*dy*nor[0])*2.0/3.0;      // a4
                B[side+5][5] = (sqr(dy)*nor[0])*2.0/3.0;    // a5
                B[side+5][6] = 0.0;    // b0
                B[side+5][7] = 0.0;    // b1
                                       // b2 = 0;  b_2 = - a_1
                B[side+5][8] = (sqr(dx)*nor[1])*2.0/3.0;             // b3
                B[side+5][3] += (-2.0*dx*dy*nor[1])*2.0/3.0;         // b4; b_4 = -2*a_3
                B[side+5][4] += (-0.5*sqr(dy)*nor[1])*2.0/3.0;       // b5; b_5 = -0.5*a_4
            }
            if(rk_step == RK_STEP)
            {
                for(side = 0; side < 3; side++)
                {
                    BBavg[side+2] = fg_side_dgB(tri)[side][1];
                    BBavg[side+5] = fg_side_dgB(tri)[side][2];
                }
            }
            else
            {
                for(side = 0; side < 3; side++)
                {
                    BBavg[side+2] = midsoln[tri->id].edge_dgBn[side][1][rk_step];
                    BBavg[side+5] = midsoln[tri->id].edge_dgBn[side][2][rk_step];
                }
            }

            /// set least square equation for minimizing a5 and b3
            //// a) \partial{smooth indicator Bx}{\partial a5} = 0
            A[0][0] = 0.0;
            A[0][1] = 0.0;
            A[0][2] = 4.0*Lmass_matrix[0][2];
            A[0][3] = 0.0;
            A[0][4] = 4.0*Lmass_matrix[0][4];
            A[0][5] = 8.0*Lmass_matrix[0][5];
            A[0][6] = A[0][7] = A[0][8] = 0.0;

            /// b) \partial{smooth indicator By}\{\partial b3} = 0
            A[1][0] = A[1][1] = A[1][2] = A[1][3] = A[1][4] = A[1][5] = 0.0;
            A[1][6] = 0.0;    /// b0
            A[1][7] = 4.0*Lmass_matrix[0][1];  // b1
            /// b2 coef = 0.0
            A[1][8] = 8.0*Lmass_matrix[0][3];  /// b3
            /// b4 coef
            A[1][3] += -2.0*(4.0*Lmass_matrix[0][4]); /// b4; b_4 = -2*a_3
            /// b5 coef = 0.0

            Bavg[0] = Bavg[1] = 0.0;
            //// Now constrained least square 
            M = 2; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
            // N: number of unknown, M: number of least square eqn.
            // P: number of constraint eqn.
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < M; i++)
                {
                    AA[l] = A[i][j];
                    l++;
                }
            }
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < P; i++)
                {
                    BB[l] = B[i][j];
                    l++;
                }
            }
            for(i = 0; i < M; i++)  // right side for the least square part
                CC[i] = Bavg[i];
            for(i = 0; i < P; i++)  // right side for the constrained part
                DD[i] = BBavg[i];
            FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                 BB, &LDB, CC, DD, XX[0], ///// double array B, int LDB, double array C, D, X,
                                 work, &LWORK, &INFO);
        }

        for(i =0; i < MAX_N_COEF; i++)
            ansx[0][i] = XX[0][i];
        ansy[0][0] = XX[0][6];          // b0
        ansy[0][1] = XX[0][7];          // b1
        ansy[0][2] = -ansx[0][1];       // b2;  b_2 = - a_1
        ansy[0][3] = XX[0][8];          // b3
        ansy[0][4] = -2.0*ansx[0][3];   // b4;  b_4 = -2*a_3
        ansy[0][5] = -0.5*ansx[0][4];   // b5;  b_5 = -0.5*a_4

        /// TMP, debug
        /****
        for(i =0; i < MAX_N_COEF; i++)
            ansx[1][i] = XX[1][i];
        ansy[1][0] = XX[1][6];          // b0
        ansy[1][1] = XX[1][7];          // b1
        ansy[1][2] = -ansx[1][1];       // b2;  b_2 = - a_1
        ansy[1][3] = XX[1][8];          // b3
        ansy[1][4] = -2.0*ansx[1][3];   // b4;  b_4 = -2*a_3
        ansy[1][5] = -0.5*ansx[1][4];   // b5;  b_5 = -0.5*a_4
        //////////
        for(i =0; i < MAX_N_COEF; i++)
            ansx[2][i] = XX[2][i];
        ansy[2][0] = XX[2][6];          // b0
        ansy[2][1] = XX[2][7];          // b1
        ansy[2][2] = -ansx[2][1];       // b2;  b_2 = - a_1
        ansy[2][3] = XX[2][8];          // b3
        ansy[2][4] = -2.0*ansx[2][3];   // b4;  b_4 = -2*a_3
        ansy[2][5] = -0.5*ansx[2][4];   // b5;  b_5 = -0.5*a_4
        ****/
        /// END: TMP, debug

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];
        /// TMP
        /****
        for(i = 0; i < MAX_N_COEF; i++)
        {
            ansx[5][i] = dg_B(st)[0][i];
            ansy[5][i] = dg_B(st)[1][i];
        }
        ****/
        /// END: TMP

        // for(i = 0; i < 2; i++)
        //     cellavgB[i] = Mag(st)[i];
        for(i = 0; i < MAX_N_COEF; i++)
        {
            dg_B(st)[0][i] = ansx[0][i];
            dg_B(st)[1][i] = ansy[0][i];
        }

        Mag(st)[0] = Mag(st)[1] = 0.0;
        for(i= 0; i< MAX_N_COEF; i++)
        {
            Mag(st)[0] += dg_B(st)[0][i]*Lmass_matrix[0][i];
            Mag(st)[1] += dg_B(st)[1][i]*Lmass_matrix[0][i];
        }
        for(i = 0; i < 2; i++)
            Mag(st)[i] /= Lmass_matrix[0][0];

        ///// TMP
        if(debug == YES)
        {
            double tmp_sum = 0.0;
            //test_tri_B_P1_polynomial_2nd_reconstruction(tri, midsoln, limit_store, rk_step, Bx_p1, By_p1);
            printf("TRI[%d]::::::\n", tri->id);
            //verify_edge_B(tri,rk_step, midsoln, ansx[0], ansy[0]);
            printf("WENO reconstruc Bx: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%e, ", ansx[5][i]);
            printf("\nCONT reconstruc Bx: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%e, ", dg_B(st)[0][i]);
            printf("\n");

            printf("WENO reconstruc By: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%e, ", ansy[5][i]);
            printf("\nCONT reconstruc By: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%e, ", dg_B(st)[1][i]);
            printf("\n");
            for(side = 0; side < 8; side++)
            {
                tmp_sum = 0.0;
                for(j = 0; j < 9; j++)
                    tmp_sum += alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_B[side][j]*XX[0][j];
                printf("Left side of  constraint eq[%d] = %e, right  = %e\n", side, tmp_sum, BBavg[side]);
            }

            for(side = 0; side < 2; side++)
            {
                tmp_sum = 0.0;
                for(j = 0; j < 9; j++)
                    tmp_sum += alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_A[side][j]*XX[0][j];
                printf("Left side of  least-sqr eq[%d] = %e, right  = %e\n", side, tmp_sum, Bavg[side]);
            }

            printf("CONT P1 reconstruc Bx: ");
            for(i = 0; i < 3; i++)
                printf("%e, ", Bx_p1[i]);
            printf("\n");
            printf("CONT P1 reconstruc By: ");
            for(i = 0; i < 3; i++)
                printf("%e, ", By_p1[i]);
            printf("\n\n");

            printf("CONT reconstruc +one-more-avg Bx: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%e, ", ansx[1][i]);
            printf("\nCONT reconstruc +one-more-avg By: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%e, ", ansy[1][i]);
            printf("\n");
            verify_edge_B(tri,rk_step, midsoln, ansx[1], ansy[1]);

            printf("\n\n");

            printf("CONT reconstruc +one-constr Bx: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%e, ", ansx[2][i]);
            printf("\nCONT reconstruc +one-constr By: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%e, ", ansy[2][i]);
            printf("\n");
            verify_edge_B(tri,rk_step, midsoln, ansx[2], ansy[2]);

            for(i = 0; i < 8; i++)
            {
                 printf("matr-B[%d]: ", i);
                 for(j = 0; j < 9; j++)
                     printf("%e ", alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_B[i][j]);
                 printf("\n");
            }
            for(i = 0; i < 2; i++)
            {
                 printf("matr-A[%d]: ", i);
                 for(j = 0; j < 9; j++)
                     printf("%e ", alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_A[i][j]);
                 printf("\n");
            }

            for(i = 0; i < 9; i++)
            {
                 printf("Iden[%d]: ", i);
                 for(j = 0; j < 9; j++)
                     printf("%e ", Iden[i][j]);
                 printf("\n");
            }
        }
}



//// Construct the P1 polynomial of the magnetic field on tri from the average of the normal component on each edge and its 1st variation.
//// Conservation is NOT enforced on tri and its neighbors. 
//// We also use the fact a1+b2=0.0 so that we solve 5 by 5 system.
////  b2 = -a1 relation is used.
////  The solution of the 5 by 5 linear system gives the coefficients in the order
////  a0; a1; a2; b0; b1;  and b2 is then recovered from b2 = -a1;
////  with: B_x= a0 + a1 (x-x_c) + a2 (y-y_c);
////        B_y= b0 + b1 (x-x_c) + b2 (y-y_c);
////  Do not form loops 
//// The solution is set to be 0 for debugging the hydro code
LOCAL void tri_B_P1_polynomial_2nd_reconstruction(
         TRI       *tri,
         Mid_soln *midsoln,
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *Bnbtri[3], *tmp_tri, *tris[20];
        double    AA[300], BB[300], CC[300], DD[300], XX[8][300], work[300], tmp, len;
        static double   **A = NULL, **invA, **I, **MB;
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, indx, cv_indx;
        Locstate  st, nbst[3], st2;
        double    **Lmass_matrix, Bavg[20], *pcrds[3], dy, dx, xx1, yy1, *cent = fg_centroid(tri), **tmpA, **tmpB, BBavg[8];
        int       side;
        static double    eps = 1.0e-6;
        double    wei[6], sum, alpha[6], B[2][6], outB0[4], sqr_B_len[30], t[3], nor[3];
        int       debug = NO, Edge_side[20], tmp_side, next_side, N_edge, num_tris_vertex;
        int       use_central_sten = YES, N_STEN;
        int       a1, a2, b1;
        double    sqrt_area;

        if(A == NULL)
        {
            matrix(&(A), 6, 6, sizeof(double));
            matrix(&(MB), 2, 6, sizeof(double));
            matrix(&(invA), 6, 6, sizeof(double));
            matrix(&(I), 6, 6, sizeof(double));
        }

        for(side = 0; side < 3; side++)
        {
            pcrds[side] = Coords(Point_of_tri(tri)[side]);
        }

        sqrt_area = sqrt(fg_area(tri));

        if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].B_2nd_recons_sten_set == NO)
        {
            alltri_HR_sten[tri->id].B_2nd_recons_sten_set = YES;
            matrix(&tmpA,5,5,sizeof(double));
            alltri_HR_sten[tri->id].B_2nd_recons_sten = tmpA;

            /// equation related 0th moment on 2 edges
            for(i = 0; i < 2; i++)
            {
                dy = pcrds[(i+1)%3][1] - pcrds[i][1];
                dx = pcrds[(i+1)%3][0] - pcrds[i][0];
                xx1 = pcrds[i][0] - cent[0];
                yy1 = pcrds[i][1] - cent[1];

                len = fg_length_side(tri)[i];

                tmpA[i][0] = dy;
                tmpA[i][1] = dy*(xx1 + 0.5*dx);
                tmpA[i][2] = dy*(yy1 + 0.5*dy);

                tmpA[i][3] = -dx;  /// b_0
                tmpA[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                for(j = 0; j < 5; j++)
                   tmpA[i][j] /= len;
            }
            if(rk_step == RK_STEP)
            {
                for(i = 0; i < 2; i++)
                    Bavg[i] = fg_side_dgB(tri)[i][0];
            }
            else
            {
                for(i = 0; i < 2; i++)
                    Bavg[i] = midsoln[tri->id].edge_dgBn[i][0][rk_step];
            }

            /// equation related to slope on 3 edges
            for(side = 0; side < 3; side++)
            {
                for(i = 0; i < dim; i++)
                    t[i] = fg_side_vector(tri)[side][i];
                nor[0] = t[1];
                nor[1] = -t[0];

                tmpA[side+2][0] = 0.0;
                tmpA[side+2][1] = 0.5*(pcrds[(side+1)%3][0] - pcrds[side][0])*nor[0];
                tmpA[side+2][2] = 0.5*(pcrds[(side+1)%3][1] - pcrds[side][1])*nor[0];
                tmpA[side+2][3] = 0.0;
                tmpA[side+2][4] = 0.5*(pcrds[(side+1)%3][0] - pcrds[side][0])*nor[1];
                tmpA[side+2][1] += -0.5*(pcrds[(side+1)%3][1] - pcrds[side][1])*nor[1];
            }
            if(rk_step == RK_STEP)
            {
                for(side = 0; side < 3; side++)
                    Bavg[side+2] = fg_side_dgB(tri)[side][1];
            }
            else
            {
                for(side = 0; side < 3; side++)
                    Bavg[side+2] = midsoln[tri->id].edge_dgBn[side][1][rk_step];
            }
            solve_by_gj(tmpA,5,Bavg,XX[0]);
        }
        else if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].B_2nd_recons_sten_set == YES)
        {
            tmpA = alltri_HR_sten[tri->id].B_2nd_recons_sten;
            if(rk_step == RK_STEP)
            {
                for(i = 0; i < 2; i++)
                    Bavg[i] = fg_side_dgB(tri)[i][0];
            }
            else
            {
                for(i = 0; i < 2; i++)
                    Bavg[i] = midsoln[tri->id].edge_dgBn[i][0][rk_step];
            }

            /// slope part
            if(rk_step == RK_STEP)
            {
                for(side = 0; side < 3; side++)
                    Bavg[side+2] = fg_side_dgB(tri)[side][1];
            }
            else
            {
                for(side = 0; side < 3; side++)
                    Bavg[side+2] = midsoln[tri->id].edge_dgBn[side][1][rk_step];
            }
            solve_by_gj(tmpA,5,Bavg,XX[0]);
        }

        if(alltri_HR_sten == NULL)
        {
            for(i = 0; i < 2; i++)
            {
                dy = pcrds[(i+1)%3][1] - pcrds[i][1];
                dx = pcrds[(i+1)%3][0] - pcrds[i][0];
                xx1 = pcrds[i][0] - cent[0];
                yy1 = pcrds[i][1] - cent[1];

                len = fg_length_side(tri)[i];

                A[i][0] = dy;
                A[i][1] = dy*(xx1 + 0.5*dx);
                A[i][2] = dy*(yy1 + 0.5*dy);

                A[i][3] = -dx;  /// b_0
                A[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                A[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                for(j = 0; j < 5; j++)
                   A[i][j] /= len;
            }

            if(rk_step == RK_STEP)
            {
                for(i = 0; i < 2; i++)
                    Bavg[i] = fg_side_dgB(tri)[i][0];
            }
            else
            {
                for(i = 0; i < 2; i++)
                    Bavg[i] = midsoln[tri->id].edge_dgBn[i][0][rk_step];
            }
            /// equation related to slope on edge
            for(side = 0; side < 3; side++)
            {
                for(i = 0; i < dim; i++)
                    t[i] = fg_side_vector(tri)[side][i];
                nor[0] = t[1];
                nor[1] = -t[0];

                A[side+2][0] = 0.0;
                A[side+2][1] = 0.5*(pcrds[(side+1)%3][0] - pcrds[side][0])*nor[0];
                A[side+2][2] = 0.5*(pcrds[(side+1)%3][1] - pcrds[side][1])*nor[0];
                A[side+2][3] = 0.0;
                A[side+2][4] = 0.5*(pcrds[(side+1)%3][0] - pcrds[side][0])*nor[1];
                A[side+2][1] += -0.5*(pcrds[(side+1)%3][1] - pcrds[side][1])*nor[1];
            }
            if(rk_step == RK_STEP)
            {
                for(side = 0; side < 3; side++)
                    Bavg[side+2] = fg_side_dgB(tri)[side][1];
            }
            else
            {
                for(side = 0; side < 3; side++)
                    Bavg[side+2] = midsoln[tri->id].edge_dgBn[side][1][rk_step];
            }
            solve_by_gj(A,5,Bavg,XX[0]);
        }

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        XX[0][5] = -XX[0][1];
        for(i = 0; i < MAX_N_COEF; i++)
        {
            dg_B(st)[0][i] = XX[0][i];
            dg_B(st)[1][i] = XX[0][MAX_N_COEF+i];
        }
        Mag(st)[0] = XX[0][0];
        Mag(st)[1] = XX[0][3];

        /***
        if(tri->id == 395)
        {
            printf("tri_B_P1_polynomial_2nd_reconstruction()\n\n");
            printf("B coefficient:\n");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("Bx[%d] = %e; ", i, dg_B(st)[0][i]);
            printf("\n");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("By[%d] = %e; ", i, dg_B(st)[1][i]);
            printf("\n");
        }
        ***/
}



// reconstruction of Bn on surface from Bn on edges //Huijing
LOCAL void constrained_trans_B_reconstruction_DG(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        Limiting_store **limit_store)
{
        RECT_GRID *gr = fr->rect_grid;
        TRI       *tri, *crsp_tri, *nbtri[3], *sten_tris[50], *tmptri, *new_cand[50];
        SURFACE   **surf;
        int       dim = 2, i, j, k, indx, side, pre_con_method = 1;
        float     *cent, B0[3];
        size_t    sizest = fr->sizest;
        Locstate  st;
        int       nn_num, repetition, N_use = 0, in_list, new_nn_num = 0, num;
        int       debug = NO, vt_adj;
        POINT     *pt[3];
        double    **con_u = NULL; 

        // printf("ENTER constrained_trans_B_reconstruction_DG()");

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                cent = fg_centroid(tri);
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                for(i = 0; i < 3; i++)
                {
                    nbtri[i] = Tri_on_side(tri,i);
                    pt[i] = Point_of_tri(tri)[i];
                }

                if(MAX_N_COEF == 1)
                {
                    tri_B_P0_polynomial_reconstruction(tri, midsoln,rk_step, B0);
                    continue;
                }

                if(MAX_N_COEF == 3)
                {
                    // tri_B_P1_polynomial_reconstruction_ver1(tri, midsoln,limit_store,rk_step);//OLD
                    // tri_B_P1_polynomial_reconstruction_upwind(tri, midsoln,limit_store,rk_step);
                    // tri_B_P1_polynomial_reconstruction_consv(tri, midsoln,limit_store,rk_step);
                    // tri_B_P0_polynomial_reconstruction(tri, midsoln,rk_step, B0);
                    // tri_B_P1_polynomial_reconstruction_ver1(tri, midsoln,limit_store,rk_step);
                    // tri_comput_P1_polynomials_from_avg_MHD(tri, nbtri, 3, midsoln,limit_store,rk_step);
                    // tri_comput_P1_polynomials_from_avg(tri, nbtri, 3, midsoln,limit_store,rk_step);
                    tri_B_P1_polynomial_reconstruction_ver2(tri, midsoln,limit_store,rk_step);//NEW
                    continue;
                }

                if(MAX_N_COEF == 6)
                {
                    // if(debugging("blast_MHD"))
                    // {
                    //     if(NO == tri_B_P2_polynomial_reconstruction_9edge_1(tri, midsoln,limit_store,rk_step))
                    //     {
                    //         tri_B_P2_polynomial_reconstruction_consv(tri, midsoln,limit_store,rk_step);
                    //     }
                    // }
                    // else
                    {
                        // tri_B_P0_polynomial_reconstruction(tri, midsoln,rk_step, B0);
                        // tri_B_P2_polynomial_reconstruction_consv(tri, midsoln,limit_store,rk_step);
                        // tri_B_P2_polynomial_reconstruction_9edge_1(tri, midsoln,limit_store,rk_step);
                        tri_B_P2_polynomial_reconstruction_new(tri, midsoln,limit_store,rk_step);
                    }
                    /***
                    get_sten_neighbr_B_field(nbtri, tri, sten_tris, &nn_num);
                    if(nn_num != 6)
                    {
                        printf("ERROR: WENO_constrained_trans_B_reconstruction(), B field not enough stencil\n");
                        clean_up(ERROR); 
                    }

                    // NOTE: tri_comput_P2_polynomials_from_avg_MHD(),
                    //       tri_comput_P2_polynomials_from_avg(), and  
                    //       tri_comput_P2_polynomials_from_avg_MHD_ver1() MUST use
                    //       get_sten_neighbr_B_field() to construct stencil
                    tri_comput_P2_polynomials_from_avg_MHD(tri, sten_tris, nn_num, midsoln,limit_store,rk_step, con_u);
                    // tri_comput_P2_polynomials_from_avg_MHD_ver1(tri, sten_tris, nn_num, midsoln,limit_store,rk_step);
                    // tri_comput_P2_polynomials_from_avg(tri, sten_tris, nn_num, midsoln,limit_store,rk_step);

                    ***/
                    /**
                    get_sten_consv_var_P2(nbtri, tri, sten_tris, &nn_num);
                    if(nn_num != 6)
                    {
                        printf("ERROR: WENO_constrained_trans_B_reconstruction(), B field not enough stencil\n");
                        clean_up(ERROR); 
                    }
                    tri_comput_P2_polynomials_from_avg_MHD_ver2(tri, sten_tris, nn_num, midsoln,limit_store,rk_step);
                    **/
                    continue;
                }
 
                set_HR_sten(nbtri, tri, sten_tris, &nn_num);

                /****
                if(tri->id == 2330)
                { 
                    printf("WENO_constrained_trans_B_reconstruction() work on tri %d, num tris found[%d]\n",
                            tri->id, nn_num);
                    for(i = 0; i < nn_num; i++)
                        printf("%d, tri[%d] %p in stencil\n", i, sten_tris[i]->id, sten_tris[i]);
                }
                ****/
                
                /***
                for(; ;)
                {
                    N_use = 0;    
                    num = nn_num;
                    for(i = 0; i < num; i++)
                    {
                        for(side = 0; side < 3; side++)
                        {
                            if((tmptri = Tri_on_side(sten_tris[i],side)) == NULL)
                                continue;
                        
                            vt_adj = NO;
                            for(j = 0; j < 3; j++)
                            {
                                if(pt[j] == Point_of_tri(tmptri)[0] ||
                                   pt[j] == Point_of_tri(tmptri)[1] ||
                                   pt[j] == Point_of_tri(tmptri)[2])
                                {
                                   vt_adj = YES;
                                   break;
                                }
                            }                           
                            if(NO == vt_adj)
                                continue;

                            repetition = NO;
                            for(j = 0; j < num; j++)
                            {
                                if(tmptri == sten_tris[j] || tmptri == tri)
                                {
                                    repetition = YES;
                                    break;
                                }
                            }

                            if(repetition == NO)
                            {
                                sten_tris[nn_num + N_use] = tmptri;
                                N_use++;
                            }
                        }
                    }
                    if(N_use != 0)
                    {
                        nn_num += N_use;
                    }
                    else
                        break;
                }
                *****/
 
                if(nn_num < 9)
                // if(nn_num != 9)
                {
                    ////////// TMP
                    /***
                    printf("ERROR: WENO_constrained_trans_B_reconstruction(), num of cells is not enough, nn_num= %d\n", nn_num);
                    printf("tri(%d), type %d, ceontriod (%g %g)\n", tri->id, tri->BC_type,
                       fg_centroid(tri)[0],  fg_centroid(tri)[1]);
                    print_tri_crds(tri);
                    for(i = 0; i < nn_num; i++)
                        printf("tri [%d] in stencil, type %d, cent[%g, %g]\n",
                                sten_tris[i]->id, sten_tris[i]->BC_type, fg_centroid(sten_tris[i])[0], 
                                fg_centroid(sten_tris[i])[1]);
                    clean_up(ERROR);
                    ***/ 
                    ////////// END TMP

                    N_use = 0; 
                    for(i = 0; i < nn_num; i++)
                    {
                        for(side = 0; side < 3; side++)
                        {
                            tmptri = Tri_on_side(sten_tris[i],side);
                            repetition = NO;
                            for(j = 0; j < nn_num; j++)
                            {
                                if(tmptri == sten_tris[j] || tmptri == tri)
                                {
                                    repetition = YES;
                                    break; 
                                }
                            }
                            ///// TMP
                            // if(debug == YES && tmptri->id == 14)
                            //     printf("tri[%d], repetition = %d\n", tmptri->id, repetition);

                            if(repetition == NO)
                            {
                                in_list = NO;
                                for(k = 0; k < N_use; k++)
                                {
                                    if(new_cand[k] == tmptri)
                                    {
                                        in_list = YES;
                                        break; 
                                    }
                                }
                                if(in_list == NO)
                                {
                                    new_cand[N_use] = tmptri;
                                    N_use++;
                                }
                            }
                        }    
                    }
                    //// add the new_cand to stencil
                    indx = 9 - nn_num;
                    if(indx > N_use)
                    { 
                        printf("ERROR: WENO_constrained_trans_B_reconstruction(), can not find");
                        printf(" enough candidates\n");  
                        clean_up(ERROR);
                    }
                    for(i = 0; i < indx; i++)
                        sten_tris[nn_num+i] = new_cand[i];
                    nn_num = 9;

                    ///// TMP
                    /***
                    if(debug == YES)
                    {
                        printf("WENO_constrained_trans_B_reconstruction() after found new tris\n");
                        for(i = 0; i < nn_num; i++)
                            printf("%d, tri[%d] %p in stencil\n", i, sten_tris[i]->id, sten_tris[i]);
                    }
                    ***/
                } /// END::: if(nn_num < 9)
                /// printf("WENO_constrained_trans_B_reconstruction() num of tris in sten = %d\n", nn_num);
            
                if(nn_num == 9)
                    tri_comput_P3_polynomials_from_avg(tri, sten_tris, nn_num, midsoln,limit_store,rk_step);
                else
                    tri_comput_P3_polynomials_from_avg_by_min(tri, sten_tris, nn_num, midsoln,limit_store,rk_step);
            }
        }
}

/* NOTE: the average states used for reconstruction are saved at midsoln.st[0] level
 */
/*
 * WENO_constrained_trans_B_reconstruction()
 *
 * Visit every triangle of every surface in fr->mesh and rebuild the
 * polynomial representation of the magnetic field on it.  Triangles whose
 * BC_type is SUBDOMAIN are skipped.  The routine used depends on MAX_N_COEF:
 *
 *      1     -> tri_B_P0_polynomial_reconstruction()
 *      3     -> tri_B_P1_polynomial_reconstruction_ver1()
 *      6     -> tri_B_P2_polynomial_reconstruction_9edge_1()
 *      other -> P3 reconstruction from a stencil of neighbouring cells
 *
 * For the P3 case the stencil comes from set_HR_sten().  When it holds fewer
 * than 9 triangles it is topped up to exactly 9 with side neighbours of the
 * triangles already in it; if that still is not enough the run is aborted
 * through clean_up(ERROR).  A stencil of exactly 9 triangles goes to
 * tri_comput_P3_polynomials_from_avg(); any other size goes to
 * tri_comput_P3_polynomials_from_avg_by_min().
 *
 *      fr          front; only fr->mesh->surfaces is read here
 *      midsoln     per-triangle intermediate solution storage, forwarded
 *      rk_step     Runge-Kutta stage index, forwarded
 *      limit_store limiter storage, forwarded
 *
 * Historical note kept from the removed dead code: the P2 alternatives
 * tri_comput_P2_polynomials_from_avg_MHD(), tri_comput_P2_polynomials_from_avg()
 * and tri_comput_P2_polynomials_from_avg_MHD_ver1() MUST be fed a stencil
 * built by get_sten_neighbr_B_field().
 */
LOCAL void WENO_constrained_trans_B_reconstruction(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        Limiting_store **limit_store)
{
        TRI       *tri, *nbtri[3], *sten_tris[50], *tmptri, *new_cand[50];
        SURFACE   **surf;
        int       i, j, side, indx;
        int       nn_num, N_use, known;
        float     B0[3];

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                for(i = 0; i < 3; i++)
                    nbtri[i] = Tri_on_side(tri,i);

                if(MAX_N_COEF == 1)
                {
                    tri_B_P0_polynomial_reconstruction(tri, midsoln,rk_step, B0);
                    continue;
                }

                if(MAX_N_COEF == 3)
                {
                    tri_B_P1_polynomial_reconstruction_ver1(tri, midsoln,limit_store,rk_step);
                    continue;
                }

                if(MAX_N_COEF == 6)
                {
                    tri_B_P2_polynomial_reconstruction_9edge_1(tri, midsoln,limit_store,rk_step);
                    continue;
                }

                set_HR_sten(nbtri, tri, sten_tris, &nn_num);

                if(nn_num < 9)
                {
                    /* Gather side neighbours of the current stencil members
                     * that are not yet known, in discovery order. */
                    N_use = 0;
                    for(i = 0; i < nn_num; i++)
                    {
                        for(side = 0; side < 3; side++)
                        {
                            tmptri = Tri_on_side(sten_tris[i],side);

                            /* A missing neighbour must never enter the
                             * stencil: it would be dereferenced later. */
                            if(tmptri == NULL)
                                continue;

                            /* Reject the centre cell, anything already in
                             * the stencil, and anything already queued. */
                            known = (tmptri == tri) ? YES : NO;
                            for(j = 0; j < nn_num && known == NO; j++)
                            {
                                if(sten_tris[j] == tmptri)
                                    known = YES;
                            }
                            for(j = 0; j < N_use && known == NO; j++)
                            {
                                if(new_cand[j] == tmptri)
                                    known = YES;
                            }
                            if(known == YES)
                                continue;

                            new_cand[N_use] = tmptri;
                            N_use++;
                        }
                    }

                    /* Append just enough candidates to reach 9 cells. */
                    indx = 9 - nn_num;
                    if(indx > N_use)
                    {
                        printf("ERROR: WENO_constrained_trans_B_reconstruction(), can not find");
                        printf(" enough candidates\n");
                        clean_up(ERROR);
                    }
                    for(i = 0; i < indx; i++)
                        sten_tris[nn_num+i] = new_cand[i];
                    nn_num = 9;
                }

                if(nn_num == 9)
                    tri_comput_P3_polynomials_from_avg(tri, sten_tris, nn_num, midsoln,limit_store,rk_step);
                else
                    tri_comput_P3_polynomials_from_avg_by_min(tri, sten_tris, nn_num, midsoln,limit_store,rk_step);
            }
        }
}

/// Constant (P0) reconstruction of the magnetic field on one triangle.
///
/// The two unknowns (Bx, By) are obtained by handing comp_coef() a 2x2
/// matrix whose rows are the rotated side vectors (t_y, -t_x) of sides 0 and
/// 1, together with the stored normal-B values of those two sides
/// (presumably comp_coef solves A*B0 = rside; it is defined elsewhere).
/// The result is written to outB0[0..1], to Mag(st) and to the zeroth
/// dg_B(st) coefficient; all higher dg_B(st) coefficients are cleared.
/// Side 2 is then used as a check: if the reconstructed normal component
/// differs from the stored one by more than 1.0e-10 the run is aborted.
///
///   tri      triangle to reconstruct on
///   midsoln  intermediate solution storage, read when rk_step != RK_STEP
///   rk_step  stage index; RK_STEP selects tri->st and fg_side_B(tri)
///   outB0    receives the two reconstructed components
EXPORT void tri_B_P0_polynomial_reconstruction(
         TRI       *tri,
         Mid_soln  *midsoln,
         int      rk_step,
         double    *outB0)
{
        Locstate  st;
        double    A[2][2], rside[2], B0[2];
        double    Bn[3], nor2[2], Bn_fit;
        int       s, c;
        int       debug = NO;

        /* Pick the state and the per-side normal B values for this stage. */
        if(rk_step == RK_STEP)
        {
            st = tri->st;
            for(s = 0; s < 3; s++)
                Bn[s] = fg_side_B(tri)[s];
        }
        else
        {
            st = midsoln[tri->id].st[rk_step];
            for(s = 0; s < 3; s++)
                Bn[s] = midsoln[tri->id].edge_Bn[s][rk_step];
        }

        /* One matrix row per side: the side vector rotated to (t_y, -t_x). */
        for(s = 0; s < 2; s++)
        {
            A[s][0] =  fg_side_vector(tri)[s][1];
            A[s][1] = -fg_side_vector(tri)[s][0];
        }

        if(debug == YES)
        {
            Bn_fit = A[0][0]*Mag(st)[0] + A[0][1]*Mag(st)[1];
            printf("Bn from evolved face B = %g, edge Bn = %g on side[0]\n",
                         Bn_fit, Bn[0]);
            Bn_fit = A[1][0]*Mag(st)[0] + A[1][1]*Mag(st)[1];
            printf("Bn from evolved face B = %g, edge Bn = %g on side[1]\n",
                         Bn_fit, Bn[1]);
        }

        for(s = 0; s < 2; s++)
            rside[s] = Bn[s];

        comp_coef(A,rside,B0);

        /* Publish the constant mode and wipe every higher mode. */
        for(c = 0; c < 2; c++)
            outB0[c] = B0[c];

        for(s = 1; s < MAX_N_COEF; s++)
        {
            dg_B(st)[0][s] = 0.0;
            dg_B(st)[1][s] = 0.0;
        }

        for(c = 0; c < 2; c++)
            dg_B(st)[c][0] = Mag(st)[c] = B0[c];

        /* Consistency check against the side that was not used in the fit. */
        nor2[0] =  fg_side_vector(tri)[2][1];
        nor2[1] = -fg_side_vector(tri)[2][0];
        Bn_fit = nor2[0]*B0[0] + nor2[1]*B0[1];

        if(fabs(Bn_fit - Bn[2]) > 1.0e-10)
        {
            printf("ERROR: tri_B_P0_polynomial_reconstruction() tri[%d]\n", tri->id);
            printf("sum = %13.12g,  Bavg[2] = %13.12g\n", Bn_fit,  Bn[2]);
            clean_up(ERROR);  
        }
}


//// 5 edges-case 2, one-sided case
////
//// Fills tris[k] / Edge_side[k] (k = 0..4) with five (triangle, side) pairs
//// taken from four cells, and returns the number of pairs written (5):
////   0: tri,  side
////   1: tri,  (side+1)%3
////   2: nb,   the side after the one facing tri       (nb = Tri_on_side(tri,side))
////   3: nb's neighbour across that side,  the side after the one facing nb
////   4: nb's neighbour across its remaining side,  two sides after the one facing nb
//// No NULL checks are made on any neighbour.  If a "facing" side is not
//// found the search index ends at 3 and the modulo arithmetic is applied to
//// that value.
LOCAL int Mag_p1_5edge_one_side_sten(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *Edge_side)
{
         TRI       *across = Tri_on_side(tri,side);
         TRI       *outer;
         int       count = 0, back, facing, first_out, second_out;

            /* Two edges of the centre triangle. */
            tris[count] = tri;
            Edge_side[count] = side;
            count++;

            tris[count] = tri;
            Edge_side[count] = (side+1)%3;
            count++;

            /* Side of the neighbour that looks back at tri. */
            back = 0;
            while(back < 3 && Tri_on_side(across,back) != tri)
                back++;
            first_out  = (back+1)%3;
            second_out = (back+2)%3;

            tris[count] = across;
            Edge_side[count] = first_out;
            count++;

            /* Cell beyond the neighbour's first outward side. */
            outer = Tri_on_side(across,first_out);
            facing = 0;
            while(facing < 3 && Tri_on_side(outer,facing) != across)
                facing++;

            tris[count] = outer;
            Edge_side[count] = (facing+1)%3;
            count++;

            /* Cell beyond the neighbour's second outward side. */
            outer = Tri_on_side(across,second_out);
            facing = 0;
            while(facing < 3 && Tri_on_side(outer,facing) != across)
                facing++;

            tris[count] = outer;
            Edge_side[count] = (facing+2)%3;
            count++;

            return count;
}

//// 6 edges-case 2, one-sided case
////
//// Fills tris[k] / Edge_side[k] (k = 0..5) with six (triangle, side) pairs
//// taken from four cells, and returns the number of pairs written (6):
////   0..2: tri with side, (side+1)%3, (side+2)%3
////   3:    nb,   the side after the one facing tri    (nb = Tri_on_side(tri,side))
////   4:    nb's neighbour across that side,  the side after the one facing nb
////   5:    nb's neighbour across its remaining side,  two sides after the one facing nb
//// No NULL checks are made on any neighbour.  If a "facing" side is not
//// found the search index ends at 3 and the modulo arithmetic is applied to
//// that value.
LOCAL int Mag_p2_6edge_one_side_sten_new(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *Edge_side)
{
         TRI       *across = Tri_on_side(tri,side);
         TRI       *outer;
         int       count, shift, back, facing, first_out, second_out;

            /* All three edges of the centre triangle, starting at `side`. */
            count = 0;
            for(shift = 0; shift < 3; shift++)
            {
                tris[count] = tri;
                Edge_side[count] = (shift == 0) ? side : (side+shift)%3;
                count++;
            }

            /* Side of the neighbour that looks back at tri. */
            for(back = 0; back < 3; back++)
            {
                if(Tri_on_side(across,back) == tri)
                    break;
            }
            first_out  = (back+1)%3;
            second_out = (back+2)%3;

            tris[count] = across;
            Edge_side[count] = first_out;
            count++;

            /* Cell beyond the neighbour's first outward side. */
            outer = Tri_on_side(across,first_out);
            for(facing = 0; facing < 3; facing++)
            {
                if(Tri_on_side(outer,facing) == across)
                    break;
            }
            tris[count] = outer;
            Edge_side[count] = (facing+1)%3;
            count++;

            /* Cell beyond the neighbour's second outward side. */
            outer = Tri_on_side(across,second_out);
            for(facing = 0; facing < 3; facing++)
            {
                if(Tri_on_side(outer,facing) == across)
                    break;
            }
            tris[count] = outer;
            Edge_side[count] = (facing+2)%3;
            count++;

            return count;
}


//// Construct P1 poly of Magnetic field on tri from the stored edge moments
//// of the normal component (fg_side_dgB() at RK_STEP, otherwise
//// midsoln[].edge_dgBn[..][..][rk_step]).
//// Only the three edges returned by Mag_p1_3edge_central_sten_new() are
//// used; nothing in this routine enforces conservation on tri or its
//// neighbors.
//// The relation a1 + b2 = 0 (b2 = -a1) is substituted, so a 5 by 5 system
//// is solved for a0, a1, a2, b0, b1:
////    rows 0,1 : mean and slope moment of edge 0
////    rows 2,3 : mean and slope moment of edge 1
////    row  4   : slope moment of edge 2 (its mean is not used)
//// The matrix entries are divided by sqrt(area), i.e. the rows are
//// consistent with
////        B_x = a0 + (a1 (x-x_c) + a2 (y-y_c)) / sqrt(area)
////        B_y = b0 + (b1 (x-x_c) + b2 (y-y_c)) / sqrt(area)
//// Coefficients are stored as dg_B(st)[0][0..2] = a0,a1,a2 and
//// dg_B(st)[1][0..2] = b0,b1,b2; any coefficient slot beyond index 2 is
//// cleared.  Mag(st) receives (a0, b0).
////
//// When alltri_HR_sten is available the 5x5 matrix is built once per
//// triangle, stored in alltri_HR_sten[tri->id].MB_A and reused on later
//// calls; otherwise a function-static scratch matrix is rebuilt each call.
//// NOTE(review): the cached matrix is handed to solve_by_gj() on every call;
//// this is only valid if solve_by_gj() leaves its matrix argument intact --
//// confirm against its definition.
////
//// limit_store is accepted for interface compatibility and not used.
LOCAL void tri_B_P1_polynomial_reconstruction_ver2(
         TRI       *tri,   
         Mid_soln *midsoln,  
         Limiting_store **limit_store,
         int      rk_step)
{
        static double   **A = NULL;     /* scratch system for the uncached path */
        TRI       *tris[20], *etri;
        int       Edge_side[20];
        double    **tmpA, **sys = NULL;
        /* Capacities of Bavg/XX are kept from the original code because
         * solve_by_gj() is not visible from here. */
        double    Bavg[20], XX[300];
        double    *p0, *p1, *row;
        double    *cent = fg_centroid(tri);
        double    nx, ny, sum_x, sum_y, dif_x, dif_y, sqrt_area;
        int       i, e, N_edge = 0, slope_row, build = NO;
        Locstate  st;

        if(A == NULL)
            matrix(&(A), 6, 6, sizeof(double));

        sqrt_area = sqrt(fg_area(tri));

        /* Decide which matrix to use and whether it has to be assembled. */
        if(alltri_HR_sten == NULL)
        {
            N_edge = Mag_p1_3edge_central_sten_new(tri, tris, Edge_side);
            sys = A;
            build = YES;
        }
        else if(alltri_HR_sten[tri->id].Bsten_set == NO)
        {
            alltri_HR_sten[tri->id].Bsten_set = YES;

            // use 3 edges of 1 tri
            N_edge = Mag_p1_3edge_central_sten_new(tri, tris, Edge_side);

            matrix(&tmpA,5,5,sizeof(double));
            alltri_HR_sten[tri->id].MB_A = tmpA;
            sys = tmpA;
            build = YES;
        }
        else if(alltri_HR_sten[tri->id].Bsten_set == YES)
        {
            N_edge = Mag_p1_3edge_central_sten_new(tri, tris, Edge_side);
            sys = alltri_HR_sten[tri->id].MB_A;
            build = NO;
        }
        else
        {
            /* Previously this fell through and stored uninitialised data. */
            printf("ERROR: tri_B_P1_polynomial_reconstruction_ver2(), tri[%d] has invalid Bsten_set\n",
                   tri->id);
            clean_up(ERROR);
            return;
        }

        /* The row layout below is hard-wired for exactly three edges. */
        if(N_edge != 3)
        {
            printf("ERROR: tri_B_P1_polynomial_reconstruction_ver2(), tri[%d] needs 3 edges, got %d\n",
                   tri->id, N_edge);
            clean_up(ERROR);
            return;
        }

        for(i = 0; i < N_edge; i++)
        {
            etri = tris[i];
            e = Edge_side[i];
            slope_row = (i < 2) ? 2*i + 1 : 4;

            if(build == YES)
            {
                p0 = Coords(Point_of_tri(etri)[e]);
                p1 = Coords(Point_of_tri(etri)[(e+1)%3]);

                /* side vector rotated to (t_y, -t_x) */
                nx =  fg_side_vector(etri)[e][1];
                ny = -fg_side_vector(etri)[e][0];

                sum_x = p0[0] + p1[0] - 2.0*cent[0];
                sum_y = p0[1] + p1[1] - 2.0*cent[1];
                dif_x = p1[0] - p0[0];
                dif_y = p1[1] - p0[1];

                if(i < 2)
                {
                    /* mean-moment equation of edges 0 and 1 */
                    row = sys[2*i];
                    row[0] = 2.0*nx;
                    /* a_1 column also carries the b_2 = -a_1 contribution */
                    row[1] = nx*sum_x/sqrt_area - ny*sum_y/sqrt_area;
                    row[2] = nx*sum_y/sqrt_area;
                    row[3] = 2.0*ny;                 // b_0
                    row[4] = ny*sum_x/sqrt_area;     // b_1
                }

                /* slope-moment equation of every edge */
                row = sys[slope_row];
                row[0] = 0.0;
                row[1] = nx*dif_x/3.0/sqrt_area - ny*dif_y/3.0/sqrt_area;
                row[2] = nx*dif_y/3.0/sqrt_area;
                row[3] = 0.0;                        // b_0
                row[4] = ny*dif_x/3.0/sqrt_area;     // b_1
            }

            /* Right-hand side: always refreshed, it depends on rk_step. */
            if(rk_step == RK_STEP)
            {
                if(i < 2)
                    Bavg[2*i] = 2.0*fg_side_dgB(etri)[e][0];
                Bavg[slope_row] = 2.0/3.0*fg_side_dgB(etri)[e][1];
            }
            else
            {
                if(i < 2)
                    Bavg[2*i] = 2.0*midsoln[etri->id].edge_dgBn[e][0][rk_step];
                Bavg[slope_row] = 2.0/3.0*midsoln[etri->id].edge_dgBn[e][1][rk_step];
            }
        }

        solve_by_gj(sys,5,Bavg,XX);

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        /* Solution layout: a0 a1 a2 b0 b1, then b2 = -a1 appended. */
        XX[5] = -XX[1];
        for(i = 0; i < MAX_N_COEF; i++)
        {
            /* b-coefficients always start at offset 3, independent of
             * MAX_N_COEF; modes beyond P1 are not reconstructed here. */
            dg_B(st)[0][i] = (i < 3) ? XX[i]     : 0.0;
            dg_B(st)[1][i] = (i < 3) ? XX[3 + i] : 0.0;
        }
        Mag(st)[0] = XX[0];
        Mag(st)[1] = XX[3];
}

//// Construct the P1 polynomial of the magnetic field on tri from the normal edge values.
//// Conservation is NOT enforced on tri and its neighbors.
//// We also use the fact that a1+b2=0.0, so that we solve a 5 by 5 system.
////  The relation b2 = -a1 is used.
////  The solution of the linear system gives the coefficients in the order
////  a0; a1; a2; b0; b1; b2;
////  with: B_x= a0 + a1 (x-x_c) + a2 (y-y_c);
////        B_y= b0 + b1 (x-x_c) + b2 (y-y_c);
////  Do not form loops
//// The solution is set to be 0 for debugging the hydro code
LOCAL void tri_B_P1_polynomial_reconstruction_ver1(
         TRI       *tri,   
         Mid_soln *midsoln,  
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *Bnbtri[3], *tmp_tri, *tris[20];
        double    AA[300], BB[300], CC[300], DD[300], XX[8][300], work[300], tmp, len;
        static double   **A = NULL, **invA, **I, **MB;
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, indx, cv_indx;
        Locstate  st, nbst[3], st2;
        double    **Lmass_matrix, Bavg[20], *pcrds[3], dy, dx, xx1, yy1, *cent = fg_centroid(tri), **tmpA, **tmpB, BBavg[8];
        int       side;
        static double    eps = 1.0e-6;
        double    wei[6], sum, alpha[6], B[2][6], outB0[4], sqr_B_len[30];
        int       debug = NO, Edge_side[20], tmp_side, next_side, N_edge, num_tris_vertex;
        int       use_central_sten = YES, N_STEN;
        int       a1, a2, b1; 

        // printf("ERROR: tri_B_P1_polynomial_reconstruction_ver1() %d\n",tri->id);

        ///// only do sanity test at this point
        /****
        if(rk_step == RK_STEP)
        {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = fg_side_B(tri)[i];
        
                    Bnbtri[i] = Tri_on_side(tri,i);
                    for(side = 0; side < 3; side++)
                    {
                        if(tri == Tri_on_side(Bnbtri[i], side))
                            break;
                    }    
                    if(fabs(Bavg[i] + fg_side_B(Bnbtri[i])[side]) > 1.0e-11)
                    {
                        printf("ERROR: tri_B_P1_polynomial_reconstruction_ver1()\n");
                        printf("normal Bn are not consistent\n");
                        printf("for tri[%d], its neighbr[%d], Bn %g, Bn %g, side[%d, %d]\n",
                                       tri->id, Bnbtri[i]->id, Bavg[i], fg_side_B(Bnbtri[i])[side],
                                            i, side);
                        print_tri_crds(tri);
                        print_tri_crds(Bnbtri[i]);
                        clean_up(ERROR);
                    }
                }
        }
        else
        {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = midsoln[tri->id].edge_Bn[i][rk_step];

                    Bnbtri[i] = Tri_on_side(tri,i);
                    for(side = 0; side < 3; side++)
                    {
                        if(tri == Tri_on_side(Bnbtri[i], side))
                            break;
                    } 
                    if(fabs(Bavg[i] + midsoln[Bnbtri[i]->id].edge_Bn[side][rk_step]) > 1.0e-11 
                       // && tri->BC_type != SUBDOMAIN && Bnbtri[i]->BC_type != SUBDOMAIN
                      )
                    {
                        printf("ERROR: tri_B_P1_polynomial_reconstruction_ver1() 2\n");
                        printf("normal Bn are not consistent\n");
                        printf("for tri[%d], its neighbr[%d], Bn %g, Bn %g, side[%d, %d]\n",
                                       tri->id, Bnbtri[i]->id, Bavg[i], midsoln[Bnbtri[i]->id].edge_Bn[side][rk_step],
                                            i, side);
                        print_tri_crds(tri);
                        print_tri_crds(Bnbtri[i]);
                        clean_up(ERROR);
                    }
                    else
                    {
                        // printf("Tri[%d] and [%d] are consistent\n", tri->id, Bnbtri[i]->id);
                        // printf("for tri[%d], its neighbr[%d], Bn %g, Bn %g, side[%d, %d]\n",
                        //                tri->id, Bnbtri[i]->id, Bavg[i], midsoln[Bnbtri[i]->id].edge_Bn[side][rk_step],
                        //                     i, side);
                        // print_tri_crds(tri);
                        // print_tri_crds(Bnbtri[i]);
                    }

                }
        }
        //  return;   
        *****/

        // if(tri->id == 2055 || tri->id == 2552)
        // if(tri->id == 3763 || tri->id ==844)
        // if(tri->id == 350 || tri->id == 405)

        // if(tri->id == 590)
        /*****  /////// Divergence-free check
        {
            double tmp;
            if(rk_step == RK_STEP)
            {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = fg_side_B(tri)[i];
                }
            }
            else
            {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = midsoln[tri->id].edge_Bn[i][rk_step];
                }
            }
            if(rk_step == RK_STEP)
                st = tri->st;
            else
                st = midsoln[tri->id].st[rk_step];

            tmp = Bavg[0]*fg_length_side(tri)[0] +
                  Bavg[1]*fg_length_side(tri)[1] +
                  Bavg[2]*fg_length_side(tri)[2];
            if(fabs(tmp) > 1.0e-11)
            {
                printf("\n\n &&&&&&&&&&& ERROR: tri[%d] enter tri_B_P1_polynomial_reconstruction_ver1(), initial divg = %g\n",
                     tri->id, tmp);
                printf("Bavg[%12.11g, %12.11g, %12.11g]\n", Bavg[0], Bavg[1], Bavg[2]);
                printf("Evolved cell avg Bx, By = [%g, %g]\n", Mag(st)[0],   Mag(st)[1]);
                print_tri_crds(tri);
            // debug = YES;
                clean_up(ERROR);
            }
        }
        ****/

        if(A == NULL)
        {
            matrix(&(A), 6, 6, sizeof(double));
            matrix(&(MB), 2, 6, sizeof(double));
            matrix(&(invA), 6, 6, sizeof(double));
            matrix(&(I), 6, 6, sizeof(double));
        }

        for(side = 0; side < 3; side++)
        {
            Bnbtri[side] = Tri_on_side(tri,side);
            pcrds[side] = Coords(Point_of_tri(tri)[side]);
        }

        if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].Bsten_set == NO)
        {
            alltri_HR_sten[tri->id].Bsten_set = YES;
            ////////////////////////////////////////
            /// Start: central stencil reconstruction------alltri_HR_sten != NULL && Bsten_set = NO
            ////////////////////////////////////////
            if(use_central_sten == YES)
            {
                N_edge = Mag_p1_5edge_central_sten(tri, tris, Edge_side);
                matrix(&tmpA,5,5,sizeof(double));
                alltri_HR_sten[tri->id].MB_A = tmpA;
                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dy = pcrds[1][1] - pcrds[0][1];
                    dx = pcrds[1][0] - pcrds[0][0];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];

                    tmpA[i][0] = dy;
                    tmpA[i][1] = dy*(xx1 + 0.5*dx);
                    tmpA[i][2] = dy*(yy1 + 0.5*dy);

                    tmpA[i][3] = -dx;  /// b_0
                    tmpA[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                    tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    for(j = 0; j < 5; j++)
                       tmpA[i][j] /= len;
                }

                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(tmpA,5,Bavg,XX[6]);
            }
            ////////////////////////////////////////
            /// END: central stencil reconstruction------alltri_HR_sten != NULL && Bsten_set = NO
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: one-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set = NO
            ////////////////////////////////////////
            for(side = 0; side < 3; side++) 
            {
                N_edge = Mag_p1_5edge_one_side_sten(tri, side, tris, Edge_side);
                matrix(&tmpA,5,5,sizeof(double));
                alltri_HR_sten[tri->id].Bsten[side] = tmpA;

                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dy = pcrds[1][1] - pcrds[0][1];
                    dx = pcrds[1][0] - pcrds[0][0];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    // len = sqrt( sqr(dx) + sqr(dy) );
                    len = fg_length_side(tris[i])[Edge_side[i]];

                    /***
                    tmpA[i][0] = dy;
                    tmpA[i][1] = 0.5*dy*dx + dy*pcrds[0][0];
                    tmpA[i][2] = 0.5*sqr(dy) + dy*pcrds[0][1];
                    tmpA[i][3] = -dx;
                    tmpA[i][4] = -(0.5*sqr(dx) + dx*pcrds[0][0]);
                    // A[side][5] = -(0.5*dx*dy + dx*pcrds[side][1]);
                    tmp = (0.5*dx*dy + dx*pcrds[0][1]);
                    tmpA[i][1] += tmp;
                    // A[i][5] = -tmp;
                    ***/

                    tmpA[i][0] = dy;
                    tmpA[i][1] = dy*(xx1 + 0.5*dx);
                    tmpA[i][2] = dy*(yy1 + 0.5*dy);

                    tmpA[i][3] = -dx;  /// b_0
                    tmpA[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                    tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    for(j = 0; j < 5; j++)
                    tmpA[i][j] /= len;
                }

                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(tmpA,5,Bavg,XX[side]);
            }
            ////////////////////////////////////////
            /// END: one-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set = NO
            ////////////////////////////////////////
            ////////////////////////////////////////
            /// Start: reverse-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set = NO
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                matrix(&tmpA,5,5,sizeof(double));
                matrix(&tmpB,2,5,sizeof(double));
                alltri_HR_sten[tri->id].Bsten_rev[side] = tmpA;
                alltri_HR_sten[tri->id].Bsten_rev_MB_B[side] = tmpB;

                count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    N_edge = Mag_p1_5edge_reverse_sten_5pt_vertex(tri,side,tris, Edge_side);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_9edge_1, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                    N_edge = Mag_p1_5edge_reverse_sten(tri,side,tris, Edge_side);

                num_tris_vertex = 5; //// TMP
                if(num_tris_vertex == 5)
                {
                    for(i = 0; i < N_edge; i++)
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                        dx = pcrds[1][0] - pcrds[0][0];
                        dy = pcrds[1][1] - pcrds[0][1];
                        xx1 = pcrds[0][0] - cent[0];
                        yy1 = pcrds[0][1] - cent[1];

                        len = fg_length_side(tris[i])[Edge_side[i]];
    
                        tmpA[i][0] = dy;
                        tmpA[i][1] = dy*(xx1 + 0.5*dx);
                        tmpA[i][2] = dy*(yy1 + 0.5*dy);

                        tmpA[i][3] = -dx;    // b_0
                        tmpA[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                        tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1

                        for(j = 0; j < 5; j++)
                            tmpA[i][j] /= len;
                    }
                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < N_edge; i++)
                            Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                    else
                    {
                        for(i = 0; i < N_edge; i++)
                            Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }

                    solve_by_gj(tmpA,5,Bavg,XX[side+3]);
                }
                else /// there are 6 edges
                {
                    //// Now constrained least square
                    M = 4; N = 5; P = 2; LDA = M; LDB = P; LWORK = M+N+P;
                    // M: number of least square eqn.
                    // N: number of unknown, 
                    // P: number of constraint eqn.
                    // first setup least square eqn.
                    for(i = 2; i < N_edge; i++)
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                        dx = pcrds[1][0] - pcrds[0][0];
                        dy = pcrds[1][1] - pcrds[0][1];
                        xx1 = pcrds[0][0] - cent[0];
                        yy1 = pcrds[0][1] - cent[1];

                        len = fg_length_side(tris[i])[Edge_side[i]];

                        tmpA[i-2][0] = dy;
                        tmpA[i-2][1] = dy*(xx1 + 0.5*dx);
                        tmpA[i-2][2] = dy*(yy1 + 0.5*dy);

                        tmpA[i-2][3] = -dx;    // b_0
                        tmpA[i-2][4] = -dx*(xx1 + 0.5*dx);  // b_1
                        tmpA[i-2][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1

                        for(j = 0; j < 4; j++)
                            tmpA[i-2][j] /= len;
                    } 

                    if(rk_step == RK_STEP)
                    {
                        for(i = 2; i < N_edge; i++)
                            Bavg[i-2] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                    else
                    {
                        for(i = 2; i < N_edge; i++)
                            Bavg[i-2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }
                    // Now setup constraint eqn.
                    for(i = 0; i < 2; i++)
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                        dx = pcrds[1][0] - pcrds[0][0];
                        dy = pcrds[1][1] - pcrds[0][1];
                        xx1 = pcrds[0][0] - cent[0];
                        yy1 = pcrds[0][1] - cent[1];
    
                        len = fg_length_side(tris[i])[Edge_side[i]];
                        tmpB[i][0] = dy;
                        tmpB[i][1] = dy*(xx1 + 0.5*dx);
                        tmpB[i][2] = dy*(yy1 + 0.5*dy);

                        tmpB[i][3] = -dx;    // b_0
                        tmpB[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                        tmpB[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1

                        for(j = 0; j < 2; j++)
                            tmpB[i][j] /= len;
                    }
                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < 2; i++)
                        {
                            BBavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                        }
                    }
                    else
                    {
                        for(i = 0; i < 2; i++)
                        {
                            BBavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                        }
                    }

                    l = 0;
                    for(j = 0; j < N; j++)
                    {
                        for(i = 0; i < M; i++)
                        {
                            AA[l] = tmpA[i][j];
                            l++;
                        }
                    }
                    l = 0;
                    for(j = 0; j < N; j++)
                    {
                        for(i = 0; i < P; i++)
                        {
                            BB[l] = tmpB[i][j];
                            l++;
                        }
                    }
                    for(i = 0; i < M; i++)  // right side for the least square part
                        CC[i] = Bavg[i];
                    for(i = 0; i < P; i++)  // right side for the constrained part
                        DD[i] = BBavg[i];
                    FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[side+3], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
                } /// END: /// there are 6 edges
            }
            ////////////////////////////////////////
            /// END: reverse stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == NO
            ////////////////////////////////////////
        }
        else if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].Bsten_set == YES)
        {
            ////////////////////////////////////////
            /// Start: central stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            if(use_central_sten == YES)
            {
                N_edge = Mag_p1_5edge_central_sten(tri, tris, Edge_side);
                tmpA = alltri_HR_sten[tri->id].MB_A;
                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dy = pcrds[1][1] - pcrds[0][1];
                    dx = pcrds[1][0] - pcrds[0][0];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];
        
                    tmpA[i][0] = dy;
                    tmpA[i][1] = dy*(xx1 + 0.5*dx);
                    tmpA[i][2] = dy*(yy1 + 0.5*dy);
            
                    tmpA[i][3] = -dx;  /// b_0
                    tmpA[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                    tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    for(j = 0; j < 5; j++)
                       tmpA[i][j] /= len;
                }
            
                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(tmpA,5,Bavg,XX[6]);
            }
            ////////////////////////////////////////
            /// END: central stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: one-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                tmpA = alltri_HR_sten[tri->id].Bsten[side];
                N_edge = Mag_p1_5edge_one_side_sten(tri, side, tris, Edge_side);
                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dy = pcrds[1][1] - pcrds[0][1];
                    dx = pcrds[1][0] - pcrds[0][0];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];

                    tmpA[i][0] = dy;
                    tmpA[i][1] = dy*(xx1 + 0.5*dx);
                    tmpA[i][2] = dy*(yy1 + 0.5*dy);

                    tmpA[i][3] = -dx;  /// b_0
                    tmpA[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                    tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    for(j = 0; j < 5; j++)
                    tmpA[i][j] /= len;
                }

                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(tmpA,5,Bavg,XX[side]);
            }
            ////////////////////////////////////////
            /// END: one-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            ////////////////////////////////////////
            /// Start: reverse-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                tmpA = alltri_HR_sten[tri->id].Bsten_rev[side];
                tmpB = alltri_HR_sten[tri->id].Bsten_rev_MB_B[side];
                count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    N_edge = Mag_p1_5edge_reverse_sten_5pt_vertex(tri,side,tris, Edge_side);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_9edge_1, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                    N_edge = Mag_p1_5edge_reverse_sten(tri,side,tris, Edge_side);

                num_tris_vertex = 5; //// TMP
                if(num_tris_vertex == 5)
                {
                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < N_edge; i++)
                            Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                    else
                    {
                        for(i = 0; i < N_edge; i++)
                            Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }
                    solve_by_gj(tmpA,5,Bavg,XX[side+3]);
                }
                else /// END: /// there are 6 edges
                {
                    //// Now constrained least square
                    M = 4; N = 5; P = 2; LDA = M; LDB = P; LWORK = M+N+P;
                    // M: number of least square eqn.
                    // N: number of unknown, 
                    // P: number of constraint eqn.
                    // first setup least square eqn.
                    if(rk_step == RK_STEP)
                    {
                        for(i = 2; i < N_edge; i++)
                            Bavg[i-2] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                    else
                    {
                        for(i = 2; i < N_edge; i++)
                            Bavg[i-2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }
                    // Now setup constraint eqn.
                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < 2; i++)
                        {
                            BBavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                        }
                    }
                    else
                    {
                        for(i = 0; i < 2; i++)
                        {
                            BBavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                        }
                    }

                    l = 0;
                    for(j = 0; j < N; j++)
                    {
                        for(i = 0; i < M; i++)
                        {
                            AA[l] = tmpA[i][j];
                            l++;
                        }
                    }
                    l = 0;
                    for(j = 0; j < N; j++)
                    {
                        for(i = 0; i < P; i++)
                        {
                            BB[l] = tmpB[i][j];
                            l++;
                        }
                    }
                    for(i = 0; i < M; i++)  // right side for the least square part
                        CC[i] = Bavg[i];
                    for(i = 0; i < P; i++)  // right side for the constrained part
                        DD[i] = BBavg[i];
                    FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[side+3], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
                } //// /// END: /// there are 6 edges
            }
            ////////////////////////////////////////
            /// END: reverse-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
        }

        if(alltri_HR_sten == NULL)
        {
            ////////////////////////////////////////
            /// Start: central stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            if(use_central_sten == YES)
            {
                N_edge = Mag_p1_5edge_central_sten(tri, tris, Edge_side);
                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dy = pcrds[1][1] - pcrds[0][1];
                    dx = pcrds[1][0] - pcrds[0][0];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];
        
                    A[i][0] = dy;
                    A[i][1] = dy*(xx1 + 0.5*dx);
                    A[i][2] = dy*(yy1 + 0.5*dy);
            
                    A[i][3] = -dx;  /// b_0
                    A[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                    A[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    for(j = 0; j < 5; j++)
                       A[i][j] /= len;
                }
            
                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(A,5,Bavg,XX[6]);
            }
            ////////////////////////////////////////
            /// END: central stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: one-sided stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                N_edge = Mag_p1_5edge_one_side_sten(tri, side, tris, Edge_side);
                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dy = pcrds[1][1] - pcrds[0][1];
                    dx = pcrds[1][0] - pcrds[0][0];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    // len = sqrt( sqr(dx) + sqr(dy) );
                    len = fg_length_side(tris[i])[Edge_side[i]];

                    A[i][0] = dy;
                    A[i][1] = dy*(xx1 + 0.5*dx);
                    A[i][2] = dy*(yy1 + 0.5*dy);

                    A[i][3] = -dx;  /// b_0
                    A[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                    A[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    for(j = 0; j < 5; j++)
                        A[i][j] /= len;
                }

                if(debug == YES && side == 0)
                {
                    for(i = 0; i < N_edge; i++) 
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);
                        printf("Bavg[%d] = %g on tri[%d] edge[%d], pts(%g, %g)->(%g, %g)\n",
                              i, Bavg[i], tris[i]->id, Edge_side[i], pcrds[0][0], pcrds[0][1],
                              pcrds[1][0], pcrds[1][1] );
                    }
                }
                solve_by_gj(A,5,Bavg,XX[side]);
            
                /**
                if(debug == YES && side == 0)
                {
                    // print_matrix("A", 6, 6, A, " %15.14g ");
                    inverse_matrix(A,5,invA);
                    // matrix_inv(A, 5, invA);
                    // print_matrix("invA", 6, 6, invA, " %g ");
                    matrix_matrix_mult(A, invA, 5, 5, I);
                    print_matrix("I", 5, 5, I, " %g ");
                    print_general_vector("B soln", XX[side], 5, "\n");
                    // clean_up(0);
                }
                **/
            }
            ////////////////////////////////////////
            /// END: one-sided stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            ////////////////////////////////////////
            /// Start: reverse-sided stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    N_edge = Mag_p1_5edge_reverse_sten_5pt_vertex(tri,side,tris, Edge_side);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P1_polynomial_reconstruction_ver1, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                    N_edge = Mag_p1_5edge_reverse_sten(tri,side,tris, Edge_side);

                num_tris_vertex = 5; //// TMP
                if(num_tris_vertex == 5)
                {
                    for(i = 0; i < N_edge; i++)
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                        dx = pcrds[1][0] - pcrds[0][0];
                        dy = pcrds[1][1] - pcrds[0][1];
                        xx1 = pcrds[0][0] - cent[0];
                        yy1 = pcrds[0][1] - cent[1];

                        len = fg_length_side(tris[i])[Edge_side[i]];
    
                        A[i][0] = dy;
                        A[i][1] = dy*(xx1 + 0.5*dx);
                        A[i][2] = dy*(yy1 + 0.5*dy);

                        A[i][3] = -dx;    // b_0
                        A[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                        A[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1

                        for(j = 0; j < 5; j++)
                            A[i][j] /= len;
                    }
                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < N_edge; i++)
                            Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                    else
                    {
                        for(i = 0; i < N_edge; i++)
                            Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }
                    solve_by_gj(A,5,Bavg,XX[side+3]);
                }
                else /// there are 6 edges
                {
                    //// Now constrained least square
                    M = 4; N = 5; P = 2; LDA = M; LDB = P; LWORK = M+N+P;
                    // M: number of least square eqn.
                    // N: number of unknown, 
                    // P: number of constraint eqn.
                    // first setup least square eqn.
                    for(i = 2; i < N_edge; i++)
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                        dx = pcrds[1][0] - pcrds[0][0];
                        dy = pcrds[1][1] - pcrds[0][1];
                        xx1 = pcrds[0][0] - cent[0];
                        yy1 = pcrds[0][1] - cent[1];

                        len = fg_length_side(tris[i])[Edge_side[i]];

                        A[i-2][0] = dy;
                        A[i-2][1] = dy*(xx1 + 0.5*dx);
                        A[i-2][2] = dy*(yy1 + 0.5*dy);

                        A[i-2][3] = -dx;    // b_0
                        A[i-2][4] = -dx*(xx1 + 0.5*dx);  // b_1
                        A[i-2][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1

                        for(j = 0; j < 4; j++)
                            A[i-2][j] /= len;
                    }
                    if(rk_step == RK_STEP)
                    {
                        for(i = 2; i < N_edge; i++)
                            Bavg[i-2] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                    else
                    {
                        for(i = 2; i < N_edge; i++)
                            Bavg[i-2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }
                    // Now setup constraint eqn.
                    for(i = 0; i < 2; i++)
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                        dx = pcrds[1][0] - pcrds[0][0];
                        dy = pcrds[1][1] - pcrds[0][1];
                        xx1 = pcrds[0][0] - cent[0];
                        yy1 = pcrds[0][1] - cent[1];

                        len = fg_length_side(tris[i])[Edge_side[i]];
                        MB[i][0] = dy;
                        MB[i][1] = dy*(xx1 + 0.5*dx);
                        MB[i][2] = dy*(yy1 + 0.5*dy);

                        MB[i][3] = -dx;    // b_0
                        MB[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                        MB[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1

                        for(j = 0; j < 2; j++)
                            MB[i][j] /= len;
                    }
                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < 2; i++)
                        {
                            BBavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                        }
                    }
                    else
                    {
                        for(i = 0; i < 2; i++)
                        {
                            BBavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                        }
                    }
                    l = 0;
                    for(j = 0; j < N; j++)
                    {
                        for(i = 0; i < M; i++)
                        {
                            AA[l] = A[i][j];
                            l++;
                        }
                    }
                    l = 0;
                    for(j = 0; j < N; j++)
                    {
                        for(i = 0; i < P; i++)
                        {
                            BB[l] = MB[i][j];
                            l++;
                        }
                    }
                    for(i = 0; i < M; i++)  // right side for the least square part
                        CC[i] = Bavg[i];
                    for(i = 0; i < P; i++)  // right side for the constrained part
                        DD[i] = BBavg[i];
                    FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[side+3], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
                } /// END: there are 6 edges
            } 
            ////////////////////////////////////////
            /// END: reverse-sided stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
        }

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        if(YES == use_central_sten)
            N_STEN = 7;
        else
            N_STEN = 6;

        ///// Comment out to Test Minmod limiter
        sum = 0.0;
        for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
        {
            XX[cv_indx][5] = -XX[cv_indx][1];
            if(6 == cv_indx)   /// central stencil
                alpha[cv_indx] = 10.0/sqr(eps + 2.0*sqr(XX[cv_indx][1]) + sqr(XX[cv_indx][2]) 
                                           + sqr(XX[cv_indx][4]) );
            else 
                alpha[cv_indx] = 1.0/sqr(eps + 2.0*sqr(XX[cv_indx][1]) + sqr(XX[cv_indx][2]) 
                                          + sqr(XX[cv_indx][4]) );
            sum += alpha[cv_indx];
        }
        for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
            wei[cv_indx] = alpha[cv_indx]/sum;
            // wei[cv_indx] = 1.0/3.0;

        for(i = 0; i < MAX_N_COEF; i++)
            B[0][i] = B[1][i] = 0.0;

        for(i = 0; i < N_STEN; i++)
        {
            B[0][0] += wei[i]*XX[i][0];
            B[0][1] += wei[i]*XX[i][1];
            B[0][2] += wei[i]*XX[i][2];

            B[1][0] += wei[i]*XX[i][3];
            B[1][1] += wei[i]*XX[i][4];
            B[1][2] += wei[i]*XX[i][5];
        }

        /// NOTE:: we assumed Taylor's series expansion representation for
        ///         soln at cell center.

        for(i = 0; i < MAX_N_COEF; i++)
        {
            dg_B(st)[0][i] = B[0][i];
            dg_B(st)[1][i] = B[1][i];
        }
        Mag(st)[0] = B[0][0];
        Mag(st)[1] = B[1][0];
        //// END: Comment out to Test Minmod limiter

        /*****
        ////// Minmod limiter for B field
        for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
            XX[cv_indx][5] = -XX[cv_indx][1];
        a1 = minmod_sign(XX[0][1], XX[1][1]);
        a2 = minmod_sign(XX[0][2], XX[1][2]);
        b1 = minmod_sign(XX[0][4], XX[1][4]);
        for(cv_indx = 2; cv_indx < N_STEN; cv_indx++)
        {
            if(a1 == 0 || a1 != minmod_sign(XX[0][1], XX[cv_indx][1]))
            {
                a1 = 0;
                break; 
            }
            if(a2 == 0 || a2 != minmod_sign(XX[0][2], XX[cv_indx][2]))
            {
                a2 = 0;
                break; 
            }
            if(b1 == 0 || b1 != minmod_sign(XX[0][4], XX[cv_indx][4]))
            {
                b1 = 0;
                break; 
            }
        }
        if(a1 == 0 || a2 == 0 || b1 == 0)
        {
            tri_B_P0_polynomial_reconstruction(tri, midsoln,rk_step, outB0);
            // printf("tri[%d] use P0 reconstruction\n",tri->id); 
        }
        else
        {
            // printf("tri[%d] use P1 reconstruction\n",tri->id); 
            for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
                sqr_B_len[cv_indx] = 2.0*sqr(XX[cv_indx][1]) + sqr(XX[cv_indx][2])
                                           + sqr(XX[cv_indx][4]);
            tmp = HUGE_VAL;
            a1 = -1;
            for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
            {
                if(tmp > sqr_B_len[cv_indx])
                {
                    tmp = sqr_B_len[cv_indx];
                    a1 = cv_indx;
                }
            }
            // B[0][0] = XX[a1][0];
            // B[0][1] = XX[a1][1];
            // B[0][2] = XX[a1][2];

            // B[1][0] = XX[a1][3];
            // B[1][1] = XX[a1][4];
            // B[1][2] = XX[a1][5];

            /// NOTE:: we assumed Taylor's series expansion representation for
            ///         soln at cell center.

            for(i = 0; i < MAX_N_COEF; i++)
            {
                dg_B(st)[0][i] = XX[a1][i];
                dg_B(st)[1][i] = XX[a1][i+3];
            }
            Mag(st)[0] = XX[a1][0];
            Mag(st)[1] = XX[a1][3];
        }
        *****/
        // if(debug  == YES)
        /***
        {
            double ansx[20][4], ansy[20][4];

            for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
            {
                for(i = 0; i < MAX_N_COEF; i++)
                {
                    ansx[cv_indx][i] = XX[cv_indx][i];
                    ansy[cv_indx][i] = XX[cv_indx][i+3];
                }
                verify_edge_B(tri,rk_step, midsoln, ansx[cv_indx], ansy[cv_indx]);
            }
        }
        ***/

        /*****
        if(tri->id == 16039)
        {
            double B0[2], ansx[6][4], ansy[6][4];

            for(cv_indx = 0; cv_indx < 3; cv_indx++)
            {
                for(i = 0; i < MAX_N_COEF; i++)
                {
                    ansx[cv_indx][i] = XX[cv_indx][i];
                    ansy[cv_indx][i] = XX[cv_indx][i+3];
                }
                printf("++++++Sten[%d]+++++:::\n", cv_indx);
                verify_edge_B(tri,rk_step, midsoln, ansx[cv_indx], ansy[cv_indx]);
                printf("Bx coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", XX[cv_indx][i]);
                printf("\n");
                printf("By coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", XX[cv_indx][i+3]);
                printf("\n");
            }

            printf("Weighted combination\n");
            printf("Bx coeff: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%g ", dg_B(st)[0][i]);
            printf("\n");
            printf("By coeff: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%g ", dg_B(st)[1][i]);
            printf("\n");
            // tri_B_P0_polynomial_reconstruction(tri, midsoln, rk_step, B0);
            // printf("Piecewise const reconstruction[%g %g]\n", B0[0], B0[1]);
            // printf("\n Exit in tri_B_P1_polynomial_reconstruction_ver1()\n");
            // clean_up(0);
        }
        *****/
}

//// Construct the P1 polynomial of the magnetic field on tri from the normal edge values.
//// Conservation is NOT enforced on tri and its neighbors.
//// We also use the fact that a1 + b2 = 0.0 (zero divergence of the P1 field
//// given below), i.e. b2 = -a1, so that only a 5 by 5 system has to be solved.
////  The solution of the linear system gives the coefficients in the order
////  a0; a1; a2; b0; b1; b2;
////  with: B_x= a0 + a1 (x-x_c) + a2 (y-y_c);
////        B_y= b0 + b1 (x-x_c) + b2 (y-y_c);
////  Do not form loops 
//// The solution is set to be 0 for debugging the hydro code
LOCAL void tri_B_P1_polynomial_reconstruction_upwind(
         TRI       *tri,   
         Mid_soln *midsoln,  
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *Bnbtri[3], *tmp_tri, *tris[20];
        double    AA[300], BB[300], CC[300], DD[300], XX[8][300], work[300], tmp, len;
        static double   **A = NULL, **invA, **I, **MB;
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, indx, cv_indx;
        Locstate  st, nbst[3], st2;
        double    **Lmass_matrix, Bavg[20], *pcrds[3], dy, dx, xx1, yy1, *cent = fg_centroid(tri), **tmpA, **tmpB, BBavg[8];
        int       side;
        static double    eps = 1.0e-6;
        double    wei[6], sum, alpha[6], B[2][6], outB0[4], sqr_B_len[30];
        int       debug = NO, Edge_side[20], tmp_side, next_side, N_edge, num_tris_vertex;
        int       use_central_sten = NO, N_STEN;
        int       a1, a2, b1; 

        // printf("ERROR: tri_B_P1_polynomial_reconstruction_ver1() %d\n",tri->id);

        ///// only do sanity test at this point
        /****
        if(rk_step == RK_STEP)
        {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = fg_side_B(tri)[i];
        
                    Bnbtri[i] = Tri_on_side(tri,i);
                    for(side = 0; side < 3; side++)
                    {
                        if(tri == Tri_on_side(Bnbtri[i], side))
                            break;
                    }    
                    if(fabs(Bavg[i] + fg_side_B(Bnbtri[i])[side]) > 1.0e-11)
                    {
                        printf("ERROR: tri_B_P1_polynomial_reconstruction_ver1()\n");
                        printf("normal Bn are not consistent\n");
                        printf("for tri[%d], its neighbr[%d], Bn %g, Bn %g, side[%d, %d]\n",
                                       tri->id, Bnbtri[i]->id, Bavg[i], fg_side_B(Bnbtri[i])[side],
                                            i, side);
                        print_tri_crds(tri);
                        print_tri_crds(Bnbtri[i]);
                        clean_up(ERROR);
                    }
                }
        }
        else
        {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = midsoln[tri->id].edge_Bn[i][rk_step];

                    Bnbtri[i] = Tri_on_side(tri,i);
                    for(side = 0; side < 3; side++)
                    {
                        if(tri == Tri_on_side(Bnbtri[i], side))
                            break;
                    } 
                    if(fabs(Bavg[i] + midsoln[Bnbtri[i]->id].edge_Bn[side][rk_step]) > 1.0e-11 
                       // && tri->BC_type != SUBDOMAIN && Bnbtri[i]->BC_type != SUBDOMAIN
                      )
                    {
                        printf("ERROR: tri_B_P1_polynomial_reconstruction_ver1() 2\n");
                        printf("normal Bn are not consistent\n");
                        printf("for tri[%d], its neighbr[%d], Bn %g, Bn %g, side[%d, %d]\n",
                                       tri->id, Bnbtri[i]->id, Bavg[i], midsoln[Bnbtri[i]->id].edge_Bn[side][rk_step],
                                            i, side);
                        print_tri_crds(tri);
                        print_tri_crds(Bnbtri[i]);
                        clean_up(ERROR);
                    }
                    else
                    {
                        // printf("Tri[%d] and [%d] are consistent\n", tri->id, Bnbtri[i]->id);
                        // printf("for tri[%d], its neighbr[%d], Bn %g, Bn %g, side[%d, %d]\n",
                        //                tri->id, Bnbtri[i]->id, Bavg[i], midsoln[Bnbtri[i]->id].edge_Bn[side][rk_step],
                        //                     i, side);
                        // print_tri_crds(tri);
                        // print_tri_crds(Bnbtri[i]);
                    }

                }
        }
        //  return;   
        *****/

        // if(tri->id == 2055 || tri->id == 2552)
        // if(tri->id == 3763 || tri->id ==844)
        // if(tri->id == 350 || tri->id == 405)

        // if(tri->id == 590)
        /*****  /////// Divergence-free check
        {
            double tmp;
            if(rk_step == RK_STEP)
            {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = fg_side_B(tri)[i];
                }
            }
            else
            {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = midsoln[tri->id].edge_Bn[i][rk_step];
                }
            }
            if(rk_step == RK_STEP)
                st = tri->st;
            else
                st = midsoln[tri->id].st[rk_step];

            tmp = Bavg[0]*fg_length_side(tri)[0] +
                  Bavg[1]*fg_length_side(tri)[1] +
                  Bavg[2]*fg_length_side(tri)[2];
            if(fabs(tmp) > 1.0e-11)
            {
                printf("\n\n &&&&&&&&&&& ERROR: tri[%d] enter tri_B_P1_polynomial_reconstruction_ver1(), initial divg = %g\n",
                     tri->id, tmp);
                printf("Bavg[%12.11g, %12.11g, %12.11g]\n", Bavg[0], Bavg[1], Bavg[2]);
                printf("Evolved cell avg Bx, By = [%g, %g]\n", Mag(st)[0],   Mag(st)[1]);
                print_tri_crds(tri);
            // debug = YES;
                clean_up(ERROR);
            }
        }
        ****/

        if(A == NULL)
        {
            matrix(&(A), 6, 6, sizeof(double));
            matrix(&(MB), 2, 6, sizeof(double));
            matrix(&(invA), 6, 6, sizeof(double));
            matrix(&(I), 6, 6, sizeof(double));
        }

        for(side = 0; side < 3; side++)
        {
            Bnbtri[side] = Tri_on_side(tri,side);
            pcrds[side] = Coords(Point_of_tri(tri)[side]);
        }

        if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].Bsten_set == NO)
        {
            alltri_HR_sten[tri->id].Bsten_set = YES;
            ////////////////////////////////////////
            /// Start: central stencil reconstruction------alltri_HR_sten != NULL && Bsten_set = NO
            ////////////////////////////////////////
            if(use_central_sten == YES)
            {
                N_edge = Mag_p1_5edge_central_sten(tri, tris, Edge_side);
                matrix(&tmpA,5,5,sizeof(double));
                alltri_HR_sten[tri->id].MB_A = tmpA;
                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dy = pcrds[1][1] - pcrds[0][1];
                    dx = pcrds[1][0] - pcrds[0][0];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];

                    tmpA[i][0] = dy;
                    tmpA[i][1] = dy*(xx1 + 0.5*dx);
                    tmpA[i][2] = dy*(yy1 + 0.5*dy);

                    tmpA[i][3] = -dx;  /// b_0
                    tmpA[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                    tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    for(j = 0; j < 5; j++)
                       tmpA[i][j] /= len;
                }

                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(tmpA,5,Bavg,XX[6]);
            }
            ////////////////////////////////////////
            /// END: central stencil reconstruction------alltri_HR_sten != NULL && Bsten_set = NO
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: one-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set = NO
            ////////////////////////////////////////
            for(side = 0; side < 3; side++) 
            {
                N_edge = Mag_p1_5edge_one_side_sten(tri, side, tris, Edge_side);
                matrix(&tmpA,5,5,sizeof(double));
                alltri_HR_sten[tri->id].Bsten[side] = tmpA;

                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dy = pcrds[1][1] - pcrds[0][1];
                    dx = pcrds[1][0] - pcrds[0][0];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    // len = sqrt( sqr(dx) + sqr(dy) );
                    len = fg_length_side(tris[i])[Edge_side[i]];

                    /***
                    tmpA[i][0] = dy;
                    tmpA[i][1] = 0.5*dy*dx + dy*pcrds[0][0];
                    tmpA[i][2] = 0.5*sqr(dy) + dy*pcrds[0][1];
                    tmpA[i][3] = -dx;
                    tmpA[i][4] = -(0.5*sqr(dx) + dx*pcrds[0][0]);
                    // A[side][5] = -(0.5*dx*dy + dx*pcrds[side][1]);
                    tmp = (0.5*dx*dy + dx*pcrds[0][1]);
                    tmpA[i][1] += tmp;
                    // A[i][5] = -tmp;
                    ***/

                    tmpA[i][0] = dy;
                    tmpA[i][1] = dy*(xx1 + 0.5*dx);
                    tmpA[i][2] = dy*(yy1 + 0.5*dy);

                    tmpA[i][3] = -dx;  /// b_0
                    tmpA[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                    tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    for(j = 0; j < 5; j++)
                    tmpA[i][j] /= len;
                }

                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(tmpA,5,Bavg,XX[side]);
            }
            ////////////////////////////////////////
            /// END: one-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set = NO
            ////////////////////////////////////////
            ////////////////////////////////////////
            /// Start: reverse-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set = NO
            ////////////////////////////////////////
            /****
            for(side = 0; side < 3; side++)
            {
                matrix(&tmpA,5,5,sizeof(double));
                matrix(&tmpB,2,5,sizeof(double));
                alltri_HR_sten[tri->id].Bsten_rev[side] = tmpA;
                alltri_HR_sten[tri->id].Bsten_rev_MB_B[side] = tmpB;

                count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    N_edge = Mag_p1_5edge_reverse_sten_5pt_vertex(tri,side,tris, Edge_side);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_9edge_1, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                    N_edge = Mag_p1_5edge_reverse_sten(tri,side,tris, Edge_side);

                num_tris_vertex = 5; //// TMP
                if(num_tris_vertex == 5)
                {
                    for(i = 0; i < N_edge; i++)
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                        dx = pcrds[1][0] - pcrds[0][0];
                        dy = pcrds[1][1] - pcrds[0][1];
                        xx1 = pcrds[0][0] - cent[0];
                        yy1 = pcrds[0][1] - cent[1];

                        len = fg_length_side(tris[i])[Edge_side[i]];
    
                        tmpA[i][0] = dy;
                        tmpA[i][1] = dy*(xx1 + 0.5*dx);
                        tmpA[i][2] = dy*(yy1 + 0.5*dy);

                        tmpA[i][3] = -dx;    // b_0
                        tmpA[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                        tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1

                        for(j = 0; j < 5; j++)
                            tmpA[i][j] /= len;
                    }
                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < N_edge; i++)
                            Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                    else
                    {
                        for(i = 0; i < N_edge; i++)
                            Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }

                    solve_by_gj(tmpA,5,Bavg,XX[side+3]);
                }
                else /// there are 6 edges
                {
                    //// Now constrained least square
                    M = 4; N = 5; P = 2; LDA = M; LDB = P; LWORK = M+N+P;
                    // M: number of least square eqn.
                    // N: number of unknown, 
                    // P: number of constraint eqn.
                    // first setup least square eqn.
                    for(i = 2; i < N_edge; i++)
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                        dx = pcrds[1][0] - pcrds[0][0];
                        dy = pcrds[1][1] - pcrds[0][1];
                        xx1 = pcrds[0][0] - cent[0];
                        yy1 = pcrds[0][1] - cent[1];

                        len = fg_length_side(tris[i])[Edge_side[i]];

                        tmpA[i-2][0] = dy;
                        tmpA[i-2][1] = dy*(xx1 + 0.5*dx);
                        tmpA[i-2][2] = dy*(yy1 + 0.5*dy);

                        tmpA[i-2][3] = -dx;    // b_0
                        tmpA[i-2][4] = -dx*(xx1 + 0.5*dx);  // b_1
                        tmpA[i-2][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1

                        for(j = 0; j < 4; j++)
                            tmpA[i-2][j] /= len;
                    } 

                    if(rk_step == RK_STEP)
                    {
                        for(i = 2; i < N_edge; i++)
                            Bavg[i-2] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                    else
                    {
                        for(i = 2; i < N_edge; i++)
                            Bavg[i-2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }
                    // Now setup constraint eqn.
                    for(i = 0; i < 2; i++)
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                        dx = pcrds[1][0] - pcrds[0][0];
                        dy = pcrds[1][1] - pcrds[0][1];
                        xx1 = pcrds[0][0] - cent[0];
                        yy1 = pcrds[0][1] - cent[1];
    
                        len = fg_length_side(tris[i])[Edge_side[i]];
                        tmpB[i][0] = dy;
                        tmpB[i][1] = dy*(xx1 + 0.5*dx);
                        tmpB[i][2] = dy*(yy1 + 0.5*dy);

                        tmpB[i][3] = -dx;    // b_0
                        tmpB[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                        tmpB[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1

                        for(j = 0; j < 2; j++)
                            tmpB[i][j] /= len;
                    }
                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < 2; i++)
                        {
                            BBavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                        }
                    }
                    else
                    {
                        for(i = 0; i < 2; i++)
                        {
                            BBavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                        }
                    }

                    l = 0;
                    for(j = 0; j < N; j++)
                    {
                        for(i = 0; i < M; i++)
                        {
                            AA[l] = tmpA[i][j];
                            l++;
                        }
                    }
                    l = 0;
                    for(j = 0; j < N; j++)
                    {
                        for(i = 0; i < P; i++)
                        {
                            BB[l] = tmpB[i][j];
                            l++;
                        }
                    }
                    for(i = 0; i < M; i++)  // right side for the least square part
                        CC[i] = Bavg[i];
                    for(i = 0; i < P; i++)  // right side for the constrained part
                        DD[i] = BBavg[i];
                    FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[side+3], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
                } /// END: /// there are 6 edges
            }
            *****/
            ////////////////////////////////////////
            /// END: reverse stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == NO
            ////////////////////////////////////////
        }
        else if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].Bsten_set == YES)
        {
            ////////////////////////////////////////
            /// Start: central stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            if(use_central_sten == YES)
            {
                N_edge = Mag_p1_5edge_central_sten(tri, tris, Edge_side);
                tmpA = alltri_HR_sten[tri->id].MB_A;
                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dy = pcrds[1][1] - pcrds[0][1];
                    dx = pcrds[1][0] - pcrds[0][0];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];
        
                    tmpA[i][0] = dy;
                    tmpA[i][1] = dy*(xx1 + 0.5*dx);
                    tmpA[i][2] = dy*(yy1 + 0.5*dy);
            
                    tmpA[i][3] = -dx;  /// b_0
                    tmpA[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                    tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    for(j = 0; j < 5; j++)
                       tmpA[i][j] /= len;
                }
            
                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(tmpA,5,Bavg,XX[6]);
            }
            ////////////////////////////////////////
            /// END: central stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: one-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                tmpA = alltri_HR_sten[tri->id].Bsten[side];
                N_edge = Mag_p1_5edge_one_side_sten(tri, side, tris, Edge_side);
                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dy = pcrds[1][1] - pcrds[0][1];
                    dx = pcrds[1][0] - pcrds[0][0];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];

                    tmpA[i][0] = dy;
                    tmpA[i][1] = dy*(xx1 + 0.5*dx);
                    tmpA[i][2] = dy*(yy1 + 0.5*dy);

                    tmpA[i][3] = -dx;  /// b_0
                    tmpA[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                    tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    for(j = 0; j < 5; j++)
                    tmpA[i][j] /= len;
                }

                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(tmpA,5,Bavg,XX[side]);
            }
            ////////////////////////////////////////
            /// END: one-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            ////////////////////////////////////////
            /// Start: reverse-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            /****
            for(side = 0; side < 3; side++)
            {
                tmpA = alltri_HR_sten[tri->id].Bsten_rev[side];
                tmpB = alltri_HR_sten[tri->id].Bsten_rev_MB_B[side];
                count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    N_edge = Mag_p1_5edge_reverse_sten_5pt_vertex(tri,side,tris, Edge_side);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_9edge_1, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                    N_edge = Mag_p1_5edge_reverse_sten(tri,side,tris, Edge_side);

                num_tris_vertex = 5; //// TMP
                if(num_tris_vertex == 5)
                {
                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < N_edge; i++)
                            Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                    else
                    {
                        for(i = 0; i < N_edge; i++)
                            Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }
                    solve_by_gj(tmpA,5,Bavg,XX[side+3]);
                }
                else /// END: /// there are 6 edges
                {
                    //// Now constrained least square
                    M = 4; N = 5; P = 2; LDA = M; LDB = P; LWORK = M+N+P;
                    // M: number of least square eqn.
                    // N: number of unknown, 
                    // P: number of constraint eqn.
                    // first setup least square eqn.
                    if(rk_step == RK_STEP)
                    {
                        for(i = 2; i < N_edge; i++)
                            Bavg[i-2] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                    else
                    {
                        for(i = 2; i < N_edge; i++)
                            Bavg[i-2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }
                    // Now setup constraint eqn.
                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < 2; i++)
                        {
                            BBavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                        }
                    }
                    else
                    {
                        for(i = 0; i < 2; i++)
                        {
                            BBavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                        }
                    }

                    l = 0;
                    for(j = 0; j < N; j++)
                    {
                        for(i = 0; i < M; i++)
                        {
                            AA[l] = tmpA[i][j];
                            l++;
                        }
                    }
                    l = 0;
                    for(j = 0; j < N; j++)
                    {
                        for(i = 0; i < P; i++)
                        {
                            BB[l] = tmpB[i][j];
                            l++;
                        }
                    }
                    for(i = 0; i < M; i++)  // right side for the least square part
                        CC[i] = Bavg[i];
                    for(i = 0; i < P; i++)  // right side for the constrained part
                        DD[i] = BBavg[i];
                    FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[side+3], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
                } //// /// END: /// there are 6 edges
            }
            ****/ 
            ////////////////////////////////////////
            /// END: reverse-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
        }

        if(alltri_HR_sten == NULL)
        {
            ////////////////////////////////////////
            /// Start: central stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            if(use_central_sten == YES)
            {
                N_edge = Mag_p1_5edge_central_sten(tri, tris, Edge_side);
                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dy = pcrds[1][1] - pcrds[0][1];
                    dx = pcrds[1][0] - pcrds[0][0];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];
        
                    A[i][0] = dy;
                    A[i][1] = dy*(xx1 + 0.5*dx);
                    A[i][2] = dy*(yy1 + 0.5*dy);
            
                    A[i][3] = -dx;  /// b_0
                    A[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                    A[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    for(j = 0; j < 5; j++)
                       A[i][j] /= len;
                }
            
                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(A,5,Bavg,XX[6]);
            }
            ////////////////////////////////////////
            /// END: central stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: one-sided stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                N_edge = Mag_p1_5edge_one_side_sten(tri, side, tris, Edge_side);
                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dy = pcrds[1][1] - pcrds[0][1];
                    dx = pcrds[1][0] - pcrds[0][0];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    // len = sqrt( sqr(dx) + sqr(dy) );
                    len = fg_length_side(tris[i])[Edge_side[i]];

                    A[i][0] = dy;
                    A[i][1] = dy*(xx1 + 0.5*dx);
                    A[i][2] = dy*(yy1 + 0.5*dy);

                    A[i][3] = -dx;  /// b_0
                    A[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                    A[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    for(j = 0; j < 5; j++)
                        A[i][j] /= len;
                }

                if(debug == YES && side == 0)
                {
                    for(i = 0; i < N_edge; i++) 
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);
                        printf("Bavg[%d] = %g on tri[%d] edge[%d], pts(%g, %g)->(%g, %g)\n",
                              i, Bavg[i], tris[i]->id, Edge_side[i], pcrds[0][0], pcrds[0][1],
                              pcrds[1][0], pcrds[1][1] );
                    }
                }
                solve_by_gj(A,5,Bavg,XX[side]);
            
                /**
                if(debug == YES && side == 0)
                {
                    // print_matrix("A", 6, 6, A, " %15.14g ");
                    inverse_matrix(A,5,invA);
                    // matrix_inv(A, 5, invA);
                    // print_matrix("invA", 6, 6, invA, " %g ");
                    matrix_matrix_mult(A, invA, 5, 5, I);
                    print_matrix("I", 5, 5, I, " %g ");
                    print_general_vector("B soln", XX[side], 5, "\n");
                    // clean_up(0);
                }
                **/
            }
            ////////////////////////////////////////
            /// END: one-sided stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            ////////////////////////////////////////
            /// Start: reverse-sided stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            /****
            for(side = 0; side < 3; side++)
            {
                count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    N_edge = Mag_p1_5edge_reverse_sten_5pt_vertex(tri,side,tris, Edge_side);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P1_polynomial_reconstruction_ver1, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                    N_edge = Mag_p1_5edge_reverse_sten(tri,side,tris, Edge_side);

                num_tris_vertex = 5; //// TMP
                if(num_tris_vertex == 5)
                {
                    for(i = 0; i < N_edge; i++)
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                        dx = pcrds[1][0] - pcrds[0][0];
                        dy = pcrds[1][1] - pcrds[0][1];
                        xx1 = pcrds[0][0] - cent[0];
                        yy1 = pcrds[0][1] - cent[1];

                        len = fg_length_side(tris[i])[Edge_side[i]];
    
                        A[i][0] = dy;
                        A[i][1] = dy*(xx1 + 0.5*dx);
                        A[i][2] = dy*(yy1 + 0.5*dy);

                        A[i][3] = -dx;    // b_0
                        A[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                        A[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1

                        for(j = 0; j < 5; j++)
                            A[i][j] /= len;
                    }
                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < N_edge; i++)
                            Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                    else
                    {
                        for(i = 0; i < N_edge; i++)
                            Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }
                    solve_by_gj(A,5,Bavg,XX[side+3]);
                }
                else /// there are 6 edges
                {
                    //// Now constrained least square
                    M = 4; N = 5; P = 2; LDA = M; LDB = P; LWORK = M+N+P;
                    // M: number of least square eqn.
                    // N: number of unknown, 
                    // P: number of constraint eqn.
                    // first setup least square eqn.
                    for(i = 2; i < N_edge; i++)
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                        dx = pcrds[1][0] - pcrds[0][0];
                        dy = pcrds[1][1] - pcrds[0][1];
                        xx1 = pcrds[0][0] - cent[0];
                        yy1 = pcrds[0][1] - cent[1];

                        len = fg_length_side(tris[i])[Edge_side[i]];

                        A[i-2][0] = dy;
                        A[i-2][1] = dy*(xx1 + 0.5*dx);
                        A[i-2][2] = dy*(yy1 + 0.5*dy);

                        A[i-2][3] = -dx;    // b_0
                        A[i-2][4] = -dx*(xx1 + 0.5*dx);  // b_1
                        A[i-2][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1

                        for(j = 0; j < 4; j++)
                            A[i-2][j] /= len;
                    }
                    if(rk_step == RK_STEP)
                    {
                        for(i = 2; i < N_edge; i++)
                            Bavg[i-2] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                    else
                    {
                        for(i = 2; i < N_edge; i++)
                            Bavg[i-2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }
                    // Now setup constraint eqn.
                    for(i = 0; i < 2; i++)
                    {
                        pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                        pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                        dx = pcrds[1][0] - pcrds[0][0];
                        dy = pcrds[1][1] - pcrds[0][1];
                        xx1 = pcrds[0][0] - cent[0];
                        yy1 = pcrds[0][1] - cent[1];

                        len = fg_length_side(tris[i])[Edge_side[i]];
                        MB[i][0] = dy;
                        MB[i][1] = dy*(xx1 + 0.5*dx);
                        MB[i][2] = dy*(yy1 + 0.5*dy);

                        MB[i][3] = -dx;    // b_0
                        MB[i][4] = -dx*(xx1 + 0.5*dx);  // b_1
                        MB[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1

                        for(j = 0; j < 2; j++)
                            MB[i][j] /= len;
                    }
                    if(rk_step == RK_STEP)
                    {
                        for(i = 0; i < 2; i++)
                        {
                            BBavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                        }
                    }
                    else
                    {
                        for(i = 0; i < 2; i++)
                        {
                            BBavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                        }
                    }
                    l = 0;
                    for(j = 0; j < N; j++)
                    {
                        for(i = 0; i < M; i++)
                        {
                            AA[l] = A[i][j];
                            l++;
                        }
                    }
                    l = 0;
                    for(j = 0; j < N; j++)
                    {
                        for(i = 0; i < P; i++)
                        {
                            BB[l] = MB[i][j];
                            l++;
                        }
                    }
                    for(i = 0; i < M; i++)  // right side for the least square part
                        CC[i] = Bavg[i];
                    for(i = 0; i < P; i++)  // right side for the constrained part
                        DD[i] = BBavg[i];
                    FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[side+3], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
                } /// END: there are 6 edges
            } 
            *****/
            ////////////////////////////////////////
            /// END: reverse-sided stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
        }

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        if(YES == use_central_sten)
            N_STEN = 4;
        else
            N_STEN = 3;

        /**** Comment out to Test Minmod limiter
        sum = 0.0;
        for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
        {
            XX[cv_indx][5] = -XX[cv_indx][1];
            if(6 == cv_indx)   /// central stencil
                alpha[cv_indx] = 10.0/sqr(eps + 2.0*sqr(XX[cv_indx][1]) + sqr(XX[cv_indx][2]) 
                                           + sqr(XX[cv_indx][4]) );
            else 
                alpha[cv_indx] = 1.0/sqr(eps + 2.0*sqr(XX[cv_indx][1]) + sqr(XX[cv_indx][2]) 
                                          + sqr(XX[cv_indx][4]) );
            sum += alpha[cv_indx];
        }
        for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
            wei[cv_indx] = alpha[cv_indx]/sum;
            // wei[cv_indx] = 1.0/3.0;

        for(i = 0; i < MAX_N_COEF; i++)
            B[0][i] = B[1][i] = 0.0;

        for(i = 0; i < N_STEN; i++)
        {
            B[0][0] += wei[i]*XX[i][0];
            B[0][1] += wei[i]*XX[i][1];
            B[0][2] += wei[i]*XX[i][2];

            B[1][0] += wei[i]*XX[i][3];
            B[1][1] += wei[i]*XX[i][4];
            B[1][2] += wei[i]*XX[i][5];
        }

        /// NOTE:: we assumed Taylor's series expansion representation for
        ///         soln at cell center.

        for(i = 0; i < MAX_N_COEF; i++)
        {
            dg_B(st)[0][i] = B[0][i];
            dg_B(st)[1][i] = B[1][i];
        }
        Mag(st)[0] = B[0][0];
        Mag(st)[1] = B[1][0];
        *****/
        ////// Minmod limiter for B field
        for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
            XX[cv_indx][5] = -XX[cv_indx][1];
        a1 = minmod_sign(XX[0][1], XX[1][1]);
        a2 = minmod_sign(XX[0][2], XX[1][2]);
        b1 = minmod_sign(XX[0][4], XX[1][4]);
        for(cv_indx = 2; cv_indx < N_STEN; cv_indx++)
        {
            if(a1 == 0 || a1 != minmod_sign(XX[0][1], XX[cv_indx][1]))
            {
                a1 = 0;
                break; 
            }
            if(a2 == 0 || a2 != minmod_sign(XX[0][2], XX[cv_indx][2]))
            {
                a2 = 0;
                break; 
            }
            if(b1 == 0 || b1 != minmod_sign(XX[0][4], XX[cv_indx][4]))
            {
                b1 = 0;
                break; 
            }
        }
        if(a1 == 0 || a2 == 0 || b1 == 0)
        {
            tri_B_P0_polynomial_reconstruction(tri, midsoln,rk_step, outB0);
            // printf("tri[%d] use P0 reconstruction\n",tri->id); 
        }
        else
        {
            // printf("tri[%d] use P1 reconstruction\n",tri->id); 
            for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
                sqr_B_len[cv_indx] = 2.0*sqr(XX[cv_indx][1]) + sqr(XX[cv_indx][2])
                                           + sqr(XX[cv_indx][4]);
            tmp = HUGE_VAL;
            a1 = -1;
            for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
            {
                if(tmp > sqr_B_len[cv_indx])
                {
                    tmp = sqr_B_len[cv_indx];
                    a1 = cv_indx;
                }
            }
            /// NOTE:: we assumed Taylor's series expansion representation for
            ///         soln at cell center.

            for(i = 0; i < MAX_N_COEF; i++)
            {
                dg_B(st)[0][i] = XX[a1][i];
                dg_B(st)[1][i] = XX[a1][i+3];
            }
            Mag(st)[0] = XX[a1][0];
            Mag(st)[1] = XX[a1][3];
        }

        // if(debug  == YES)
        /***
        {
            double ansx[20][4], ansy[20][4];

            for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
            {
                for(i = 0; i < MAX_N_COEF; i++)
                {
                    ansx[cv_indx][i] = XX[cv_indx][i];
                    ansy[cv_indx][i] = XX[cv_indx][i+3];
                }
                verify_edge_B(tri,rk_step, midsoln, ansx[cv_indx], ansy[cv_indx]);
            }
        }
        ***/

        /*****
        if(tri->id == 16039)
        {
            double B0[2], ansx[6][4], ansy[6][4];

            for(cv_indx = 0; cv_indx < 3; cv_indx++)
            {
                for(i = 0; i < MAX_N_COEF; i++)
                {
                    ansx[cv_indx][i] = XX[cv_indx][i];
                    ansy[cv_indx][i] = XX[cv_indx][i+3];
                }
                printf("++++++Sten[%d]+++++:::\n", cv_indx);
                verify_edge_B(tri,rk_step, midsoln, ansx[cv_indx], ansy[cv_indx]);
                printf("Bx coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", XX[cv_indx][i]);
                printf("\n");
                printf("By coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", XX[cv_indx][i+3]);
                printf("\n");
            }

            printf("Weighted combination\n");
            printf("Bx coeff: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%g ", dg_B(st)[0][i]);
            printf("\n");
            printf("By coeff: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%g ", dg_B(st)[1][i]);
            printf("\n");
            // tri_B_P0_polynomial_reconstruction(tri, midsoln, rk_step, B0);
            // printf("Piecewise const reconstruction[%g %g]\n", B0[0], B0[1]);
            // printf("\n Exit in tri_B_P1_polynomial_reconstruction_ver1()\n");
            // clean_up(0);
        }
        *****/
}

////  Construct the piecewise linear polynomials for Bx and By on tri,
////  using edge values.
////  Conservation IS ENFORCED on tri.
////  This implies that a0 = average_Bx; b0 = average_By.
////  The full set of polynomial coefficients, in order, is
////  a0; a1; a2; b0; b1; b2;
////  with: B_x= a0 + a1 (x-x0) + a2 (y-y0);
////        B_y= b0 + b1 (x-x0) + b2 (y-y0);
////  Here (x0, y0) is the cell center.
////  We also use the fact that a1 + b2 = 0.0 (i.e. dB_x/dx + dB_y/dy = 0),
////  so the relation b2 = -a1 is used.
////  The solution of the linear system is therefore the remaining
////  coefficients, in the order a1; a2; b1;
////  IMPORTANT: we save the result in the form of a Taylor expansion about the cell center.
LOCAL void tri_B_P1_polynomial_reconstruction_consv(
         TRI       *tri,   
         Mid_soln *midsoln,  
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *Bnbtri[3], *tmptri, *tris[30];
        double    AA[300], BB[300], CC[300], DD[300], XX[8][40], ans[8][40], tmp, len;
        static double   **A = NULL, **invA, **I;
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, indx, cv_indx;
        Locstate  st, nbst[3], st2;
        double    Bavg[20], *pcrds[3], dy, dx, cellavgB[2];
        int       side, tmp_side;
        static double    eps = 1.0e-6;
        double    wei[6], sum, alpha[6], B[2][6];
        int       debug = NO, N_cell, Edge_side[30];
        double    **Lmass_matrix = tri->Lmass_matrix, *cent = fg_centroid(tri);

        ///// only do sanity test at this point
        /***
        if(rk_step == RK_STEP)
        {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = fg_side_B(tri)[i];

                    Bnbtri[i] = Tri_on_side(tri,i);
                    for(side = 0; side < 3; side++)
                    {
                        if(tri == Tri_on_side(Bnbtri[i], side))
                            break;
                    }
                    if(fabs(Bavg[i] + fg_side_B(Bnbtri[i])[side]) > 1.0e-12)
                    {
                        printf("ERROR: tri_B_P1_polynomial_reconstruction()\n");
                        printf("normal Bn are not consistent\n");
                        printf("for tri[%d], its neighbr[%d], Bn %g, Bn %g, side[%d, %d]\n",
                                       tri->id, Bnbtri[i]->id, Bavg[i], fg_side_B(Bnbtri[i])[side],
                                            i, side);
                        print_tri_crds(tri);
                        print_tri_crds(Bnbtri[i]);
                        clean_up(ERROR);
                    }
                }
        }
        else
        {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = midsoln[tri->id].edge_Bn[i][rk_step];

                    Bnbtri[i] = Tri_on_side(tri,i);
                    for(side = 0; side < 3; side++)
                    {
                        if(tri == Tri_on_side(Bnbtri[i], side))
                            break;
                    }
                    if(fabs(Bavg[i] + midsoln[Bnbtri[i]->id].edge_Bn[side][rk_step]) > 1.0e-12
                       // && tri->BC_type != SUBDOMAIN && Bnbtri[i]->BC_type != SUBDOMAIN
                      )
                    {
                        printf("ERROR: tri_B_P1_polynomial_reconstruction() 2\n");
                        printf("normal Bn are not consistent\n");
                        printf("for tri[%d], its neighbr[%d], Bn %g, Bn %g, side[%d, %d]\n",
                                       tri->id, Bnbtri[i]->id, Bavg[i], midsoln[Bnbtri[i]->id].edge_Bn[side][rk_step],
                                            i, side);
                        print_tri_crds(tri);
                        print_tri_crds(Bnbtri[i]);
                        clean_up(ERROR);
                    }
                    else
                    {
                        // printf("Tri[%d] and [%d] are consistent\n", tri->id, Bnbtri[i]->id);
                        // printf("for tri[%d], its neighbr[%d], Bn %g, Bn %g, side[%d, %d]\n",
                        //                tri->id, Bnbtri[i]->id, Bavg[i], midsoln[Bnbtri[i]->id].edge_Bn[side][rk_step],
                        //                     i, side);
                        // print_tri_crds(tri);
                        // print_tri_crds(Bnbtri[i]);
                    }

                }
        }
        //  return;   
        ****/

        // if(tri->id == 189)
        /**
        if(tri->id == 350)
        {
            printf("\n\n &&&&&&&&&&&  tri[%d] enter tri_B_P1_polynomial_reconstruction()\n", tri->id);
            print_tri_crds(tri);
            debug = YES;
        }
        **/

        if(A == NULL)
        {
            matrix(&(A), 6, 6, sizeof(double));
            matrix(&(invA), 6, 6, sizeof(double));
            matrix(&(I), 6, 6, sizeof(double));
        }

        for(side = 0; side < 3; side++)
        {
            Bnbtri[side] = Tri_on_side(tri,side);
            pcrds[side] = Coords(Point_of_tri(tri)[side]);
        }

        if(rk_step == RK_STEP)
        {
            for(side = 0; side < 3; side++)
            {
                Bavg[side] = fg_side_B(tri)[side];
            }
        }
        else
        {
            for(side = 0; side < 3; side++)
            {
                Bavg[side] = midsoln[tri->id].edge_Bn[side][rk_step];
            }
        }

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(i = 0; i < 2; i++)
            cellavgB[i] = Mag(st)[i];

        if(debug == YES)
        {
            printf("sum B = %g, edge Bn[%g %g %g]\n", 
                    fg_length_side(tri)[0]*Bavg[0] + 
                    fg_length_side(tri)[1]*Bavg[1] + 
                    fg_length_side(tri)[2]*Bavg[2],
                    Bavg[0], Bavg[1], Bavg[2]);
            printf("Evolved cell avg Bx, By = [%g, %g]\n", Mag(st)[0],   Mag(st)[1]);
        }

        for(side = 0; side < 3; side++)
        {
            Bnbtri[side] = Tri_on_side(tri,side);
            pcrds[side] = Coords(Point_of_tri(tri)[side]);
        }

        /// The linear case. Polynomial expanded about the cell center.
        // for(i = 0; i < 1; i++)
        // {
        //     A[5][i] = A[4][i] = Lmass_matrix[0][i]/Lmass_matrix[0][0];
        // }
        // for(i = 1; i < 6; i++) /// since edge integral eqn has 6 variables.
        // {
        //     A[5][i] = A[4][i] = 0.0;
        // }
        // Bavg[4] = cellavgB[0]; // = a0
        // Bavg[5] = cellavgB[1]; // = b0
         
        for(side = 0; side < 3; side++)
        {
            /// first collect 2 edges on tri and 1 edge on adjacent tri.
            N_cell = 0;   /// actually, N_cell refers to collected edges.
            tris[N_cell] = tri;
            Edge_side[N_cell] = side;
            N_cell++;

            tris[N_cell] = tri;
            Edge_side[N_cell] = (side+2)%3;
            N_cell++;

            for(tmp_side = 0; tmp_side < 3; tmp_side++)
            {
                if(tri == Tri_on_side(Bnbtri[side],tmp_side))
                    break;
            }

            tris[N_cell] = Bnbtri[side];
            Edge_side[N_cell] = (tmp_side+2)%3;
            N_cell++;

            //// Now set up stencil
            for(i = 0; i < N_cell; i++)
            {
                pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                dy = pcrds[1][1] - pcrds[0][1];
                dx = pcrds[1][0] - pcrds[0][0];
                // len = sqrt( sqr(dx) + sqr(dy) );
                len = fg_length_side(tris[i])[Edge_side[i]];
                /***
                A[i][0] = dy;
                A[i][1] = 0.5*dy*dx + dy*pcrds[0][0];
                A[i][2] = 0.5*sqr(dy) + dy*pcrds[0][1];
                A[i][3] = -dx;
                A[i][4] = -(0.5*sqr(dx) + dx*pcrds[0][0]);
                tmp = (0.5*dx*dy + dx*pcrds[0][1]);
                // A[i][1] += tmp;
                A[i][5] = -tmp;
                for(j = 0; j < 6; j++)
                    A[i][j] /= len;
                ***/ 
                A[i][0] = 0.5*dy*dx + dy*pcrds[0][0] - dy*cent[0]; 
                A[i][1] = 0.5*sqr(dy) + dy*pcrds[0][1] - dy*cent[1];
                A[i][2] = -(0.5*sqr(dx) + dx*pcrds[0][0] - dx*cent[0]);
                tmp = (0.5*dx*dy + dx*pcrds[0][1] - dx*cent[1]);
                A[i][0] += tmp;
                for(j = 0; j < 3; j++)
                    A[i][j] /= len;
            }

            if(rk_step == RK_STEP)
            {
                for(i = 0; i < N_cell; i++)
                {
                    Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
            }
            else
            {
                for(i = 0; i < N_cell; i++)
                {
                    Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
            }

            /// add a0(y2-y1) and b0(x2-x1). 
            for(i = 0; i < N_cell; i++)
            {
                pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                dy = pcrds[1][1] - pcrds[0][1];
                dx = pcrds[1][0] - pcrds[0][0];
                len = fg_length_side(tris[i])[Edge_side[i]];

                Bavg[i] += (-cellavgB[0]*dy + cellavgB[1]*dx)/len;
            }

            solve_by_gj(A,3,Bavg,XX[side]);

            // solve_by_gj(A,5,Bavg,XX[side]);
            /**
            if(debug == YES)
            {
                printf("dx = %g, dy = %g, x1,y1[%g %g] , x2,y2[%g %g]--- current tri edge[%d]\n",
                          dx, dy, pcrds[(side)][0], pcrds[(side)][1],
                                  pcrds[(side+1)%3][0], pcrds[(side+1)%3][1], side);
            }
            **/
        }

        sum = 0.0;
        for(cv_indx = 0; cv_indx < 3; cv_indx++)
        {
            ans[cv_indx][0] = cellavgB[0];
            ans[cv_indx][1] = XX[cv_indx][0];
            ans[cv_indx][2] = XX[cv_indx][1];
            ans[cv_indx][3] = cellavgB[1];
            ans[cv_indx][4] = XX[cv_indx][2];
            ans[cv_indx][5] = -ans[cv_indx][1];
            /***
            ans[cv_indx][0] = XX[cv_indx][0];
            ans[cv_indx][1] = XX[cv_indx][1];
            ans[cv_indx][2] = XX[cv_indx][2];
            ans[cv_indx][3] = XX[cv_indx][3];
            ans[cv_indx][4] = XX[cv_indx][4];
            ans[cv_indx][5] = -ans[cv_indx][1];
            ***/
            alpha[cv_indx] = 1.0/(eps + 2.0*(sqr(ans[cv_indx][1])) + (sqr(ans[cv_indx][2])) + (sqr(ans[cv_indx][4])) );
            sum += alpha[cv_indx];
        }
        for(cv_indx = 0; cv_indx < 3; cv_indx++)
        {
            wei[cv_indx] = alpha[cv_indx]/sum;
            // wei[cv_indx] = 1.0/3.0;
        }

        for(i = 0; i < 3; i++)
        {
            B[0][i] = B[1][i] = 0.0;
        }
        for(i = 0; i < 3; i++)
        {
            B[0][0] += wei[i]*ans[i][0];
            B[0][1] += wei[i]*ans[i][1];
            B[0][2] += wei[i]*ans[i][2];

            B[1][0] += wei[i]*ans[i][3];
            B[1][1] += wei[i]*ans[i][4];
            B[1][2] += wei[i]*ans[i][5];
        }

        ///
        /// NOTE:: we have used Taylor's series expansion representation for
        ///         soln at cell center.
        /// 
        for(i = 0; i < MAX_N_COEF; i++)
        {
            dg_B(st)[0][i] = B[0][i];
            dg_B(st)[1][i] = B[1][i];
        }

        if(debug == YES)
        {
            printf("Bx coeff: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%g ", dg_B(st)[0][i]);
            printf("\n");
            printf("By coeff: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%g ", dg_B(st)[1][i]);
            printf("\n");

            for(cv_indx = 0; cv_indx < 3; cv_indx++)
            {
                printf("cv[%d]: ", cv_indx);
                for(i = 0; i < 6; i++) 
                    printf("%g; ", ans[cv_indx][i]);
                printf("\n");
            }

            /// check if we get edge average.
            for(i = 0; i < 3; i++)
            {
                ans[0][i] = B[0][i];
                ans[0][i+3] = B[1][i];
            }

            if(rk_step == RK_STEP)
            {
                for(side = 0; side < 3; side++)
                {
                    Bavg[side] = fg_side_B(tri)[side];
                }
            }
            else
            {
                for(side = 0; side < 3; side++)
                {
                    Bavg[side] = midsoln[tri->id].edge_Bn[side][rk_step];
                }
            }

            for(side =0; side < 3; side++)
            {
                pcrds[0] = Coords(Point_of_tri(tri)[side]);
                pcrds[1] = Coords(Point_of_tri(tri)[ (side +1)%3 ]);

                dy = pcrds[1][1] - pcrds[0][1];
                dx = pcrds[1][0] - pcrds[0][0];
                len = fg_length_side(tri)[side];

                sum = dy*ans[0][0] + (0.5*dy*dx + dy*pcrds[0][0])*ans[0][1] + (0.5*sqr(dy) + dy*pcrds[0][1])*ans[0][2] +
                      -dx*ans[0][3] - (0.5*sqr(dx) + dx*pcrds[0][0])*ans[0][4] - (0.5*dx*dy + dx*pcrds[0][1])*ans[0][5]; 
                sum /= len;
                printf("Side[%d], cell average recoved Bn = %13.12g, org. Bn = %13.12g\n",
                       side, sum, Bavg[side]);
            }
            // clean_up(0);
        }
}

//// Construct the polynomials of density, x/y momentum and energy on tri by
//// constrained least squares (LAPACK dgglse).
//// Conservation is enforced exactly on tri (the single constraint row is
//// the first row of tri->Lmass_matrix, normalized by its first entry).
//// On the triangles tris[0..nn_num-1], conservation is enforced in the
//// least square sense; their rows come from comp_mass_matrix_1st_row()
//// evaluated about fg_centroid(tri).
////
//// The cell averages are always read from midsoln[].st[0]. The fitted
//// coefficients are written to dg_Dens/dg_Mom/dg_Energy of tri->st when
//// rk_step == RK_STEP, otherwise of midsoln[tri->id].st[rk_step].
//// limit_store is not used.
////
//// At most 20 stencil triangles fit into the local work arrays; a longer
//// list is truncated with a warning. If dgglse reports a failure, the
//// variable falls back to the piecewise constant state (coefficient 0 =
//// cell average, others 0), which still satisfies the constraint because
//// the normalized constraint row starts with 1.
LOCAL void tri_comput_P1_polynomials_from_avg(
         TRI       *tri,   
         TRI       *tris[],
         int       nn_num,
         Mid_soln *midsoln,  
         Limiting_store **limit_store,
         int      rk_step)
{
        enum { FIT_MAX_CELLS = 20, FIT_N_VAR = 4 };
        double    Ab[FIT_N_VAR][FIT_MAX_CELLS]; /// Ab[#eqn][#cv]: least square right sides
        double    Bb[FIT_N_VAR];                /// cell averages on tri: constraint right sides
        double    A[FIT_MAX_CELLS][MAX_N_COEF]; /// A[#cv][normalized mass_matrix_term for each coeff]
        double    Bcon[MAX_N_COEF];             /// constraint row
        double    AA[300], BB[300], CC[300], DD[300], XX[300], work[300], diag;
        int       dim = 2, i, j, k, l, n_cell, n_var, debug = NO;
        int       M, N, P, LDA, LDB, LWORK, INFO;
        Locstate  st, avg_st;
        double    **Lmass_matrix = tri->Lmass_matrix;

        n_cell = nn_num;
        if(n_cell > FIT_MAX_CELLS)
        {
            printf("WARNING: tri_comput_P1_polynomials_from_avg(), tri[%d]: "
                   "stencil of %d cells truncated to %d\n",
                   tri->id, nn_num, FIT_MAX_CELLS);
            n_cell = FIT_MAX_CELLS;
        }

        //// first assemble far neighboring tris, which satisfy conservation in the least square sense.
        for(i = 0; i < n_cell; i++)
        {
             comp_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,fg_centroid(tri),mass_1st_row);
             diag = mass_1st_row[0][0];
             for(j = 0; j < MAX_N_COEF; j++)
                 A[i][j] = mass_1st_row[0][j]/diag;
        }
      
        //// set constraint eqns
        for(j = 0; j < MAX_N_COEF; j++)
            Bcon[j] = Lmass_matrix[0][j]/Lmass_matrix[0][0];

        avg_st = midsoln[tri->id].st[0];
        Bb[0] = Dens(avg_st);
        Bb[1] = Mom(avg_st)[0];
        Bb[2] = Mom(avg_st)[1];
        Bb[3] = Energy(avg_st);

        if(debug == YES)
        {
            printf("Bb[%g, %g, %g, %g]\n", Bb[0], Bb[1], Bb[2], Bb[3]);
        }

        for(i = 0; i < n_cell; i++)
        {
            avg_st = midsoln[tris[i]->id].st[0];
            Ab[0][i] = Dens(avg_st);
            Ab[1][i] = Mom(avg_st)[0];
            Ab[2][i] = Mom(avg_st)[1];
            Ab[3][i] = Energy(avg_st);
        }

        if(debug == YES)
        {
            for(i = 0; i < n_cell; i++)
                printf("Ab[%d] = %g, from tri %d\n", i, Ab[0][i], tris[i]->id);
        }

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        /// Only four variables have right sides and a destination here, so
        /// never run past them even when N_EQN is larger.
        n_var = (N_EQN < FIT_N_VAR) ? N_EQN : FIT_N_VAR;

        for(k = 0; k < n_var; k++)
        {
            M = n_cell; N = MAX_N_COEF; P = 1; LDA = M; LDB = P; LWORK = M+N+P;

            /// dgglse overwrites its matrix arguments, so they are repacked
            /// (column major) for every variable.
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < M; i++)
                {
                    AA[l] = A[i][j];
                    l++;
                }
            }
            for(j = 0; j < N; j++)
                BB[j] = Bcon[j];
            for(i = 0; i < M; i++)  // right side for the least square part
                CC[i] = Ab[k][i];
            DD[0] = Bb[k];          // right side for the constrained part

            FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                 BB, &LDB, CC, DD, XX, ///// double array B, int LDB, double array C, D, X,
                                 work, &LWORK, &INFO);

            if(INFO != 0)
            {
                printf("WARNING: tri_comput_P1_polynomials_from_avg(), tri[%d]: "
                       "dgglse returned INFO = %d for eqn %d, "
                       "use piecewise constant state\n", tri->id, INFO, k);
                XX[0] = Bb[k];
                for(i = 1; i < N; i++)
                    XX[i] = 0.0;
            }

            ///// save fitted soln
            switch(k)
            {
            case 0:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Dens(st)[i] = XX[i];
            break;
            case 1:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[0][i] = XX[i];
            break;
            case 2:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[1][i] = XX[i];
            break;
            case 3:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Energy(st)[i] = XX[i];
            break;
            default:
            break;
            }
        }
}

//// Construct the polynomials of the MHD variables on tri by constrained
//// least squares (LAPACK dgglse).
//// Conservation is only enforced on tri (the single constraint row is the
//// first row of tri->Bmass_matrix, normalized by its first entry).
//// On the triangles tris[0..nn_num-1], conservation is enforced in the
//// least square sense; their rows come from comp_Mag_mass_matrix_1st_row()
//// evaluated about fg_centroid(tri) with the scale sqrt(fg_area(tri)).
////
//// Fitted variables, in order: density, momentum x/y/z, energy, Bz, and
//// (marked "for testing purpose") Bx, By.
////
//// Cell averages are read from the tri->st states when midsoln == NULL,
//// otherwise from midsoln[].st[0]. The coefficients are written to tri->st
//// when rk_step == RK_STEP or midsoln == NULL, otherwise to
//// midsoln[tri->id].st[rk_step]. When con_u != NULL the coefficients are
//// also copied to con_u[0..7][] in the order
//// dens, mom[0..2], energy, B[0..2].
////
//// At most 20 stencil triangles fit into the local work arrays; a longer
//// list is truncated with a warning. If dgglse reports a failure, the
//// variable falls back to the piecewise constant state (coefficient 0 =
//// cell average, others 0), which still satisfies the constraint because
//// the normalized constraint row starts with 1.
EXPORT void init_tri_comput_P1_polynomials_from_avg_MHD(
         TRI       *tri,   
         TRI       *tris[],
         int       nn_num,
         Mid_soln  *midsoln,
         int      rk_step,
         double   **con_u)
{
        enum { FIT_MAX_CELLS = 20, FIT_N_VAR = 8 };
        double    Ab[FIT_N_VAR][FIT_MAX_CELLS]; /// Ab[#eqn][#cv]: least square right sides
        double    Bb[FIT_N_VAR];                /// cell averages on tri: constraint right sides
        double    A[FIT_MAX_CELLS][MAX_N_COEF]; /// A[#cv][normalized mass_matrix_term for each coeff]
        double    Bcon[MAX_N_COEF];             /// constraint row
        double    AA[300], BB[300], CC[300], DD[300], XX[300], work[300], diag;
        int       dim = 2, i, j, k, l, n_cell, n_var, debug = NO, side;
        int       M, N, P, LDA, LDB, LWORK, INFO;
        Locstate  st, avg_st;
        double    **Bmass_matrix = tri->Bmass_matrix;
        double    Bavg[8], *p0, *p1, dx, dy, sum, len;

        n_cell = nn_num;
        if(n_cell > FIT_MAX_CELLS)
        {
            printf("WARNING: init_tri_comput_P1_polynomials_from_avg_MHD(), tri[%d]: "
                   "stencil of %d cells truncated to %d\n",
                   tri->id, nn_num, FIT_MAX_CELLS);
            n_cell = FIT_MAX_CELLS;
        }

        //// first assemble far neighboring tris, which satisfy conservation in the least square sense.
        for(i = 0; i < n_cell; i++)
        {
             comp_Mag_mass_matrix_1st_row(MAX_N_COEF,tris[i],dim,fg_centroid(tri),
                        sqrt(fg_area(tri)),mass_1st_row);
             diag = mass_1st_row[0][0];
             for(j = 0; j < MAX_N_COEF; j++)
                 A[i][j] = mass_1st_row[0][j]/diag;
        }
      
        //// set constraint eqns
        for(j = 0; j < MAX_N_COEF; j++)
            Bcon[j] = Bmass_matrix[0][j]/Bmass_matrix[0][0];

        if(midsoln == NULL)
            avg_st = tri->st;
        else
            avg_st = midsoln[tri->id].st[0];

        Bb[0] = Dens(avg_st);
        Bb[1] = Mom(avg_st)[0];
        Bb[2] = Mom(avg_st)[1];
        Bb[3] = Mom(avg_st)[2];
        Bb[4] = Energy(avg_st);
        Bb[5] = Mag(avg_st)[2];
        //// This is for testing purpose
        Bb[6] = Mag(avg_st)[0];
        Bb[7] = Mag(avg_st)[1];

        for(i = 0; i < n_cell; i++)
        {
            if(midsoln == NULL)
                avg_st = tris[i]->st;
            else
                avg_st = midsoln[tris[i]->id].st[0];

            Ab[0][i] = Dens(avg_st);
            Ab[1][i] = Mom(avg_st)[0];
            Ab[2][i] = Mom(avg_st)[1];
            Ab[3][i] = Mom(avg_st)[2];
            Ab[4][i] = Energy(avg_st);

            Ab[5][i] = Mag(avg_st)[2];
            ///// This is for testing purpose
            Ab[6][i] = Mag(avg_st)[0];
            Ab[7][i] = Mag(avg_st)[1];
        }

        /// Without a midsoln array there is only tri->st to write to.
        if(rk_step == RK_STEP || midsoln == NULL)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        if(debug == YES)
        {
            g_verbose_print_state(st);
        }

        /// Right sides and destinations exist for eight variables only.
        n_var = (N_EQN < FIT_N_VAR) ? N_EQN : FIT_N_VAR;

        for(k = 0; k < n_var; k++)
        {
            M = n_cell; N = MAX_N_COEF; P = 1; LDA = M; LDB = P; LWORK = M+N+P;

            /// dgglse overwrites its matrix arguments, so they are repacked
            /// (column major) for every variable.
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < M; i++)
                {
                    AA[l] = A[i][j];
                    l++;
                }
            }
            for(j = 0; j < N; j++)
                BB[j] = Bcon[j];
            for(i = 0; i < M; i++)  // right side for the least square part
                CC[i] = Ab[k][i];
            DD[0] = Bb[k];          // right side for the constrained part

            FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                 BB, &LDB, CC, DD, XX, ///// double array B, int LDB, double array C, D, X,
                                 work, &LWORK, &INFO);

            if(INFO != 0)
            {
                printf("WARNING: init_tri_comput_P1_polynomials_from_avg_MHD(), tri[%d]: "
                       "dgglse returned INFO = %d for eqn %d, "
                       "use piecewise constant state\n", tri->id, INFO, k);
                XX[0] = Bb[k];
                for(i = 1; i < N; i++)
                    XX[i] = 0.0;
            }

            ///// save fitted soln
            switch(k)
            {
            case 0:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Dens(st)[i] = XX[i];
            break;
            case 1:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[0][i] = XX[i];
            break;
            case 2:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[1][i] = XX[i];
            break;
            case 3:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[2][i] = XX[i];
            break;
            case 4:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Energy(st)[i] = XX[i];
            break;
            case 5:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_B(st)[2][i] = XX[i];
            break;
            case 6:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_B(st)[0][i] = XX[i];
            break;
            case 7:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_B(st)[1][i] = XX[i];
            break;
            default:
            break;
            }
        }

        if(con_u != NULL)
        {
            for(i = 0; i < MAX_N_COEF; i++)
            {
                con_u[0][i] = dg_Dens(st)[i];
                con_u[4][i] = dg_Energy(st)[i];
                for(j = 0; j < 3; j++)
                {
                    con_u[j+1][i] = dg_Mom(st)[j][i];
                    con_u[j+5][i] = dg_B(st)[j][i];
                }
            }
        }

        if(debug == YES)
        {
            for(side = 0; side < 3; side++)
            {
                Bavg[side] = fg_side_B(tri)[side];
            }

            /// NOTE(review): this check integrates the polynomial with
            /// absolute vertex coordinates; confirm that this matches the
            /// basis used by comp_Mag_mass_matrix_1st_row().
            for(side =0; side < 3; side++)
            {
                p0 = Coords(Point_of_tri(tri)[side]);
                p1 = Coords(Point_of_tri(tri)[ (side +1)%3 ]);

                dy = p1[1] - p0[1];
                dx = p1[0] - p0[0];
                len = fg_length_side(tri)[side];

                sum = dy*dg_B(st)[0][0] + (0.5*dy*dx + dy*p0[0])*dg_B(st)[0][1] + (0.5*sqr(dy) + dy*p0[1])*dg_B(st)[0][2] +
                      -dx*dg_B(st)[1][0] - (0.5*sqr(dx) + dx*p0[0])*dg_B(st)[1][1] - (0.5*dx*dy + dx*p0[1])*dg_B(st)[1][2];
                sum /= len;
                printf("Side[%d], cell average recoved Bn = %13.12g, org. Bn = %13.12g\n",
                       side, sum, Bavg[side]);
                printf("Bx[%g, %g, %g]\n", dg_B(st)[0][0], dg_B(st)[0][1], dg_B(st)[0][2]);
                printf("By[%g, %g, %g]\n", dg_B(st)[1][0], dg_B(st)[1][1], dg_B(st)[1][2]);
            }
        }
}

/* use condition: b_2 = -a_1
                  b_4 = -2 a_3
                  b_5 = -0.5*a_4
   to reduce unknowns to 9 variables. 
   They are: a0, a1, a2, a3, a4, a5, b0, b1, b3.
   We also conserve the Magnetic field during the reconstruction.
   Note that the solution is represented as:
   Bx = a0 + a1(x-x_c) + a2(y - y_c) ...
   By = b0 + b1(x-x_c) + b2(y - y_c) ...
   which is the Taylor expansion about tri center.
*/
LOCAL void tri_B_P2_polynomial_reconstruction_consv(
         TRI       *tri,   
         Mid_soln *midsoln,  
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *Bnbtri[3], *tris[30];
        double    AA[300], BB[300], CC[300], DD[300], XX[8][300], work[300], tmp;
        static double   **A = NULL, **invA, **I, eps = 1.0e-6;
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, indx, debug = NO, cv_indx;
        int       sten_indx, edge_indx[3][9], e_indx, N_cell;
        Locstate  st, nbst[3], st2;
        double    Bavg[30], *pcrds[3], dy, dx, len, cellavgB[2], xx1, yy1, sum;
        int       side, Edge_side[30], N_edge;
        double    **Lmass_matrix = tri->Lmass_matrix, *cent = fg_centroid(tri);
        double    OIx[8], OIy[8], alpha[8], wei[8], ansx[8][30], ansy[8][30], B[2][10];

        if(A == NULL)
        {
            matrix(&(A), 12, 12, sizeof(double));
            matrix(&(I), 12, 12, sizeof(double));
            matrix(&(invA), 12, 12, sizeof(double));
        }

        for(side = 0; side < 3; side++)
        {
            Bnbtri[side] = Tri_on_side(tri,side);
            pcrds[side] = Coords(Point_of_tri(tri)[side]);
        }

        /***
        if(tri->id == 232)
        {
            double tmp;
            if(rk_step == RK_STEP)
            {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = fg_side_B(tri)[i];
                }
            }
            else
            {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = midsoln[tri->id].edge_Bn[i][rk_step];
                }
            }
            if(rk_step == RK_STEP)
                st = tri->st;
            else
                st = midsoln[tri->id].st[rk_step];

            tmp = Bavg[0]*fg_length_side(tri)[0] +
                  Bavg[1]*fg_length_side(tri)[1] +
                  Bavg[2]*fg_length_side(tri)[2];
            printf("\n\n &&&&&&&&&&&  tri[%d] enter tri_B_P2_polynomial_reconstruction_consv(), initial divg = %g\n",
                     tri->id, tmp);
            printf("Bavg[%12.11g, %12.11g, %12.11g]\n", Bavg[0], Bavg[1], Bavg[2]);
            printf("Evolved cell avg Bx, By = [%g, %g]\n", Mag(st)[0],   Mag(st)[1]);
            // print_tri_crds(tri);
            debug = YES;
        }
        ***/

        /***
        if(rk_step == RK_STEP)
        {
            for(side = 0; side < 3; side++)
            {
                Bavg[side] = fg_side_B(tri)[side];
            }
        }
        else
        {
            for(side = 0; side < 3; side++)
            {
                Bavg[side] = midsoln[tri->id].edge_Bn[side][rk_step];
            }
        }
        ***/

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        ////////////////////////////////////
        ///// Set the conservation equations
        for(i = 0; i < 2; i++)
            Bavg[i] = cellavgB[i] = Mag(st)[i];

        for(i = 0; i < MAX_N_COEF; i++)
        {
            A[0][i] = Lmass_matrix[0][i]/Lmass_matrix[0][0];
            A[1][i] = 0.0;
        }
        for(i = MAX_N_COEF; i < 12; i++)
            A[0][i] = A[1][i] = 0.0;
        A[1][6] = A[0][0];  /// b0
        A[1][7] = A[0][1];  /// b1
        A[1][1] = -A[0][2]; /// b2 = -a1
        A[1][8] = A[0][3];  /// b3
        A[1][3] = -2.0*A[0][4]; /// b4 = -2*a3
        A[1][4] = -0.5*A[0][5]; /// b5 = -0.5*a4
        /////End: Set the conservation equations
        ////////////////////////////////////

        for(side = 0; side < 3; side++)
        {
            N_edge = Mag_p2_edge_sten(tri,side,tris, Edge_side);

            for(i = 0; i < N_edge; i++)
            {
                pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                dx = pcrds[1][0] - pcrds[0][0];
                dy = pcrds[1][1] - pcrds[0][1];
                xx1 = pcrds[0][0] - cent[0]; 
                yy1 = pcrds[0][1] - cent[1]; 

                len = fg_length_side(tris[i])[Edge_side[i]];

                A[i+2][0] = dy;
                A[i+2][1] = dy*(xx1 + 0.5*dx);
                A[i+2][2] = dy*(yy1 + 0.5*dy);
                A[i+2][3] = dy*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);
                A[i+2][4] = dy*(xx1*yy1 + 0.5*xx1*dy +0.5*yy1*dx + dx*dy/3.0);
                A[i+2][5] = dy*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);

                A[i+2][6] = -dx;    // b_0
                A[i+2][7] = -dx*(xx1 + 0.5*dx);  // b_1
                A[i+2][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                A[i+2][8] = -dx*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);   // b_3
                A[i+2][3] += 2.0*dx*(xx1*yy1 +0.5*xx1*dy + 0.5*yy1*dx +  dx*dy/3.0);  // b_4
                A[i+2][4] += 0.5*dx*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);  // b_5

                for(j = 0; j < 9; j++)
                    A[i+2][j] /= len;
 
                //// TMP
                /***
                if(debug == YES)
                {
                    printf("edge[%d], from (%g; %g) to (%g; %g) on tri[%d] with cent(%g, %g)\n", 
                             i, pcrds[0][0], pcrds[0][1],  pcrds[1][0], pcrds[1][1], tris[i]->id,
                             fg_centroid(tris[i])[0], fg_centroid(tris[i])[1] );
                }
                ***/
                //// END: TMP
            }

            if(rk_step == RK_STEP)
            {
                for(i = 0; i < N_edge; i++)
                {
                    Bavg[i+2] = fg_side_B(tris[i])[Edge_side[i]];
                }
            }
            else
            {
                for(i = 0; i < N_edge; i++)
                {
                    Bavg[i+2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
            }

            //// TMP
            /**
            if(debug == YES && side == 0)
            {
                printf("onside[%d] N_edge = %d\n", side, N_edge);
                for(i = 0; i < N_edge; i++)
                    printf("Bavg[%d] = %g\n", i+2, Bavg[i+2]);
                print_ldb_matrix("A",9, 9,A,"%7.6g; ");
                inverse_matrix(A,9,invA); 
                matrix_matrix_mult(A, invA, 9, 9, I);
                print_matrix("I", 9, 9, I, " %g ");
            }
            **/
            ////END: TMP

            solve_by_gj(A,9,Bavg,XX[side]);
        }

        sum = 0.0;
        for(cv_indx = 0; cv_indx < 3; cv_indx++)
        {
            for(i =0; i < MAX_N_COEF; i++) 
                ansx[cv_indx][i] = XX[cv_indx][i];
            ansy[cv_indx][0] = XX[cv_indx][6];          // b0
            ansy[cv_indx][1] = XX[cv_indx][7];          // b1
            ansy[cv_indx][2] = -ansx[cv_indx][1];       // b2
            ansy[cv_indx][3] = XX[cv_indx][8];          // b3
            ansy[cv_indx][4] = -2.0*ansx[cv_indx][3];   // b4
            ansy[cv_indx][5] = -0.5*ansx[cv_indx][4];   // b5

            OIx[cv_indx] = weno_weight_P2(tri, ansx[cv_indx]);
            OIy[cv_indx] = weno_weight_P2(tri, ansy[cv_indx]);
            tmp = eps + OIx[cv_indx] + OIy[cv_indx];
            // alpha[cv_indx] = 1.0/sqr(eps + OIx[cv_indx] + OIy[cv_indx]);
            alpha[cv_indx] = 1.0/quad(tmp);
            sum += alpha[cv_indx];
        }

        /***
        if(debug == YES)
        {
            for(cv_indx = 0; cv_indx < 3; cv_indx++)
            {
                printf("********On sten[%d], preliminary:\n", cv_indx);
                printf("Bx coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", ansx[cv_indx][i]);
                printf("\n");
                printf("By coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", ansy[cv_indx][i]);
                printf("\n");
            }
        } 
        ***/

        for(cv_indx = 0; cv_indx < 3; cv_indx++)
        {
            wei[cv_indx] = alpha[cv_indx]/sum;
            // wei[cv_indx] = 1.0/3.0;
        }

        for(i = 0; i < MAX_N_COEF; i++)
        {
            B[0][i] = B[1][i] = 0.0;
        }

        for(cv_indx = 0; cv_indx < 3; cv_indx++)
        {
            for(i = 0; i < MAX_N_COEF; i++)
            {
                B[0][i] += wei[cv_indx]*ansx[cv_indx][i];
                B[1][i] += wei[cv_indx]*ansy[cv_indx][i];
            }
        }

        for(i = 0; i < MAX_N_COEF; i++)
        {
            dg_B(st)[0][i] = B[0][i];
            dg_B(st)[1][i] = B[1][i];
        }

        /***
        Mag(st)[0] = Mag(st)[1] = 0.0;
        for(i= 0; i< MAX_N_COEF; i++)
        {
            Mag(st)[0] += dg_B(st)[0][i]*Lmass_matrix[0][i];
            Mag(st)[1] += dg_B(st)[1][i]*Lmass_matrix[0][i];
        }
        for(i = 0; i < 2; i++)
            Mag(st)[i] /= Lmass_matrix[0][0];
        ***/

        /**
        if(debug == YES)
        {
            printf("Bx coeff: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%g ", dg_B(st)[0][i]);
            printf("\n");
            printf("By coeff: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%g ", dg_B(st)[1][i]);
            printf("\n");
            clean_up(0);
        }
        **/ 
}

//NEW 0411
/* use condition: b_2 = -a_1
                  b_4 = -2 a_3
                  b_5 = -0.5*a_4
   to reduce unknowns to 9 variables. 
   They are: a0, a1, a2, a3, a4, a5, b0, b1, b3.
   We DO NOT conserve the Magnetic field during the reconstruction.
   Note that the solution is represented as:
   Bx = a0 + a1(x-x_c) + a2(y - y_c) ...
   By = b0 + b1(x-x_c) + b2(y - y_c) ...
   which is the Taylor expansion about tri center.
*/
LOCAL void tri_B_P2_polynomial_2nd_reconstruction_ver2(
         TRI       *tri,
         Mid_soln *midsoln,
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *Bnbtri[3], *tris[30], *nbtri;
        double    AA[300], BB[300], CC[300], DD[300], XX[8][300], work[300], tmp;
        static double   **A = NULL, **invA = NULL, **I, eps = 1.0e-6, **B;
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, indx, debug = NO, cv_indx;
        int       sten_indx, edge_indx[3][9], e_indx, N_cell, N_STEN;
        Locstate  st, nbst[3], st2;
        double    Bavg[30], *pcrds[3], dy, dx, len, cellavgB[2], xx1, yy1, sum, BBavg[10];
        int       side, tmp_side, Edge_side[30], N_edge, num_tris_vertex;
        double    **Lmass_matrix = tri->Lmass_matrix, *cent = fg_centroid(tri), **tmpA, **tmpB;
        double    OIx[8], OIy[8], alpha[8], wei[8], ansx[8][30], ansy[8][30], MB[2][12];
        int       use_central_sten = YES;
        float     nor[3], t[3];        
        float     nx, ny, v0x, v1x, v0y, v1y, x0, y0, v0x2, v1x2, v0y2, v1y2;
        int       irow;

        if(invA == NULL)
        {
            matrix(&(A), 3, 9, sizeof(double));
            matrix(&(B), 8, 9, sizeof(double));
            matrix(&(I), 12, 12, sizeof(double));
            matrix(&(invA), 12, 12, sizeof(double));
        }

        for(side = 0; side < 3; side++)
        {
            Bnbtri[side] = Tri_on_side(tri,side);
            pcrds[side] = Coords(Point_of_tri(tri)[side]);
        }

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(i = 0; i < 2; i++)
            cellavgB[i] = Mag(st)[i];

        if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_set == NO)
        {
            alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_set = YES;

            {
                N_edge = Mag_p2_6edge_central_sten_new(tri, tris, Edge_side);
                matrix(&tmpA,3,9,sizeof(double));
                matrix(&tmpB,8,9,sizeof(double));
                alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_A = tmpA;
                alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_B = tmpB;
                // first setup least square eqn for edge 3, 4 and 5
                // only use cell average
                for(i = 3; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    // eqn 0 for edge 3
                    // eqn 1 for edge 4
                    // eqn 2 for edge 5
                    tmpA[i-3][0] = 2.0*nx;
                    tmpA[i-3][1] = nx*(v0x + v1x - 2.0*x0);
                    tmpA[i-3][2] = nx*(v0y + v1y - 2.0*y0);
                    tmpA[i-3][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                    tmpA[i-3][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0 
                                       - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                    tmpA[i-3][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                    tmpA[i-3][6] = 2.0*ny;    // b_0
                    tmpA[i-3][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                    tmpA[i-3][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                    tmpA[i-3][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                    tmpA[i-3][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                             - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                    tmpA[i-3][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[i-3] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                    }
                    else
                    {
                        Bavg[i-3] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][0][rk_step];
                    }
                }

                // Now setup constraint eqn for average
                for(i = 0; i < 2; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    tmpB[i][0] = 2.0*nx;
                    tmpB[i][1] = nx*(v0x + v1x - 2.0*x0);
                    tmpB[i][2] = nx*(v0y + v1y - 2.0*y0);
                    tmpB[i][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                    tmpB[i][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                        - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                    tmpB[i][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                    tmpB[i][6] = 2.0*ny;    // b_0
                    tmpB[i][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                    tmpB[i][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                    tmpB[i][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                    tmpB[i][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                             - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                    tmpB[i][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[i] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                    }
                    else
                    {
                        BBavg[i] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][0][rk_step];
                    }
                }

                ////// first slope
                for(i = 0; i < 3; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    tmpB[i+2][0] = 0.0;
                    tmpB[i+2][1] = nx*(v1x - v0x)/3.0;
                    tmpB[i+2][2] = nx*(v1y - v0y)/3.0;
                    tmpB[i+2][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpB[i+2][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpB[i+2][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpB[i+2][6] = 0.0;    // b_0
                    tmpB[i+2][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpB[i+2][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpB[i+2][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpB[i+2][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpB[i+2][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[i+2] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        BBavg[i+2] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][1][rk_step];
                    }
                }

                ////// second slope
                for(i = 0; i < 3; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    tmpB[i+5][0] = 0.0;
                    tmpB[i+5][1] = 0.0;
                    tmpB[i+5][2] = 0.0;
                    tmpB[i+5][3] = nx*sqr(v0x - v1x)/15.0;
                    tmpB[i+5][4] = nx*(v0x - v1x)*(v0y - v1y)/15.0;
                    tmpB[i+5][5] = nx*sqr(v0y - v1y)/15.0;

                    tmpB[i+5][6] = 0.0;    // b_0
                    tmpB[i+5][7] = 0.0;    // b_1
                    tmpB[i+5][1] -= 0.0;    // b_2 = - a_1
                    tmpB[i+5][8] = ny*sqr(v0x - v1x)/15.0;    // b_3
                    tmpB[i+5][3] -= 2.0*ny*(v0x - v1x)*(v0y - v1y)/15.0;    // b_4 = -2 a_3
                    tmpB[i+5][4] -= 0.5*ny*sqr(v0y - v1y)/15.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[i+5] = 2.0/5.0*fg_side_dgB(tris[i])[Edge_side[i]][2];
                    }
                    else
                    {
                        BBavg[i+5] = 2.0/5.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][2][rk_step];
                    }
                }

                //// Now constrained least square 
                M = 3; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
                // N: number of unknown, M: number of least square eqn.
                // P: number of constraint eqn.
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[0], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
        }
        else if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_set == YES)
        {
            {
                N_edge = Mag_p2_6edge_central_sten_new(tri, tris, Edge_side);
                tmpA = alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_A;
                tmpB = alltri_HR_sten[tri->id].P2_B_2nd_recons_sten_B;

                //least square
                if(rk_step == RK_STEP)
                {
                    for(i = 3; i < N_edge; i++)
                    {
                        Bavg[i-3] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                    }
                }
                else
                {
                    for(i = 3; i < N_edge; i++)
                    {
                        Bavg[i-3] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][0][rk_step];
                    }
                }

                //constraint eqns
                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < 2; i++)
                    {
                        BBavg[i] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                    }
                    for(i = 0; i < 3; i++)
                    {
                        BBavg[i+2] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    for(i = 0; i < 3; i++)
                    {
                        BBavg[i+5] = 2.0/5.0*fg_side_dgB(tris[i])[Edge_side[i]][2];
                    }
                }
                else
                {
                    for(i = 0; i < 2; i++)
                    {
                        BBavg[i] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][0][rk_step];
                    }
                    for(i = 0; i < 3; i++)
                    {
                        BBavg[i+2] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][1][rk_step];
                    }
                    for(i = 0; i < 3; i++)
                    {
                        BBavg[i+5] = 2.0/5.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][2][rk_step];
                    }
                }
 
                //// Now constrained least square 
                M = 3; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
                // M: number of least square eqn.
                // N: number of unknown, 
                // P: number of constraint eqn.
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[0], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
        }

        if(alltri_HR_sten == NULL)
        {
            {
                N_edge = Mag_p2_6edge_central_sten_new(tri, tris, Edge_side);

                // first setup least square eqn for edge 3, 4 and 5
                for(i = 3; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    A[i-3][0] = 2.0*nx;
                    A[i-3][1] = nx*(v0x + v1x - 2.0*x0);
                    A[i-3][2] = nx*(v0y + v1y - 2.0*y0);
                    A[i-3][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                    A[i-3][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                       - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                    A[i-3][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                    A[i-3][6] = 2.0*ny;    // b_0
                    A[i-3][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                    A[i-3][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                    A[i-3][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                    A[i-3][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                             - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                    A[i-3][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[i-3] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                    }
                    else
                    {
                        Bavg[i-3] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][0][rk_step];
                    }
                }

                // Now setup constraint eqn for edge 0, 1 and 2
                for(i = 0; i < 2; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    B[i][0] = 2.0*nx;
                    B[i][1] = nx*(v0x + v1x - 2.0*x0);
                    B[i][2] = nx*(v0y + v1y - 2.0*y0);
                    B[i][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                    B[i][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                        - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                    B[i][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                    B[i][6] = 2.0*ny;    // b_0
                    B[i][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                    B[i][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                    B[i][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                    B[i][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                             - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                    B[i][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[i] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                    }
                    else
                    {
                        BBavg[i] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][0][rk_step];
                    }
                }

                ////// first slope
                for(i = 0; i < 3; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    B[i+2][0] = 0.0;
                    B[i+2][1] = nx*(v1x - v0x)/3.0;
                    B[i+2][2] = nx*(v1y - v0y)/3.0;
                    B[i+2][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    B[i+2][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    B[i+2][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    B[i+2][6] = 0.0;    // b_0
                    B[i+2][7] = ny*(v1x - v0x)/3.0;    // b_1
                    B[i+2][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    B[i+2][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    B[i+2][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    B[i+2][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[i+2] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        BBavg[i+2] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][1][rk_step];
                    }
                }

                ////// second slope
                for(i = 0; i < 3; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    B[i+5][0] = 0.0;
                    B[i+5][1] = 0.0;
                    B[i+5][2] = 0.0;
                    B[i+5][3] = nx*sqr(v0x - v1x)/15.0;
                    B[i+5][4] = nx*(v0x - v1x)*(v0y - v1y)/15.0;
                    B[i+5][5] = nx*sqr(v0y - v1y)/15.0;

                    B[i+5][6] = 0.0;    // b_0
                    B[i+5][7] = 0.0;    // b_1
                    B[i+5][1] -= 0.0;    // b_2 = - a_1
                    B[i+5][8] = ny*sqr(v0x - v1x)/15.0;    // b_3
                    B[i+5][3] -= 2.0*ny*(v0x - v1x)*(v0y - v1y)/15.0;    // b_4 = -2 a_3
                    B[i+5][4] -= 0.5*ny*sqr(v0y - v1y)/15.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[i+5] = 2.0/5.0*fg_side_dgB(tris[i])[Edge_side[i]][2];
                    }
                    else
                    {
                        BBavg[i+5] = 2.0/5.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][2][rk_step];
                    }
                }

                //// Now constrained least square 
                M = 3; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
                // N: number of unknown, M: number of least square eqn.
                // P: number of constraint eqn.
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = A[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = B[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[0], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
        }

        for(i =0; i < MAX_N_COEF; i++)
            ansx[0][i] = XX[0][i];
        ansy[0][0] = XX[0][6];          // b0
        ansy[0][1] = XX[0][7];          // b1
        ansy[0][2] = -ansx[0][1];       // b2;  b_2 = - a_1
        ansy[0][3] = XX[0][8];          // b3
        ansy[0][4] = -2.0*ansx[0][3];   // b4;  b_4 = -2*a_3
        ansy[0][5] = -0.5*ansx[0][4];   // b5;  b_5 = -0.5*a_4

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(i = 0; i < MAX_N_COEF; i++)
        {
            dg_B(st)[0][i] = ansx[0][i];
            dg_B(st)[1][i] = ansy[0][i];
        }

        Mag(st)[0] = Mag(st)[1] = 0.0;
        for(i= 0; i< MAX_N_COEF; i++)
        {
            Mag(st)[0] += dg_B(st)[0][i]*Lmass_matrix[0][i];
            Mag(st)[1] += dg_B(st)[1][i]*Lmass_matrix[0][i];
        }
        for(i = 0; i < 2; i++)
            Mag(st)[i] /= Lmass_matrix[0][0];
}


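/* Use the conditions
                  b_2 = -a_1
                  b_4 = -2 a_3
                  b_5 = -0.5*a_4
   (these make dBx/dx + dBy/dy vanish identically for the quadratic
   form below, i.e. the reconstructed field is divergence-free)
   to reduce the unknowns to 9 variables.
   They are: a0, a1, a2, a3, a4, a5, b0, b1, b3.
   We DO NOT conserve the magnetic field during the reconstruction.
   Note that the solution is represented as:
   Bx = a0 + a1(x-x_c) + a2(y-y_c) + a3(x-x_c)^2 + a4(x-x_c)(y-y_c) + a5(y-y_c)^2
   By = b0 + b1(x-x_c) + b2(y-y_c) + b3(x-x_c)^2 + b4(x-x_c)(y-y_c) + b5(y-y_c)^2
   which is the Taylor expansion about the triangle centroid (x_c, y_c).
*/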
LOCAL int tri_B_P2_polynomial_reconstruction_new(
         TRI       *tri,   
         Mid_soln *midsoln,  
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *Bnbtri[3], *tris[30], *nbtri;
        double    AA[300], BB[300], CC[300], DD[300], XX[8][300], work[300], tmp;
        static double   **A = NULL, **invA = NULL, **I, eps = 1.0e-6, **B;
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, indx, debug = NO, cv_indx;
        int       sten_indx, edge_indx[3][9], e_indx, N_cell, N_STEN;
        Locstate  st, nbst[3], st2;
        double    Bavg[30], *pcrds[3], dy, dx, len, cellavgB[2], xx1, yy1, sum, BBavg[10];
        int       side, tmp_side, Edge_side[30], N_edge, num_tris_vertex;
        double    **Lmass_matrix = tri->Lmass_matrix, *cent = fg_centroid(tri), **tmpA, **tmpB;
        double    OIx[8], OIy[8], alpha[8], wei[8], ansx[8][30], ansy[8][30], MB[2][12];
        int       use_central_sten = YES;
        float     nor[3], t[3];        
        float     nx, ny, v0x, v1x, v0y, v1y, x0, y0, v0x2, v1x2, v0y2, v1y2;
        int       irow;

        if(invA == NULL)
        {
            matrix(&(A), 12, 12, sizeof(double));
            matrix(&(I), 12, 12, sizeof(double));
            matrix(&(invA), 12, 12, sizeof(double));
            matrix(&(B), 2, 12, sizeof(double));
        }

        for(side = 0; side < 3; side++)
        {
            Bnbtri[side] = Tri_on_side(tri,side);
            pcrds[side] = Coords(Point_of_tri(tri)[side]);
        }

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(i = 0; i < 2; i++)
            cellavgB[i] = Mag(st)[i];

        if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].Bsten_set == NO)
        {
            alltri_HR_sten[tri->id].Bsten_set = YES;

            ///////////////////////////////////////
            ///start central stencil reconstruction--- alltri_HR_sten != NULL && Bsten_set == NO
            ///////////////////////////////////////
            if(YES == use_central_sten)
            {
                for(side = 0; side < 3; side++)
                {
                    count_num_tris_vertex(tri,Point_of_tri(tri)[side], &num_tris_vertex);
                    if(num_tris_vertex <= 5)
                    {
                        use_central_sten = NO;
                        break;
                    } 
                }
            }
            if(YES == use_central_sten)
            {
                // use p1 central stencil for p2 reconstruction
                //N_edge = Mag_p2_11edge_central_sten(tri, tris, Edge_side);
                N_edge = Mag_p2_6edge_central_sten_new(tri, tris, Edge_side);
                matrix(&tmpA,3,9,sizeof(double));
                matrix(&tmpB,8,9,sizeof(double));
                alltri_HR_sten[tri->id].MB_A = tmpA;
                alltri_HR_sten[tri->id].MB_B = tmpB;
                // first setup least square eqn for edge 3, 4 and 5
                // only use cell average
                for(i = 3; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    // eqn 0 for edge 3
                    // eqn 1 for edge 4
                    // eqn 2 for edge 5
                    tmpA[i-3][0] = 2.0*nx;
                    tmpA[i-3][1] = nx*(v0x + v1x - 2.0*x0);
                    tmpA[i-3][2] = nx*(v0y + v1y - 2.0*y0);
                    tmpA[i-3][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                    tmpA[i-3][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0 
                                       - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                    tmpA[i-3][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                    tmpA[i-3][6] = 2.0*ny;    // b_0
                    tmpA[i-3][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                    tmpA[i-3][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                    tmpA[i-3][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                    tmpA[i-3][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                             - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                    tmpA[i-3][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[i-3] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                    }
                    else
                    {
                        Bavg[i-3] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                    }

                    ////For first slop // Not used now
                    /****
                    irow ++;
                    tmpA[irow][0] = 0.0;
                    tmpA[irow][1] = nx*(v1x - v0x)/3.0;
                    tmpA[irow][2] = nx*(v1y - v0y)/3.0;
                    tmpA[irow][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpA[irow][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpA[irow][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpA[irow][6] = 0.0;    // b_0
                    tmpA[irow][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpA[irow][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpA[irow][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpA[irow][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpA[irow][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        Bavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }
                    ****/
                }

                // Now setup constraint eqn for edge 0, 1 and 2
                for(i = 0; i < 3; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    //eqn 0, 1 and 2 for edge 0
                    //eqn 3, 4 and 5 for edge 1
                    //eqn 6 and 7 for edge 2
                    ////// average value
                    irow = 3*i;
                    if( i==0 || i==1 )
                    {
                        tmpB[irow][0] = 2.0*nx;
                        tmpB[irow][1] = nx*(v0x + v1x - 2.0*x0);
                        tmpB[irow][2] = nx*(v0y + v1y - 2.0*y0);
                        tmpB[irow][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                        tmpB[irow][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                            - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                        tmpB[irow][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                        tmpB[irow][6] = 2.0*ny;    // b_0
                        tmpB[irow][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                        tmpB[irow][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                        tmpB[irow][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                        tmpB[irow][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                                 - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                        tmpB[irow][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                        if(rk_step == RK_STEP)
                        {
                            BBavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                        }
                        else
                        {
                            BBavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                        }
                    }

                    ////// first slope
                    irow ++;
                    if(i==2) irow = 6;
                    tmpB[irow][0] = 0.0;
                    tmpB[irow][1] = nx*(v1x - v0x)/3.0;
                    tmpB[irow][2] = nx*(v1y - v0y)/3.0;
                    tmpB[irow][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpB[irow][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpB[irow][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpB[irow][6] = 0.0;    // b_0
                    tmpB[irow][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpB[irow][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpB[irow][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpB[irow][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpB[irow][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        BBavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }

                    ////// second slope
                    irow ++;
                    if(i==2) irow = 7;
                    tmpB[irow][0] = 0.0;
                    tmpB[irow][1] = 0.0;
                    tmpB[irow][2] = 0.0;
                    tmpB[irow][3] = nx*sqr(v0x - v1x)/15.0;
                    tmpB[irow][4] = nx*(v0x - v1x)*(v0y - v1y)/15.0;
                    tmpB[irow][5] = nx*sqr(v0y - v1y)/15.0;

                    tmpB[irow][6] = 0.0;    // b_0
                    tmpB[irow][7] = 0.0;    // b_1
                    tmpB[irow][1] -= 0.0;    // b_2 = - a_1
                    tmpB[irow][8] = ny*sqr(v0x - v1x)/15.0;    // b_3
                    tmpB[irow][3] -= 2.0*ny*(v0x - v1x)*(v0y - v1y)/15.0;    // b_4 = -2 a_3
                    tmpB[irow][4] -= 0.5*ny*sqr(v0y - v1y)/15.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[irow] = 2.0/5.0*fg_side_dgB(tris[i])[Edge_side[i]][2];
                    }
                    else
                    {
                        BBavg[irow] = 2.0/5.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][2];
                    }
                }

                //// Now constrained least square 
                M = 3; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
                // N: number of unknown, M: number of least square eqn.
                // P: number of constraint eqn.
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[6], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
            ///////////////////////////////////////
            /// END: central stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == NO
            ///////////////////////////////////////

            ////////////////////////////////////////
            /// Start one-sided stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == NO
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                // matrix(&tmpA,10,9,sizeof(double));
                // alltri_HR_sten[tri->id].Bsten[side] = tmpA; 

                matrix(&tmpA,3,9,sizeof(double));
                matrix(&tmpB,8,9,sizeof(double));
                alltri_HR_sten[tri->id].MB_one_A[side] = tmpA;
                alltri_HR_sten[tri->id].MB_one_B[side] = tmpB;

                N_edge = Mag_p2_6edge_one_side_sten_new(tri,side,tris, Edge_side);

                //least square equations
                for(i = 3; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    //irow = 2*(i-3);
                    irow = i - 3;
                    tmpA[irow][0] = 2.0*nx;
                    tmpA[irow][1] = nx*(v0x + v1x - 2.0*x0);
                    tmpA[irow][2] = nx*(v0y + v1y - 2.0*y0);
                    tmpA[irow][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                    tmpA[irow][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0 
                                       - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                    tmpA[irow][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                    tmpA[irow][6] = 2.0*ny;    // b_0
                    tmpA[irow][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                    tmpA[irow][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                    tmpA[irow][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                    tmpA[irow][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                             - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                    tmpA[irow][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                    }
                    else
                    {
                        Bavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                    }

                    /****
                    irow ++;
                    tmpA[irow][0] = 0.0;
                    tmpA[irow][1] = nx*(v1x - v0x)/3.0;
                    tmpA[irow][2] = nx*(v1y - v0y)/3.0;
                    tmpA[irow][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpA[irow][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpA[irow][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpA[irow][6] = 0.0;    // b_0
                    tmpA[irow][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpA[irow][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpA[irow][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpA[irow][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpA[irow][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        Bavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }
                    ****/
                }

                // Now setup constraint eqn.
                for(i = 0; i < 3; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    // 0 1 2; 3 4 5; 6 7
                    // average
                    irow = 3*i;
                    if( i==0 || i==1 )
                    {
                        tmpB[irow][0] = 2.0*nx;
                        tmpB[irow][1] = nx*(v0x + v1x - 2.0*x0);
                        tmpB[irow][2] = nx*(v0y + v1y - 2.0*y0);
                        tmpB[irow][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                        tmpB[irow][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                           - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                        tmpB[irow][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                        tmpB[irow][6] = 2.0*ny;    // b_0
                        tmpB[irow][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                        tmpB[irow][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                        tmpB[irow][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                        tmpB[irow][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                                 - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                        tmpB[irow][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                        if(rk_step == RK_STEP)
                        {
                            BBavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                        }
                        else
                        {
                            BBavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                        }
                    }

                    //first slope
                    irow ++;
                    if(i==2) irow = 6;
                    tmpB[irow][0] = 0.0;
                    tmpB[irow][1] = nx*(v1x - v0x)/3.0;
                    tmpB[irow][2] = nx*(v1y - v0y)/3.0;
                    tmpB[irow][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpB[irow][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpB[irow][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpB[irow][6] = 0.0;    // b_0
                    tmpB[irow][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpB[irow][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpB[irow][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpB[irow][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpB[irow][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        BBavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }

                    ////// second slope
                    irow ++;
                    if(i==2) irow = 7;
                    tmpB[irow][0] = 0.0;
                    tmpB[irow][1] = 0.0;
                    tmpB[irow][2] = 0.0;
                    tmpB[irow][3] = nx*sqr(v0x - v1x)/15.0;
                    tmpB[irow][4] = nx*(v0x - v1x)*(v0y - v1y)/15.0;
                    tmpB[irow][5] = nx*sqr(v0y - v1y)/15.0;

                    tmpB[irow][6] = 0.0;    // b_0
                    tmpB[irow][7] = 0.0;    // b_1
                    tmpB[irow][1] -= 0.0;    // b_2 = - a_1
                    tmpB[irow][8] = ny*sqr(v0x - v1x)/15.0;    // b_3
                    tmpB[irow][3] -= 2.0*ny*(v0x - v1x)*(v0y - v1y)/15.0;    // b_4 = -2 a_3
                    tmpB[irow][4] -= 0.5*ny*sqr(v0y - v1y)/15.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[irow] = 2.0/5.0*fg_side_dgB(tris[i])[Edge_side[i]][2];
                    }
                    else
                    {
                        BBavg[irow] = 2.0/5.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][2];
                    }
                }

                //// Now constrained least square 
                M = 3; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
                // N: number of unknown, M: number of least square eqn.
                // P: number of constraint eqn.
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[side], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            } // END of: for(side = 0; side < 3; side++)
            ////////////////////////////////////////
            /// END: one-sided stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == NO
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: reverse stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == NO
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                matrix(&tmpA,2,9,sizeof(double));
                matrix(&tmpB,8,9,sizeof(double));
                alltri_HR_sten[tri->id].Bsten_rev[side] = tmpA;
                alltri_HR_sten[tri->id].Bsten_rev_MB_B[side] = tmpB;

                count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    // N_edge = Mag_p2_9edge_reverse_sten_5pt_vertex(tri,side,tris, Edge_side);
                    N_edge = Mag_p2_5edge_reverse_sten_5pt_vertex_new(tri,side,tris, Edge_side);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_new, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                {
                    // N_edge = Mag_p2_9edge_reverse_sten(tri,side,tris, Edge_side);
                    // N_edge = Mag_p2_6edge_reverse_sten_new(tri,side,tris, Edge_side);
                    N_edge = Mag_p2_5edge_reverse_sten_new(tri,side,tris, Edge_side);
                }

                for(i = 3; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    //irow = 2*(i-3);
                    irow = i - 3;
                    tmpA[irow][0] = 2.0*nx;
                    tmpA[irow][1] = nx*(v0x + v1x - 2.0*x0);
                    tmpA[irow][2] = nx*(v0y + v1y - 2.0*y0);
                    tmpA[irow][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                    tmpA[irow][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0 
                                       - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                    tmpA[irow][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                    tmpA[irow][6] = 2.0*ny;    // b_0
                    tmpA[irow][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                    tmpA[irow][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                    tmpA[irow][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                    tmpA[irow][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                             - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                    tmpA[irow][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                    }
                    else
                    {
                        Bavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                    }

                    /****
                    irow ++;
                    tmpA[irow][0] = 0.0;
                    tmpA[irow][1] = nx*(v1x - v0x)/3.0;
                    tmpA[irow][2] = nx*(v1y - v0y)/3.0;
                    tmpA[irow][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpA[irow][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpA[irow][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpA[irow][6] = 0.0;    // b_0
                    tmpA[irow][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpA[irow][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpA[irow][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpA[irow][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpA[irow][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        Bavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }
                    ****/
                }

                // Now setup constraint eqn.
                for(i = 0; i < 3; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    // 0 1 2; 3 4 5; 6 7
                    //// average
                    irow = 3*i;
                    if( i==0 || i==1 )
                    {
                        tmpB[irow][0] = 2.0*nx;
                        tmpB[irow][1] = nx*(v0x + v1x - 2.0*x0);
                        tmpB[irow][2] = nx*(v0y + v1y - 2.0*y0);
                        tmpB[irow][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                        tmpB[irow][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                           - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                        tmpB[irow][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                        tmpB[irow][6] = 2.0*ny;    // b_0
                        tmpB[irow][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                        tmpB[irow][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                        tmpB[irow][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                        tmpB[irow][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                                 - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                        tmpB[irow][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                        if(rk_step == RK_STEP)
                        {
                            BBavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                        }
                        else
                        {
                            BBavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                        }
                    }

                    ///// first slope
                    irow ++;
                    if(i==2) irow = 6;
                    tmpB[irow][0] = 0.0;
                    tmpB[irow][1] = nx*(v1x - v0x)/3.0;
                    tmpB[irow][2] = nx*(v1y - v0y)/3.0;
                    tmpB[irow][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpB[irow][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpB[irow][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpB[irow][6] = 0.0;    // b_0
                    tmpB[irow][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpB[irow][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpB[irow][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpB[irow][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpB[irow][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        BBavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }

                    ////// second slope
                    irow ++;
                    if(i==2) irow = 7;
                    tmpB[irow][0] = 0.0;
                    tmpB[irow][1] = 0.0;
                    tmpB[irow][2] = 0.0;
                    tmpB[irow][3] = nx*sqr(v0x - v1x)/15.0;
                    tmpB[irow][4] = nx*(v0x - v1x)*(v0y - v1y)/15.0;
                    tmpB[irow][5] = nx*sqr(v0y - v1y)/15.0;

                    tmpB[irow][6] = 0.0;    // b_0
                    tmpB[irow][7] = 0.0;    // b_1
                    tmpB[irow][1] -= 0.0;    // b_2 = - a_1
                    tmpB[irow][8] = ny*sqr(v0x - v1x)/15.0;    // b_3
                    tmpB[irow][3] -= 2.0*ny*(v0x - v1x)*(v0y - v1y)/15.0;    // b_4 = -2 a_3
                    tmpB[irow][4] -= 0.5*ny*sqr(v0y - v1y)/15.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[irow] = 2.0/5.0*fg_side_dgB(tris[i])[Edge_side[i]][2];
                    }
                    else
                    {
                        BBavg[irow] = 2.0/5.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][2];
                    }
                }

                //// Now constrained least square 
                M = 2; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
                // N: number of unknown, M: number of least square eqn.
                // P: number of constraint eqn.
                /****
                if(num_tris_vertex == 5)
                {
                    M = 6; N = 9; P = 5; LDA = M; LDB = P; LWORK = M+N+P;
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_new, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                {
                    M = 8; N = 9; P = 5; LDA = M; LDB = P; LWORK = M+N+P;
                }
                ****/

                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[side+3], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
            ////////////////////////////////////////
            /// END: reverse stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == NO
            ////////////////////////////////////////
        }
        else if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].Bsten_set == YES)
        {
            ////////////////////////////////////////
            /// Start: central stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            if(YES == use_central_sten)
            {
                for(side = 0; side < 3; side++)
                {
                    count_num_tris_vertex(tri,Point_of_tri(tri)[side], &num_tris_vertex);
                    if(num_tris_vertex <= 5)
                    {
                        use_central_sten = NO;
                        break;
                    }
                }
            }
            if(YES == use_central_sten)
            {
                //N_edge = Mag_p2_11edge_central_sten(tri, tris, Edge_side);
                N_edge = Mag_p2_6edge_central_sten_new(tri, tris, Edge_side);
                tmpA = alltri_HR_sten[tri->id].MB_A;
                tmpB = alltri_HR_sten[tri->id].MB_B;

                //least square
                if(rk_step == RK_STEP)
                {
                    for(i = 3; i < N_edge; i++)
                    {
                        // Bavg[i-2] = fg_side_B(tris[i])[Edge_side[i]];
                        //irow = 2*(i-3);
                        irow = i-3;
                        Bavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                        //irow++;
                        //Bavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                }
                else
                {
                    for(i = 3; i < N_edge; i++)
                    {
                        // Bavg[i-2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                        //irow = 2*(i-3);
                        irow = i-3;
                        Bavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                        // irow++;
                        // Bavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }
                }

                // Right-hand side (BBavg) of the 8 constraint equations for the
                // central stencil, taken from edges 0, 1 and 2.
                // Row layout, identical for every rk_step:
                //   edge 0 -> row 0 (average), row 1 (first slope), row 2 (second slope)
                //   edge 1 -> row 3 (average), row 4 (first slope), row 5 (second slope)
                //   edge 2 -> row 6 (first slope), row 7 (second slope); no average row
                // This is the same layout used where tmpB/BBavg are assembled on the
                // fly elsewhere in this function (irow = 3*i there for both cases).
                if(rk_step == RK_STEP)
                {
                    // Final stage: read the edge moments stored on the triangles.
                    for(i = 0; i < 3; i++)
                    {
                        irow = 3*i;
                        if( i==0 || i==1 )
                            BBavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                        irow++;
                        if(i==2) irow = 6;
                        BBavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                        irow++;
                        if(i==2) irow = 7;
                        BBavg[irow] = 2.0/5.0*fg_side_dgB(tris[i])[Edge_side[i]][2];
                    }
                }
                else
                {
                    // Intermediate stage: read the edge moments saved in midsoln for rk_step.
                    for(i = 0; i < 3; i++)
                    {
                        // FIX: three rows per edge, as in the RK_STEP branch.  The former
                        // 2*i made edge 1's average overwrite row 2 (edge 0's second
                        // slope), shifted edge 1's slopes to rows 3/4, and left row 5 unset.
                        irow = 3*i;
                        if( i==0 || i==1 )
                            BBavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                        irow++;
                        if(i==2) irow = 6;
                        BBavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                        irow++;
                        if(i==2) irow = 7;
                        BBavg[irow] = 2.0/5.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][2];
                    }
                }
 
                //// Now constrained least square 
                M = 3; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
                // M: number of least square eqn.
                // N: number of unknown, 
                // P: number of constraint eqn.
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[6], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
            ////////////////////////////////////////
            /// END: central stencil reconstruction-------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: one-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                N_edge = Mag_p2_6edge_one_side_sten_new(tri, side, tris, Edge_side);
                tmpA = alltri_HR_sten[tri->id].MB_one_A[side];
                tmpB = alltri_HR_sten[tri->id].MB_one_B[side];

                //least square
                if(rk_step == RK_STEP)
                {
                    for(i = 3; i < N_edge; i++)
                    {
                        // Bavg[i-2] = fg_side_B(tris[i])[Edge_side[i]];
                        //irow = 2*(i-3);
                        irow = i-3;
                        Bavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                        //irow++;
                        //Bavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                }
                else
                {
                    for(i = 3; i < N_edge; i++)
                    {
                        // Bavg[i-2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                        //irow = 2*(i-3);
                        irow = i-3;
                        Bavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                        // irow++;
                        // Bavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }
                }

                // Right-hand side (BBavg) of the 8 constraint equations for the
                // one-sided stencil of this side, taken from edges 0, 1 and 2.
                // Row layout, identical for every rk_step:
                //   edge 0 -> row 0 (average), row 1 (first slope), row 2 (second slope)
                //   edge 1 -> row 3 (average), row 4 (first slope), row 5 (second slope)
                //   edge 2 -> row 6 (first slope), row 7 (second slope); no average row
                // This is the same layout used where tmpB/BBavg are assembled on the
                // fly elsewhere in this function (irow = 3*i there for both cases).
                if(rk_step == RK_STEP)
                {
                    // Final stage: read the edge moments stored on the triangles.
                    for(i = 0; i < 3; i++)
                    {
                        irow = 3*i;
                        if( i==0 || i==1 )
                            BBavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                        irow++;
                        if(i==2) irow = 6;
                        BBavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                        irow++;
                        if(i==2) irow = 7;
                        BBavg[irow] = 2.0/5.0*fg_side_dgB(tris[i])[Edge_side[i]][2];
                    }
                }
                else
                {
                    // Intermediate stage: read the edge moments saved in midsoln for rk_step.
                    for(i = 0; i < 3; i++)
                    {
                        // FIX: three rows per edge, as in the RK_STEP branch.  The former
                        // 2*i made edge 1's average overwrite row 2 (edge 0's second
                        // slope), shifted edge 1's slopes to rows 3/4, and left row 5 unset.
                        irow = 3*i;
                        if( i==0 || i==1 )
                            BBavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                        irow++;
                        if(i==2) irow = 6;
                        BBavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                        irow++;
                        if(i==2) irow = 7;
                        BBavg[irow] = 2.0/5.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][2];
                    }
                }


                //// Now constrained least square 
                M = 3; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
                // M: number of least square eqn.
                // N: number of unknown, 
                // P: number of constraint eqn.
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[side], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
            ////////////////////////////////////////
            /// END: one-sided stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: reverse stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                tmpA = alltri_HR_sten[tri->id].Bsten_rev[side];
                tmpB = alltri_HR_sten[tri->id].Bsten_rev_MB_B[side];
                count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    // N_edge = Mag_p2_9edge_reverse_sten_5pt_vertex(tri,side,tris, Edge_side);
                    N_edge = Mag_p2_5edge_reverse_sten_5pt_vertex_new(tri,side,tris, Edge_side);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_new, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                {
                    // N_edge = Mag_p2_9edge_reverse_sten(tri,side,tris, Edge_side);
                    // N_edge = Mag_p2_6edge_reverse_sten_new(tri,side,tris, Edge_side);
                    N_edge = Mag_p2_5edge_reverse_sten_new(tri,side,tris, Edge_side);
                }

                //least square
                if(rk_step == RK_STEP)
                {
                    for(i = 3; i < N_edge; i++)
                    {
                        // Bavg[i-2] = fg_side_B(tris[i])[Edge_side[i]];
                        //irow = 2*(i-3);
                        irow = i-3;
                        Bavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                        //irow++;
                        //Bavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                }
                else
                {
                    for(i = 3; i < N_edge; i++)
                    {
                        // Bavg[i-2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                        //irow = 2*(i-3);
                        irow = i-3;
                        Bavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                        // irow++;
                        // Bavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }
                }

                // Right-hand side (BBavg) of the 8 constraint equations for the
                // reverse stencil of this side, taken from edges 0, 1 and 2.
                // Row layout, identical for every rk_step:
                //   edge 0 -> row 0 (average), row 1 (first slope), row 2 (second slope)
                //   edge 1 -> row 3 (average), row 4 (first slope), row 5 (second slope)
                //   edge 2 -> row 6 (first slope), row 7 (second slope); no average row
                // This is the same layout used where tmpB/BBavg are assembled on the
                // fly elsewhere in this function (irow = 3*i there for both cases).
                if(rk_step == RK_STEP)
                {
                    // Final stage: read the edge moments stored on the triangles.
                    for(i = 0; i < 3; i++)
                    {
                        irow = 3*i;
                        if( i==0 || i==1 )
                            BBavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                        irow++;
                        if(i==2) irow = 6;
                        BBavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                        irow++;
                        if(i==2) irow = 7;
                        BBavg[irow] = 2.0/5.0*fg_side_dgB(tris[i])[Edge_side[i]][2];
                    }
                }
                else
                {
                    // Intermediate stage: read the edge moments saved in midsoln for rk_step.
                    for(i = 0; i < 3; i++)
                    {
                        // FIX: three rows per edge, as in the RK_STEP branch.  The former
                        // 2*i made edge 1's average overwrite row 2 (edge 0's second
                        // slope), shifted edge 1's slopes to rows 3/4, and left row 5 unset.
                        irow = 3*i;
                        if( i==0 || i==1 )
                            BBavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                        irow++;
                        if(i==2) irow = 6;
                        BBavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                        irow++;
                        if(i==2) irow = 7;
                        BBavg[irow] = 2.0/5.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][2];
                    }
                }


                //// Now constrained least square 
                M = 2; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
                // M: number of least square eqn.
                // N: number of unknown, 
                // P: number of constraint eqn.
                /****
                if(num_tris_vertex == 5)
                {
                    M = 6; N = 9; P = 5; LDA = M; LDB = P; LWORK = M+N+P;
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_new, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                {
                    M = 8; N = 9; P = 5; LDA = M; LDB = P; LWORK = M+N+P;
                }
                ****/

                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[side+3], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
            ////////////////////////////////////////
            /// END: reverse stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
        }

        if(alltri_HR_sten == NULL)
        {
            ////////////////////////////////////////
            /// Start: central stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            if(YES == use_central_sten)
            {
                for(side = 0; side < 3; side++)
                {
                    count_num_tris_vertex(tri,Point_of_tri(tri)[side], &num_tris_vertex);
                    if(num_tris_vertex <= 5)
                    {
                        use_central_sten = NO;
                        break;
                    }
                }
            }

            if(YES == use_central_sten)
            {
                N_edge = Mag_p2_6edge_central_sten_new(tri, tris, Edge_side);

                // first setup least square eqn for edge 3, 4 and 5
                for(i = 3; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    // eqn 0 for edge 3
                    // eqn 1 for edge 4
                    // eqn 2 for edge 5
                    //irow = 2*(i-3);
                    irow = i-3;
                    tmpA[irow][0] = 2.0*nx;
                    tmpA[irow][1] = nx*(v0x + v1x - 2.0*x0);
                    tmpA[irow][2] = nx*(v0y + v1y - 2.0*y0);
                    tmpA[irow][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                    tmpA[irow][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                       - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                    tmpA[irow][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                    tmpA[irow][6] = 2.0*ny;    // b_0
                    tmpA[irow][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                    tmpA[irow][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                    tmpA[irow][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                    tmpA[irow][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                             - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                    tmpA[irow][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                    }
                    else
                    {
                        Bavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                    }

                    /****
                    irow ++;
                    tmpA[irow][0] = 0.0;
                    tmpA[irow][1] = nx*(v1x - v0x)/3.0;
                    tmpA[irow][2] = nx*(v1y - v0y)/3.0;
                    tmpA[irow][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpA[irow][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpA[irow][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpA[irow][6] = 0.0;    // b_0
                    tmpA[irow][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpA[irow][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpA[irow][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpA[irow][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpA[irow][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        Bavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }
                    ****/
                }

                // Now setup constraint eqn for edge 0, 1 and 2
                for(i = 0; i < 3; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    //eqn 0 and 1 for edge 0
                    //eqn 2 and 3 for edge 1
                    //eqn 4 for edge 2
                    irow = 3*i;
                    if( i==0 || i==1 )
                    {
                        tmpB[irow][0] = 2.0*nx;
                        tmpB[irow][1] = nx*(v0x + v1x - 2.0*x0);
                        tmpB[irow][2] = nx*(v0y + v1y - 2.0*y0);
                        tmpB[irow][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                        tmpB[irow][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                            - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                        tmpB[irow][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                        tmpB[irow][6] = 2.0*ny;    // b_0
                        tmpB[irow][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                        tmpB[irow][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                        tmpB[irow][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                        tmpB[irow][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                                 - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                        tmpB[irow][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                        if(rk_step == RK_STEP)
                        {
                            BBavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                        }
                        else
                        {
                            BBavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                        }
                    }

                    ////// first slope
                    irow ++;
                    if(i==2) irow = 6;
                    tmpB[irow][0] = 0.0;
                    tmpB[irow][1] = nx*(v1x - v0x)/3.0;
                    tmpB[irow][2] = nx*(v1y - v0y)/3.0;
                    tmpB[irow][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpB[irow][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpB[irow][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpB[irow][6] = 0.0;    // b_0
                    tmpB[irow][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpB[irow][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpB[irow][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpB[irow][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpB[irow][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        BBavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }

                    ////// second slope
                    irow ++;
                    if(i==2) irow = 7;
                    tmpB[irow][0] = 0.0;
                    tmpB[irow][1] = 0.0;
                    tmpB[irow][2] = 0.0;
                    tmpB[irow][3] = nx*sqr(v0x - v1x)/15.0;
                    tmpB[irow][4] = nx*(v0x - v1x)*(v0y - v1y)/15.0;
                    tmpB[irow][5] = nx*sqr(v0y - v1y)/15.0;

                    tmpB[irow][6] = 0.0;    // b_0
                    tmpB[irow][7] = 0.0;    // b_1
                    tmpB[irow][1] -= 0.0;    // b_2 = - a_1
                    tmpB[irow][8] = ny*sqr(v0x - v1x)/15.0;    // b_3
                    tmpB[irow][3] -= 2.0*ny*(v0x - v1x)*(v0y - v1y)/15.0;    // b_4 = -2 a_3
                    tmpB[irow][4] -= 0.5*ny*sqr(v0y - v1y)/15.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[irow] = 2.0/5.0*fg_side_dgB(tris[i])[Edge_side[i]][2];
                    }
                    else
                    {
                        BBavg[irow] = 2.0/5.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][2];
                    }
                }

                //// Now constrained least square 
                M = 3; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
                // N: number of unknown, M: number of least square eqn.
                // P: number of constraint eqn.
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[6], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
            ////////////////////////////////////////////////
            /* End of central stencil reconstruction ----alltri_HR_sten = NULL     */
            ////////////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: one-sided stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                N_edge = Mag_p2_6edge_one_side_sten_new(tri,side,tris, Edge_side);

                for(i = 3; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    //irow = 2*(i-3);
                    irow = i-3;
                    tmpA[irow][0] = 2.0*nx;
                    tmpA[irow][1] = nx*(v0x + v1x - 2.0*x0);
                    tmpA[irow][2] = nx*(v0y + v1y - 2.0*y0);
                    tmpA[irow][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                    tmpA[irow][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                       - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                    tmpA[irow][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                    tmpA[irow][6] = 2.0*ny;    // b_0
                    tmpA[irow][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                    tmpA[irow][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                    tmpA[irow][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                    tmpA[irow][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                             - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                    tmpA[irow][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                    }
                    else
                    {
                        Bavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                    }

                    /***
                    irow ++;
                    tmpA[irow][0] = 0.0;
                    tmpA[irow][1] = nx*(v1x - v0x)/3.0;
                    tmpA[irow][2] = nx*(v1y - v0y)/3.0;
                    tmpA[irow][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpA[irow][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpA[irow][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpA[irow][6] = 0.0;    // b_0
                    tmpA[irow][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpA[irow][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpA[irow][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpA[irow][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpA[irow][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        Bavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }
                    ***/
                }

                // Now setup constraint eqn.
                for(i = 0; i < 3; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    // 0 1 2; 3 4 5; 6 7
                    // average
                    irow = 3*i;
                    if( i==0 || i==1 )
                    {
                        tmpB[irow][0] = 2.0*nx;
                        tmpB[irow][1] = nx*(v0x + v1x - 2.0*x0);
                        tmpB[irow][2] = nx*(v0y + v1y - 2.0*y0);
                        tmpB[irow][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                        tmpB[irow][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                           - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                        tmpB[irow][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                        tmpB[irow][6] = 2.0*ny;    // b_0
                        tmpB[irow][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                        tmpB[irow][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                        tmpB[irow][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                        tmpB[irow][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                                 - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                        tmpB[irow][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                        if(rk_step == RK_STEP)
                        {
                            BBavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                        }
                        else
                        {
                            BBavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                        }
                    }

                    //first slope
                    irow ++;
                    if(i==2) irow = 6;
                    tmpB[irow][0] = 0.0;
                    tmpB[irow][1] = nx*(v1x - v0x)/3.0;
                    tmpB[irow][2] = nx*(v1y - v0y)/3.0;
                    tmpB[irow][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpB[irow][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpB[irow][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpB[irow][6] = 0.0;    // b_0
                    tmpB[irow][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpB[irow][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpB[irow][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpB[irow][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpB[irow][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        BBavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }

                    ////// second slope
                    irow ++;
                    if(i==2) irow = 7;
                    tmpB[irow][0] = 0.0;
                    tmpB[irow][1] = 0.0;
                    tmpB[irow][2] = 0.0;
                    tmpB[irow][3] = nx*sqr(v0x - v1x)/15.0;
                    tmpB[irow][4] = nx*(v0x - v1x)*(v0y - v1y)/15.0;
                    tmpB[irow][5] = nx*sqr(v0y - v1y)/15.0;

                    tmpB[irow][6] = 0.0;    // b_0
                    tmpB[irow][7] = 0.0;    // b_1
                    tmpB[irow][1] -= 0.0;    // b_2 = - a_1
                    tmpB[irow][8] = ny*sqr(v0x - v1x)/15.0;    // b_3
                    tmpB[irow][3] -= 2.0*ny*(v0x - v1x)*(v0y - v1y)/15.0;    // b_4 = -2 a_3
                    tmpB[irow][4] -= 0.5*ny*sqr(v0y - v1y)/15.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[irow] = 2.0/5.0*fg_side_dgB(tris[i])[Edge_side[i]][2];
                    }
                    else
                    {
                        BBavg[irow] = 2.0/5.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][2];
                    }
                }

                //// Now constrained least square 
                M = 3; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
                // N: number of unknown, M: number of least square eqn.
                // P: number of constraint eqn.
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[side], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            } // END of: for(side = 0; side < 3; side++)
            ////////////////////////////////////////
            /// END: one-sided stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: reverse stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    // N_edge = Mag_p2_9edge_reverse_sten_5pt_vertex(tri,side,tris, Edge_side);
                    N_edge = Mag_p2_5edge_reverse_sten_5pt_vertex_new(tri,side,tris, Edge_side);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_new, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                {
                    // N_edge = Mag_p2_9edge_reverse_sten(tri,side,tris, Edge_side);
                    //N_edge = Mag_p2_6edge_reverse_sten_new(tri,side,tris, Edge_side);
                    N_edge = Mag_p2_5edge_reverse_sten_new(tri,side,tris, Edge_side);
                }

                for(i = 3; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    //irow = 2*(i-3);
                    irow = i - 3;
                    tmpA[irow][0] = 2.0*nx;
                    tmpA[irow][1] = nx*(v0x + v1x - 2.0*x0);
                    tmpA[irow][2] = nx*(v0y + v1y - 2.0*y0);
                    tmpA[irow][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                    tmpA[irow][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                       - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                    tmpA[irow][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                    tmpA[irow][6] = 2.0*ny;    // b_0
                    tmpA[irow][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                    tmpA[irow][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                    tmpA[irow][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                    tmpA[irow][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                             - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                    tmpA[irow][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[irow] = 2.0*fg_side_dgB(tris[i])[Edge_side[i]][0];
                    }
                    else
                    {
                        Bavg[irow] = 2.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][0];
                    }

                    /****
                    irow ++;
                    tmpA[irow][0] = 0.0;
                    tmpA[irow][1] = nx*(v1x - v0x)/3.0;
                    tmpA[irow][2] = nx*(v1y - v0y)/3.0;
                    tmpA[irow][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpA[irow][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpA[irow][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpA[irow][6] = 0.0;    // b_0
                    tmpA[irow][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpA[irow][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpA[irow][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpA[irow][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpA[irow][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        Bavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        Bavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }
                    ****/
                }

                // Now setup constraint eqn.
                for(i = 0; i < 3; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    for(j = 0; j < dim; j++)
                        t[j] = fg_side_vector(tris[i])[Edge_side[i]][j];
                    nor[0] = t[1];
                    nor[1] = -t[0];

                    nx = nor[0]; ny = nor[1];
                    x0 = cent[0]; y0 = cent[1];
                    v0x = pcrds[0][0]; v0y = pcrds[0][1];
                    v1x = pcrds[1][0]; v1y = pcrds[1][1];
                    v0x2 = sqr(v0x); v0y2 = sqr(v0y);
                    v1x2 = sqr(v1x); v1y2 = sqr(v1y);

                    // 0 1 2; 3 4 5; 6 7
                    //// average
                    irow = 3*i;
                    if( i==0 || i==1 )
                    {
                        tmpB[irow][0] = 2.0*nx;
                        tmpB[irow][1] = nx*(v0x + v1x - 2.0*x0);
                        tmpB[irow][2] = nx*(v0y + v1y - 2.0*y0);
                        tmpB[irow][3] = 2.0*nx*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));
                        tmpB[irow][4] = nx*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                           - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);
                        tmpB[irow][5] = 2.0*nx*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));

                        tmpB[irow][6] = 2.0*ny;    // b_0
                        tmpB[irow][7] = ny*(v0x + v1x - 2.0*x0);  // b_1
                        tmpB[irow][1] -= ny*(v0y + v1y - 2.0*y0);    // b_2 = - a_1
                        tmpB[irow][8] = 2.0*ny*(v0x2/3.0 + v0x*v1x/3.0 + v1x2/3.0 - v0x*x0 - v1x*x0 + sqr(x0));   // b_3
                        tmpB[irow][3] -= 2.0*ny*(2.0*v0x*v0y/3.0 + v0y*v1x/3.0 + v0x*v1y/3.0 + 2.0*v1x*v1y/3.0
                                                 - v0y*x0 - v1y*x0 - v0x*y0 - v1x*y0 + 2.0*x0*y0);  // b_4 = -2 a_3
                        tmpB[irow][4] -= 0.5*2.0*ny*(v0y2/3.0 + v0y*v1y/3.0 + v1y2/3.0 - v0y*y0 - v1y*y0 + sqr(y0));  // b_5 = -0.5 a_4

                        if(rk_step == RK_STEP)
                        {
                            BBavg[irow] = 2.0*fg_side_B(tris[i])[Edge_side[i]];
                        }
                        else
                        {
                            BBavg[irow] = 2.0*midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                        }
                    }

                    ///// first slope
                    irow ++;
                    if(i==2) irow = 6;
                    tmpB[irow][0] = 0.0;
                    tmpB[irow][1] = nx*(v1x - v0x)/3.0;
                    tmpB[irow][2] = nx*(v1y - v0y)/3.0;
                    tmpB[irow][3] = nx*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;
                    tmpB[irow][4] = nx*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;
                    tmpB[irow][5] = nx*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;

                    tmpB[irow][6] = 0.0;    // b_0
                    tmpB[irow][7] = ny*(v1x - v0x)/3.0;    // b_1
                    tmpB[irow][1] -= ny*(v1y - v0y)/3.0;    // b_2 = - a_1
                    tmpB[irow][8] = ny*(v1x2 - v0x2 + 2.0*v0x*x0 - 2.0*v1x*x0)/3.0;    // b_3
                    tmpB[irow][3] -= 2.0*ny*(v1x*v1y - v0x*v0y + v0y*x0 - v1y*x0 + v0x*y0 - v1x*y0)/3.0;    // b_4 = -2 a_3
                    tmpB[irow][4] -= 0.5*ny*(v1y2 - v0y2 + 2.0*v0y*y0 - 2.0*v1y*y0)/3.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[irow] = 2.0/3.0*fg_side_dgB(tris[i])[Edge_side[i]][1];
                    }
                    else
                    {
                        BBavg[irow] = 2.0/3.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][1];
                    }

                    ////// second slope
                    irow ++;
                    if(i==2) irow = 7;
                    tmpB[irow][0] = 0.0;
                    tmpB[irow][1] = 0.0;
                    tmpB[irow][2] = 0.0;
                    tmpB[irow][3] = nx*sqr(v0x - v1x)/15.0;
                    tmpB[irow][4] = nx*(v0x - v1x)*(v0y - v1y)/15.0;
                    tmpB[irow][5] = nx*sqr(v0y - v1y)/15.0;

                    tmpB[irow][6] = 0.0;    // b_0
                    tmpB[irow][7] = 0.0;    // b_1
                    tmpB[irow][1] -= 0.0;    // b_2 = - a_1
                    tmpB[irow][8] = ny*sqr(v0x - v1x)/15.0;    // b_3
                    tmpB[irow][3] -= 2.0*ny*(v0x - v1x)*(v0y - v1y)/15.0;    // b_4 = -2 a_3
                    tmpB[irow][4] -= 0.5*ny*sqr(v0y - v1y)/15.0;  // b_5 = -0.5 a_4

                    if(rk_step == RK_STEP)
                    {
                        BBavg[irow] = 2.0/5.0*fg_side_dgB(tris[i])[Edge_side[i]][2];
                    }
                    else
                    {
                        BBavg[irow] = 2.0/5.0*midsoln[tris[i]->id].edge_dgBn[Edge_side[i]][rk_step][2];
                    }
                }

                //// Now constrained least square 
                M = 2; N = 9; P = 8; LDA = M; LDB = P; LWORK = M+N+P;
                // N: number of unknown, M: number of least square eqn.
                // P: number of constraint eqn.
                /****
                if(num_tris_vertex == 5)
                {
                    M = 6; N = 9; P = 5; LDA = M; LDB = P; LWORK = M+N+P;
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_new, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                {
                    M = 8; N = 9; P = 5; LDA = M; LDB = P; LWORK = M+N+P;
                }
                ****/

                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[side+3], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
        }

        ///////////////////////////////////////////
        //// Start WENO
        ///////////////////////////////////////////

        if(YES == use_central_sten)
            N_STEN = 7;
        else
            N_STEN = 6;
        sum = 0.0;
        for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
        {
            for(i =0; i < MAX_N_COEF; i++) 
                ansx[cv_indx][i] = XX[cv_indx][i];
            ansy[cv_indx][0] = XX[cv_indx][6];          // b0
            ansy[cv_indx][1] = XX[cv_indx][7];          // b1
            ansy[cv_indx][2] = -ansx[cv_indx][1];       // b2 = - a1
            ansy[cv_indx][3] = XX[cv_indx][8];          // b3
            ansy[cv_indx][4] = -2.0*ansx[cv_indx][3];   // b4 = -2 a3
            ansy[cv_indx][5] = -0.5*ansx[cv_indx][4];   // b5 = -0.5 a4

            OIx[cv_indx] = weno_weight_P2(tri, ansx[cv_indx]);
            OIy[cv_indx] = weno_weight_P2(tri, ansy[cv_indx]);
            tmp = (eps + OIx[cv_indx] + OIy[cv_indx]);
            if(6 == cv_indx)   /// central stencil
                // alpha[cv_indx] = 10.0/(quad(sqr(eps + OIx[cv_indx] + OIy[cv_indx])));
                alpha[cv_indx] = 10.0/(quad(tmp));
            else
                // alpha[cv_indx] = 1.0/(quad(sqr(eps + OIx[cv_indx] + OIy[cv_indx])));
                alpha[cv_indx] = 1.0/(quad(tmp));
            sum += alpha[cv_indx];
        }

        /***
        if(debug == YES)
        {
            for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
            {
                printf("++++++Sten[%d]+++++:::\n", cv_indx);
                verify_edge_B(tri,rk_step, midsoln, ansx[cv_indx], ansy[cv_indx]);
                printf("Bx coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", ansx[cv_indx][i]);
                printf("\n");
                printf("By coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", ansy[cv_indx][i]);
                printf("\n");
            }
        }
        ***/ 

        /***
        if(debug == YES)
        {
            for(cv_indx = 0; cv_indx < 3; cv_indx++)
            {
                printf("********On sten[%d], preliminary:\n", cv_indx);
                printf("Bx coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", ansx[cv_indx][i]);
                printf("\n");
                printf("By coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", ansy[cv_indx][i]);
                printf("\n");
            }
        } 
        ***/

        for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
        {
            wei[cv_indx] = alpha[cv_indx]/sum;
            // wei[cv_indx] = 1.0/3.0;
        }

        for(i = 0; i < MAX_N_COEF; i++)
        {
            MB[0][i] = MB[1][i] = 0.0;
        }

        for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
        {
            for(i = 0; i < MAX_N_COEF; i++)
            {
                MB[0][i] += wei[cv_indx]*ansx[cv_indx][i];
                MB[1][i] += wei[cv_indx]*ansy[cv_indx][i];
            }
        }

        for(i = 0; i < MAX_N_COEF; i++)
        {
            dg_B(st)[0][i] = MB[0][i];
            dg_B(st)[1][i] = MB[1][i];
        }

        Mag(st)[0] = Mag(st)[1] = 0.0;
        for(i= 0; i< MAX_N_COEF; i++)
        {
            Mag(st)[0] += dg_B(st)[0][i]*Lmass_matrix[0][i];
            Mag(st)[1] += dg_B(st)[1][i]*Lmass_matrix[0][i];
        }
        for(i = 0; i < 2; i++)
            Mag(st)[i] /= Lmass_matrix[0][0];

        /***
        if(debugging("blast_MHD"))
        {  
            if(debug)
            {
                printf("State after B-reconstruction\n");
                g_verbose_print_state(st);
            } 

            if(POLY_thermal_pressure_MHD(st) < 0.0)
            {
                for(i = 0; i < 2; i++)
                    Mag(st)[i] = cellavgB[i];
                {
                    if(debug)
                    {
                        printf("tri[%d] negative pressure\n", tri->id);
                    }
                }
                return NO;
            }
            else 
                return YES;
        }
        ***/

        if(debug == YES)
        {
            printf("++++++Weighted Combine+++++:::\n");
            verify_edge_B(tri,rk_step, midsoln, MB[0], MB[1]);

            printf("Bx coeff: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%g ", dg_B(st)[0][i]);
            printf("\n");
            printf("By coeff: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%g ", dg_B(st)[1][i]);
            printf("\n");
            // clean_up(0);
        }

        return YES;
}


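/* Use the conditions
                  b_2 = -a_1
                  b_4 = -2 a_3
                  b_5 = -0.5*a_4
   to reduce the number of unknowns to 9 variables.
   The remaining unknowns are: a0, a1, a2, a3, a4, a5, b0, b1, b3.
   (NOTE(review): these three relations appear to be the div(B) = 0
   condition for a quadratic expansion whose terms are ordered
   1, (x-x_c), (y-y_c), (x-x_c)^2, (x-x_c)(y-y_c), (y-y_c)^2 -- confirm.)
   The magnetic field is NOT conserved during the reconstruction.
   Note that the solution is represented as:
   Bx = a0 + a1(x-x_c) + a2(y - y_c) ...
   By = b0 + b1(x-x_c) + b2(y - y_c) ...
   which is the Taylor expansion about the triangle center (x_c, y_c).
*/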
LOCAL int tri_B_P2_polynomial_reconstruction_9edge_1(
         TRI       *tri,   
         Mid_soln *midsoln,  
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *Bnbtri[3], *tris[30], *nbtri;
        double    AA[300], BB[300], CC[300], DD[300], XX[8][300], work[300], tmp;
        static double   **A = NULL, **invA = NULL, **I, eps = 1.0e-6, **B;
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, indx, debug = NO, cv_indx;
        int       sten_indx, edge_indx[3][9], e_indx, N_cell, N_STEN;
        Locstate  st, nbst[3], st2;
        double    Bavg[30], *pcrds[3], dy, dx, len, cellavgB[2], xx1, yy1, sum, BBavg[10];
        int       side, tmp_side, Edge_side[30], N_edge, num_tris_vertex;
        double    **Lmass_matrix = tri->Lmass_matrix, *cent = fg_centroid(tri), **tmpA, **tmpB;
        double    OIx[8], OIy[8], alpha[8], wei[8], ansx[8][30], ansy[8][30], MB[2][12];
        int       use_central_sten = YES;
        

        if(invA == NULL)
        {
            matrix(&(A), 12, 12, sizeof(double));
            matrix(&(I), 12, 12, sizeof(double));
            matrix(&(invA), 12, 12, sizeof(double));
            matrix(&(B), 2, 12, sizeof(double));
        }

        for(side = 0; side < 3; side++)
        {
            Bnbtri[side] = Tri_on_side(tri,side);
            pcrds[side] = Coords(Point_of_tri(tri)[side]);
        }

        // if(tri->id == 6778)
        // if(tri->id == 403)
        // if(tri->id == 439)
        // if(tri->id == 15574)
        /***
        if(tri->id == 27087)
        {
            double tmp;
            if(rk_step == RK_STEP)
            {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = fg_side_B(tri)[i];
                }
            }
            else
            {
                for(i = 0; i < 3; i++)
                {
                    Bavg[i] = midsoln[tri->id].edge_Bn[i][rk_step];
                }
            }
            if(rk_step == RK_STEP)
                st = tri->st;
            else
                st = midsoln[tri->id].st[rk_step];

            tmp = Bavg[0]*fg_length_side(tri)[0] +
                  Bavg[1]*fg_length_side(tri)[1] +
                  Bavg[2]*fg_length_side(tri)[2];
            printf("\n\n &&&&&&&&&&&  tri[%d] enter tri_B_P2_polynomial_reconstruction_9edge_1(), initial divg = %g\n",
                     tri->id, tmp);
            printf("Bavg[%12.11g, %12.11g, %12.11g]\n", Bavg[0], Bavg[1], Bavg[2]);
            printf("Evolved cell avg Bx, By = [%g, %g]\n", Mag(st)[0],   Mag(st)[1]);
            print_tri_crds(tri);
            debug = YES;
        }
        ***/

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(i = 0; i < 2; i++)
            cellavgB[i] = Mag(st)[i];

        if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].Bsten_set == NO)
        {
            alltri_HR_sten[tri->id].Bsten_set = YES;

            ///////////////////////////////////////
            ///start central stencil reconstruction--- alltri_HR_sten != NULL && Bsten_set == NO
            ///////////////////////////////////////
            if(YES == use_central_sten)
            {
                for(side = 0; side < 3; side++)
                {
                    count_num_tris_vertex(tri,Point_of_tri(tri)[side], &num_tris_vertex);
                    if(num_tris_vertex <= 5)
                    {
                        use_central_sten = NO;
                        break;
                    } 
                }
            }
            if(YES == use_central_sten)
            {
                N_edge = Mag_p2_11edge_central_sten(tri, tris, Edge_side);
                matrix(&tmpA,9,9,sizeof(double));
                matrix(&tmpB,2,9,sizeof(double));
                alltri_HR_sten[tri->id].MB_A = tmpA;
                alltri_HR_sten[tri->id].MB_B = tmpB;
                // first setup least square eqn.
                for(i = 2; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dx = pcrds[1][0] - pcrds[0][0];
                    dy = pcrds[1][1] - pcrds[0][1];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];

                    tmpA[i-2][0] = dy;
                    tmpA[i-2][1] = dy*(xx1 + 0.5*dx);
                    tmpA[i-2][2] = dy*(yy1 + 0.5*dy);
                    tmpA[i-2][3] = dy*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);
                    tmpA[i-2][4] = dy*(xx1*yy1 + 0.5*xx1*dy +0.5*yy1*dx + dx*dy/3.0);
                    tmpA[i-2][5] = dy*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);

                    tmpA[i-2][6] = -dx;    // b_0
                    tmpA[i-2][7] = -dx*(xx1 + 0.5*dx);  // b_1
                    tmpA[i-2][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    tmpA[i-2][8] = -dx*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);   // b_3
                    tmpA[i-2][3] += 2.0*dx*(xx1*yy1 +0.5*xx1*dy + 0.5*yy1*dx +  dx*dy/3.0);  // b_4
                    tmpA[i-2][4] += 0.5*dx*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);  // b_5

                    for(j = 0; j < 9; j++)
                        tmpA[i-2][j] /= len;
                }
                if(rk_step == RK_STEP)
                {
                    for(i = 2; i < N_edge; i++)
                        Bavg[i-2] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 2; i < N_edge; i++)
                        Bavg[i-2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                // Now setup constraint eqn.
                for(i = 0; i < 2; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dx = pcrds[1][0] - pcrds[0][0];
                    dy = pcrds[1][1] - pcrds[0][1];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];

                    tmpB[i][0] = dy;
                    tmpB[i][1] = dy*(xx1 + 0.5*dx);
                    tmpB[i][2] = dy*(yy1 + 0.5*dy);
                    tmpB[i][3] = dy*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);
                    tmpB[i][4] = dy*(xx1*yy1 + 0.5*xx1*dy +0.5*yy1*dx + dx*dy/3.0);
                    tmpB[i][5] = dy*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);

                    tmpB[i][6] = -dx;    // b_0
                    tmpB[i][7] = -dx*(xx1 + 0.5*dx);  // b_1
                    tmpB[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    tmpB[i][8] = -dx*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);   // b_3
                    tmpB[i][3] += 2.0*dx*(xx1*yy1 +0.5*xx1*dy + 0.5*yy1*dx +  dx*dy/3.0);  // b_4
                    tmpB[i][4] += 0.5*dx*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);  // b_5

                    for(j = 0; j < 9; j++)
                        tmpB[i][j] /= len;
                }
                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < 2; i++)
                    {
                        BBavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                    }
                }
                else
                {
                    for(i = 0; i < 2; i++)
                    {
                        BBavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                    }
                }
                //// Now constrained least square 
                M = 9; N = 9; P = 2; LDA = M; LDB = P; LWORK = M+N+P;
                // N: number of unknown, M: number of least square eqn.
                // P: number of constraint eqn.
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[6], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
            ///////////////////////////////////////
            /// END: central stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == NO
            ///////////////////////////////////////

            ////////////////////////////////////////
            /// Start one-sided stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == NO
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                matrix(&tmpA,10,9,sizeof(double));
                alltri_HR_sten[tri->id].Bsten[side] = tmpA; 

                ///// check 5 tris at vertex case
                nbtri = Tri_on_side(tri,side);
                for(tmp_side = 0; tmp_side < 3; tmp_side++)
                {
                    if(tri == Tri_on_side(nbtri,tmp_side))
                        break;
                }
                count_num_tris_vertex(nbtri,Point_of_tri(nbtri)[(tmp_side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    N_edge = Mag_p2_9edge_sten_2(tri,side,tris, Edge_side);
                    // printf("ERROR: tri_B_P2_polynomial_reconstruction_9edge_1,"
                    //     " 5 tris at vertex, one_sided case tri[%d]\n", tri->id);
                    // clean_up(ERROR);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_9edge_1,"
                        " not enough tri at vertex, one_sided case\n");
                    clean_up(ERROR);
                }
                else
                    // N_edge = Mag_p2_edge_sten(tri,side,tris, Edge_side);
                    N_edge = Mag_p2_9edge_sten_1(tri,side,tris, Edge_side);

                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);
    
                    dx = pcrds[1][0] - pcrds[0][0];
                    dy = pcrds[1][1] - pcrds[0][1];
                    xx1 = pcrds[0][0] - cent[0]; 
                    yy1 = pcrds[0][1] - cent[1]; 

                    len = fg_length_side(tris[i])[Edge_side[i]];

                    tmpA[i][0] = dy;
                    tmpA[i][1] = dy*(xx1 + 0.5*dx);
                    tmpA[i][2] = dy*(yy1 + 0.5*dy);
                    tmpA[i][3] = dy*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);
                    tmpA[i][4] = dy*(xx1*yy1 + 0.5*xx1*dy +0.5*yy1*dx + dx*dy/3.0);
                    tmpA[i][5] = dy*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);

                    tmpA[i][6] = -dx;    // b_0
                    tmpA[i][7] = -dx*(xx1 + 0.5*dx);  // b_1
                    tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    tmpA[i][8] = -dx*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);   // b_3
                    tmpA[i][3] += 2.0*dx*(xx1*yy1 +0.5*xx1*dy + 0.5*yy1*dx +  dx*dy/3.0);  // b_4
                    tmpA[i][4] += 0.5*dx*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);  // b_5

                    for(j = 0; j < 9; j++)
                        tmpA[i][j] /= len;
                }

                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(tmpA,9,Bavg,XX[side]);
                ///// TMP
                /**
                if(debug == YES)
                {
                    printf("On tri[%d] one-sided[%d], N_edge = %d, tris_on_vertex %d\n",
                         tri->id, side, N_edge, num_tris_vertex);
                    printf("computed Bx: ");
                    for(i = 0; i < MAX_N_COEF; i++)
                        printf("%g, ", XX[side+3][i]);
                    printf("\n");
                }
                **/  
            } // END of: for(side = 0; side < 3; side++)
            ////////////////////////////////////////
            /// END: one-sided stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == NO
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: reverse stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == NO
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                matrix(&tmpA,10,9,sizeof(double));
                alltri_HR_sten[tri->id].Bsten_rev[side] = tmpA;

                count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    N_edge = Mag_p2_9edge_reverse_sten_5pt_vertex(tri,side,tris, Edge_side);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_9edge_1, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                    N_edge = Mag_p2_9edge_reverse_sten(tri,side,tris, Edge_side);

                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dx = pcrds[1][0] - pcrds[0][0];
                    dy = pcrds[1][1] - pcrds[0][1];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];

                    tmpA[i][0] = dy;
                    tmpA[i][1] = dy*(xx1 + 0.5*dx);
                    tmpA[i][2] = dy*(yy1 + 0.5*dy);
                    tmpA[i][3] = dy*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);
                    tmpA[i][4] = dy*(xx1*yy1 + 0.5*xx1*dy +0.5*yy1*dx + dx*dy/3.0);
                    tmpA[i][5] = dy*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);

                    tmpA[i][6] = -dx;    // b_0
                    tmpA[i][7] = -dx*(xx1 + 0.5*dx);  // b_1
                    tmpA[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    tmpA[i][8] = -dx*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);   // b_3
                    tmpA[i][3] += 2.0*dx*(xx1*yy1 +0.5*xx1*dy + 0.5*yy1*dx +  dx*dy/3.0);  // b_4
                    tmpA[i][4] += 0.5*dx*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);  // b_5

                    for(j = 0; j < 9; j++)
                        tmpA[i][j] /= len;
                }
                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }

                solve_by_gj(tmpA,9,Bavg,XX[side+3]);

                ///// TMP
                /***
                if(debug == YES) 
                {
                    printf("On tri[%d] reverse side[%d], N_edge = %d, tris_on_vertex %d\n",
                         tri->id, side, N_edge, num_tris_vertex);
                    printf("computed Bx: ");
                    for(i = 0; i < MAX_N_COEF; i++)
                        printf("%g, ", XX[side+3][i]);
                    printf("\n"); 
                }
                ***/
            }
            ////////////////////////////////////////
            /// END: reverse stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == NO
            ////////////////////////////////////////
        }
        else if(alltri_HR_sten != NULL && alltri_HR_sten[tri->id].Bsten_set == YES)
        {
            ////////////////////////////////////////
            /// Start: central stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            if(YES == use_central_sten)
            {
                for(side = 0; side < 3; side++)
                {
                    count_num_tris_vertex(tri,Point_of_tri(tri)[side], &num_tris_vertex);
                    if(num_tris_vertex <= 5)
                    {
                        use_central_sten = NO;
                        break;
                    }
                }
            }
            if(YES == use_central_sten)
            {
                N_edge = Mag_p2_11edge_central_sten(tri, tris, Edge_side);
                tmpA = alltri_HR_sten[tri->id].MB_A;
                tmpB = alltri_HR_sten[tri->id].MB_B;
                if(rk_step == RK_STEP)
                {
                    for(i = 2; i < N_edge; i++)
                        Bavg[i-2] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 2; i < N_edge; i++)
                        Bavg[i-2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }

                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < 2; i++)
                        BBavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < 2; i++)
                        BBavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
 
                //// Now constrained least square 
                M = 9; N = 9; P = 2; LDA = M; LDB = P; LWORK = M+N+P;
                // M: number of least square eqn.
                // N: number of unknown, 
                // P: number of constraint eqn.
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = tmpA[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = tmpB[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[6], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
            ////////////////////////////////////////
            /// END: central stencil reconstruction-------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: one-sided stencil reconstruction------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                tmpA = alltri_HR_sten[tri->id].Bsten[side];

                ///// check 5 tris at vertex case
                nbtri = Tri_on_side(tri,side);
                for(tmp_side = 0; tmp_side < 3; tmp_side++)
                {
                    if(tri == Tri_on_side(nbtri,tmp_side))
                        break;
                }
                count_num_tris_vertex(nbtri,Point_of_tri(nbtri)[(tmp_side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                    N_edge = Mag_p2_9edge_sten_2(tri,side,tris, Edge_side);
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_9edge_1,"
                        " not enough tri at vertex, one_sided case\n");
                    clean_up(ERROR);
                }
                else
                    N_edge = Mag_p2_9edge_sten_1(tri,side,tris, Edge_side);

                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(tmpA,9,Bavg,XX[side]);
            }
 
            //// TMP
            /***
            if(debug == YES)
            {
                for(cv_indx = 0; cv_indx < 3; cv_indx++)
                {
                    for(i =0; i < MAX_N_COEF; i++)
                        ansx[cv_indx][i] = XX[cv_indx][i];
                    ansy[cv_indx][0] = XX[cv_indx][6];          // b0
                    ansy[cv_indx][1] = XX[cv_indx][7];          // b1
                    ansy[cv_indx][2] = -ansx[cv_indx][1];       // b2
                    ansy[cv_indx][3] = XX[cv_indx][8];          // b3
                    ansy[cv_indx][4] = -2.0*ansx[cv_indx][3];   // b4
                    ansy[cv_indx][5] = -0.5*ansx[cv_indx][4];   // b5

                    printf("Side[%d] Sten Bx: ", cv_indx);
                    for(i =0; i < MAX_N_COEF; i++)
                        printf("%g, ", ansx[cv_indx][i]);
                    printf("\n");
                    printf("Side[%d] Sten By: ", cv_indx);
                    for(i =0; i < MAX_N_COEF; i++)
                        printf("%g, ", ansy[cv_indx][i]);
                    printf("\n");
                }
            }
            ***/
            //// END TMP
            ////////////////////////////////////////
            /// END: one-sided stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            ////////////////////////////////////////
            /// Start: reverse stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                tmpA = alltri_HR_sten[tri->id].Bsten_rev[side];
                count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    N_edge = Mag_p2_9edge_reverse_sten_5pt_vertex(tri,side,tris, Edge_side);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_9edge_1, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                    N_edge = Mag_p2_9edge_reverse_sten(tri,side,tris, Edge_side);

                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(tmpA,9,Bavg,XX[side+3]);
            }
            ////////////////////////////////////////
            /// END: reverse stencil reconstruction -------alltri_HR_sten != NULL && Bsten_set == YES
            ////////////////////////////////////////
        }

        if(alltri_HR_sten == NULL)
        {
            ////////////////////////////////////////
            /// Start: central stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            if(YES == use_central_sten)
            {
                for(side = 0; side < 3; side++)
                {
                    count_num_tris_vertex(tri,Point_of_tri(tri)[side], &num_tris_vertex);
                    if(num_tris_vertex <= 5)
                    {
                        use_central_sten = NO;
                        break;
                    }
                }
            }

            if(YES == use_central_sten)
            {
                N_edge = Mag_p2_11edge_central_sten(tri, tris, Edge_side);
                // first setup least square eqn.
                for(i = 2; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dx = pcrds[1][0] - pcrds[0][0];
                    dy = pcrds[1][1] - pcrds[0][1];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];

                    A[i-2][0] = dy;
                    A[i-2][1] = dy*(xx1 + 0.5*dx);
                    A[i-2][2] = dy*(yy1 + 0.5*dy);
                    A[i-2][3] = dy*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);
                    A[i-2][4] = dy*(xx1*yy1 + 0.5*xx1*dy +0.5*yy1*dx + dx*dy/3.0);
                    A[i-2][5] = dy*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);

                    A[i-2][6] = -dx;    // b_0
                    A[i-2][7] = -dx*(xx1 + 0.5*dx);  // b_1
                    A[i-2][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    A[i-2][8] = -dx*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);   // b_3
                    A[i-2][3] += 2.0*dx*(xx1*yy1 +0.5*xx1*dy + 0.5*yy1*dx +  dx*dy/3.0);  // b_4
                    A[i-2][4] += 0.5*dx*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);  // b_5

                    for(j = 0; j < 9; j++)
                        A[i-2][j] /= len;
                }
                if(rk_step == RK_STEP)
                {
                    for(i = 2; i < N_edge; i++)
                        Bavg[i-2] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 2; i < N_edge; i++)
                        Bavg[i-2] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }

                // Now setup constraint eqn.
                for(i = 0; i < 2; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dx = pcrds[1][0] - pcrds[0][0];
                    dy = pcrds[1][1] - pcrds[0][1];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];

                    B[i][0] = dy;
                    B[i][1] = dy*(xx1 + 0.5*dx);
                    B[i][2] = dy*(yy1 + 0.5*dy);
                    B[i][3] = dy*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);
                    B[i][4] = dy*(xx1*yy1 + 0.5*xx1*dy +0.5*yy1*dx + dx*dy/3.0);
                    B[i][5] = dy*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);

                    B[i][6] = -dx;    // b_0
                    B[i][7] = -dx*(xx1 + 0.5*dx);  // b_1
                    B[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    B[i][8] = -dx*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);   // b_3
                    B[i][3] += 2.0*dx*(xx1*yy1 +0.5*xx1*dy + 0.5*yy1*dx +  dx*dy/3.0);  // b_4
                    B[i][4] += 0.5*dx*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);  // b_5

                    for(j = 0; j < 9; j++)
                        B[i][j] /= len;
                }
                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < 2; i++)
                        BBavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < 2; i++)
                        BBavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                //// Now constrained least square 
                M = 9; N = 9; P = 2; LDA = M; LDB = P; LWORK = M+N+P; 
                // N: number of unknown, M: number of least square eqn.
                // P: number of constraint eqn.
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < M; i++)
                    {
                        AA[l] = A[i][j];
                        l++;
                    }
                }
                l = 0;
                for(j = 0; j < N; j++)
                {
                    for(i = 0; i < P; i++)
                    {
                        BB[l] = B[i][j];
                        l++;
                    }
                }
                for(i = 0; i < M; i++)  // right side for the least square part
                    CC[i] = Bavg[i];
                for(i = 0; i < P; i++)  // right side for the constrained part
                    DD[i] = BBavg[i];
                FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                     BB, &LDB, CC, DD, XX[6], ///// double array B, int LDB, double array C, D, X,
                                     work, &LWORK, &INFO);
            }
            ////////////////////////////////////////////////
            /* End of central stencil reconstruction ----alltri_HR_sten = NULL     */
            ////////////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: one-sided stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                ///// check 5 tris at vertex case
                nbtri = Tri_on_side(tri,side);
                for(tmp_side = 0; tmp_side < 3; tmp_side++)
                {
                    if(tri == Tri_on_side(nbtri,tmp_side))
                        break;
                }
                count_num_tris_vertex(nbtri,Point_of_tri(nbtri)[(tmp_side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                    N_edge = Mag_p2_9edge_sten_2(tri,side,tris, Edge_side);
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_9edge_1,"
                        " not enough tri at vertex, one_sided case\n");
                    clean_up(ERROR);
                }
                else
                    N_edge = Mag_p2_9edge_sten_1(tri,side,tris, Edge_side);

                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);
    
                    dx = pcrds[1][0] - pcrds[0][0];
                    dy = pcrds[1][1] - pcrds[0][1];
                    xx1 = pcrds[0][0] - cent[0]; 
                    yy1 = pcrds[0][1] - cent[1]; 

                    len = fg_length_side(tris[i])[Edge_side[i]];

                    A[i][0] = dy;
                    A[i][1] = dy*(xx1 + 0.5*dx);
                    A[i][2] = dy*(yy1 + 0.5*dy);
                    A[i][3] = dy*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);
                    A[i][4] = dy*(xx1*yy1 + 0.5*xx1*dy +0.5*yy1*dx + dx*dy/3.0);
                    A[i][5] = dy*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);

                    A[i][6] = -dx;    // b_0
                    A[i][7] = -dx*(xx1 + 0.5*dx);  // b_1
                    A[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    A[i][8] = -dx*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);   // b_3
                    A[i][3] += 2.0*dx*(xx1*yy1 +0.5*xx1*dy + 0.5*yy1*dx +  dx*dy/3.0);  // b_4
                    A[i][4] += 0.5*dx*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);  // b_5

                    for(j = 0; j < 9; j++)
                        A[i][j] /= len;
 
                    //// TMP
                    /***
                    if(debug == YES)
                    {
                        printf("edge[%d], from (%g; %g) to (%g; %g) on tri[%d] with cent(%g, %g)\n", 
                                 i, pcrds[0][0], pcrds[0][1],  pcrds[1][0], pcrds[1][1], tris[i]->id,
                             fg_centroid(tris[i])[0], fg_centroid(tris[i])[1] );
                    }
                    ***/
                    //// END: TMP
                }

                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }

                solve_by_gj(A,9,Bavg,XX[side]);
            }
            ////////////////////////////////////////
            /// END: one-sided stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////

            ////////////////////////////////////////
            /// Start: reverse stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
            for(side = 0; side < 3; side++)
            {
                count_num_tris_vertex(tri,Point_of_tri(tri)[(side+2)%3], &num_tris_vertex);
                if(num_tris_vertex == 5)
                {
                    N_edge = Mag_p2_9edge_reverse_sten_5pt_vertex(tri,side,tris, Edge_side);
                }
                else if(num_tris_vertex < 5)
                {
                    printf("ERROR: tri_B_P2_polynomial_reconstruction_9edge_1, not enough tri at vertex\n");
                    clean_up(ERROR);
                }
                else
                    N_edge = Mag_p2_9edge_reverse_sten(tri,side,tris, Edge_side);

                for(i = 0; i < N_edge; i++)
                {
                    pcrds[0] = Coords(Point_of_tri(tris[i])[Edge_side[i]]);
                    pcrds[1] = Coords(Point_of_tri(tris[i])[ (Edge_side[i] +1)%3 ]);

                    dx = pcrds[1][0] - pcrds[0][0];
                    dy = pcrds[1][1] - pcrds[0][1];
                    xx1 = pcrds[0][0] - cent[0];
                    yy1 = pcrds[0][1] - cent[1];

                    len = fg_length_side(tris[i])[Edge_side[i]];

                    A[i][0] = dy;
                    A[i][1] = dy*(xx1 + 0.5*dx);
                    A[i][2] = dy*(yy1 + 0.5*dy);
                    A[i][3] = dy*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);
                    A[i][4] = dy*(xx1*yy1 + 0.5*xx1*dy +0.5*yy1*dx + dx*dy/3.0);
                    A[i][5] = dy*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);

                    A[i][6] = -dx;    // b_0
                    A[i][7] = -dx*(xx1 + 0.5*dx);  // b_1
                    A[i][1] += dx*(yy1 + 0.5*dy);    // b_2 = - a_1
                    A[i][8] = -dx*(sqr(xx1) + xx1*dx + sqr(dx)/3.0);   // b_3
                    A[i][3] += 2.0*dx*(xx1*yy1 +0.5*xx1*dy + 0.5*yy1*dx +  dx*dy/3.0);  // b_4
                    A[i][4] += 0.5*dx*(sqr(yy1) + yy1*dy + sqr(dy)/3.0);  // b_5

                    for(j = 0; j < 9; j++)
                        A[i][j] /= len;
                }
                if(rk_step == RK_STEP)
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = fg_side_B(tris[i])[Edge_side[i]];
                }
                else
                {
                    for(i = 0; i < N_edge; i++)
                        Bavg[i] = midsoln[tris[i]->id].edge_Bn[Edge_side[i]][rk_step];
                }
                solve_by_gj(A,9,Bavg,XX[side+3]);
            }
            ////////////////////////////////////////
            /// END: reverse stencil reconstruction ----alltri_HR_sten = NULL
            ////////////////////////////////////////
        }

        ///////////////////////////////////////////
        //// Start WENO
        ///////////////////////////////////////////

        if(YES == use_central_sten)
            N_STEN = 7;
        else
            N_STEN = 6;
        sum = 0.0;
        for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
        {
            for(i =0; i < MAX_N_COEF; i++) 
                ansx[cv_indx][i] = XX[cv_indx][i];
            ansy[cv_indx][0] = XX[cv_indx][6];          // b0
            ansy[cv_indx][1] = XX[cv_indx][7];          // b1
            ansy[cv_indx][2] = -ansx[cv_indx][1];       // b2
            ansy[cv_indx][3] = XX[cv_indx][8];          // b3
            ansy[cv_indx][4] = -2.0*ansx[cv_indx][3];   // b4
            ansy[cv_indx][5] = -0.5*ansx[cv_indx][4];   // b5

            OIx[cv_indx] = weno_weight_P2(tri, ansx[cv_indx]);
            OIy[cv_indx] = weno_weight_P2(tri, ansy[cv_indx]);
            tmp = (eps + OIx[cv_indx] + OIy[cv_indx]);
            if(6 == cv_indx)   /// central stencil
                // alpha[cv_indx] = 10.0/(quad(sqr(eps + OIx[cv_indx] + OIy[cv_indx])));
                alpha[cv_indx] = 10.0/(quad(tmp));
            else
                // alpha[cv_indx] = 1.0/(quad(sqr(eps + OIx[cv_indx] + OIy[cv_indx])));
                alpha[cv_indx] = 1.0/(quad(tmp));
            sum += alpha[cv_indx];
        }

        /***
        if(debug == YES)
        {
            for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
            {
                printf("++++++Sten[%d]+++++:::\n", cv_indx);
                verify_edge_B(tri,rk_step, midsoln, ansx[cv_indx], ansy[cv_indx]);
                printf("Bx coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", ansx[cv_indx][i]);
                printf("\n");
                printf("By coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", ansy[cv_indx][i]);
                printf("\n");
            }
        }
        ***/ 

        /***
        if(debug == YES)
        {
            for(cv_indx = 0; cv_indx < 3; cv_indx++)
            {
                printf("********On sten[%d], preliminary:\n", cv_indx);
                printf("Bx coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", ansx[cv_indx][i]);
                printf("\n");
                printf("By coeff: ");
                for(i = 0; i < MAX_N_COEF; i++)
                    printf("%g ", ansy[cv_indx][i]);
                printf("\n");
            }
        } 
        ***/

        for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
        {
            wei[cv_indx] = alpha[cv_indx]/sum;
            // wei[cv_indx] = 1.0/3.0;
        }

        for(i = 0; i < MAX_N_COEF; i++)
        {
            MB[0][i] = MB[1][i] = 0.0;
        }

        for(cv_indx = 0; cv_indx < N_STEN; cv_indx++)
        {
            for(i = 0; i < MAX_N_COEF; i++)
            {
                MB[0][i] += wei[cv_indx]*ansx[cv_indx][i];
                MB[1][i] += wei[cv_indx]*ansy[cv_indx][i];
            }
        }

        for(i = 0; i < MAX_N_COEF; i++)
        {
            dg_B(st)[0][i] = MB[0][i];
            dg_B(st)[1][i] = MB[1][i];
        }

        Mag(st)[0] = Mag(st)[1] = 0.0;
        for(i= 0; i< MAX_N_COEF; i++)
        {
            Mag(st)[0] += dg_B(st)[0][i]*Lmass_matrix[0][i];
            Mag(st)[1] += dg_B(st)[1][i]*Lmass_matrix[0][i];
        }
        for(i = 0; i < 2; i++)
            Mag(st)[i] /= Lmass_matrix[0][0];

        /***
        if(debugging("blast_MHD"))
        {  
            if(debug)
            {
                printf("State after B-reconstruction\n");
                g_verbose_print_state(st);
            } 

            if(POLY_thermal_pressure_MHD(st) < 0.0)
            {
                for(i = 0; i < 2; i++)
                    Mag(st)[i] = cellavgB[i];
                {
                    if(debug)
                    {
                        printf("tri[%d] negative pressure\n", tri->id);
                    }
                }
                return NO;
            }
            else 
                return YES;
        }
        ***/

        if(debug == YES)
        {
            printf("++++++Weighted Combine+++++:::\n");
            verify_edge_B(tri,rk_step, midsoln, MB[0], MB[1]);

            printf("Bx coeff: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%g ", dg_B(st)[0][i]);
            printf("\n");
            printf("By coeff: ");
            for(i = 0; i < MAX_N_COEF; i++)
                printf("%g ", dg_B(st)[1][i]);
            printf("\n");
            // clean_up(0);
        }

        return YES;
}


/*
*       verify_edge_B():
*
*       Debugging aid.  For every side of tri the polynomial field (Bx, By)
*       is integrated exactly along the side against (dy, -dx), the result is
*       divided by the side length, and it is compared with the stored edge
*       value: fg_side_B(tri)[side] when rk_step == RK_STEP, otherwise
*       midsoln[tri->id].edge_Bn[side][rk_step].
*
*       The coefficients multiply the monomials 1, x, y, x^2, xy, y^2 (first
*       three only when MAX_N_COEF == 3) in coordinates measured from
*       fg_centroid(tri).
*
*       MAX_N_COEF == 6: both numbers are printed for each side.
*       MAX_N_COEF == 3: silent unless the two differ by more than 6.0e-13,
*                        in which case an error is printed and
*                        clean_up(ERROR) is called.
*       Any other value of MAX_N_COEF: nothing is done.
*/
LOCAL void verify_edge_B(
         TRI       *tri,
         int       rk_step,
         Mid_soln *midsoln,
         double    *Bx,
         double    *By)
{
         double    *ctr = fg_centroid(tri);
         double    *p0, *p1;
         double    x0, y0, ex, ey, edge_len, bn_poly, bn_ref;
         int       s;

         if(MAX_N_COEF != 6 && MAX_N_COEF != 3)
             return;

         for(s = 0; s < 3; s++)
         {
             /// end points of side s, start point taken relative to the centroid
             p0 = Coords(Point_of_tri(tri)[s]);
             p1 = Coords(Point_of_tri(tri)[(s+1)%3]);

             x0 = p0[0] - ctr[0];
             y0 = p0[1] - ctr[1];

             ey = p1[1] - p0[1];
             ex = p1[0] - p0[0];
             edge_len = fg_length_side(tri)[s];

             if(MAX_N_COEF == 6)
             {
                 /// quadratic field: report computed and stored values
                 bn_poly = ey*Bx[0] + (0.5*ey*ex + ey*x0)*Bx[1] +
                           (ey*(y0 + 0.5*ey))*Bx[2] +
                           (ey*(sqr(x0) + x0*ex + sqr(ex)/3.0))*Bx[3] +
                           (ey*(x0*y0 + 0.5*x0*ey + 0.5*y0*ex + ex*ey/3.0))*Bx[4] +
                           (ey*(sqr(y0) + y0*ey + sqr(ey)/3.0))*Bx[5] +
                           -ex*By[0] + (-ex*(x0 + 0.5*ex))*By[1] +
                           (-ex*(y0 + 0.5*ey))*By[2] +
                           (-ex*(sqr(x0) + x0*ex + sqr(ex)/3.0))*By[3] +
                           (-ex*(x0*y0 + 0.5*x0*ey + 0.5*y0*ex + ex*ey/3.0))*By[4] +
                           (-ex*(sqr(y0) + y0*ey + sqr(ey)/3.0))*By[5];

                 bn_poly /= edge_len;
                 printf("Side[%d], Compute Bn %15.13g ", s, bn_poly);
                 if(rk_step == RK_STEP)
                     printf("evolved Bn = %15.13g\n", fg_side_B(tri)[s]);
                 else
                     printf("evolved Bn = %15.13g\n", midsoln[tri->id].edge_Bn[s][rk_step]);
             }
             else
             {
                 /// linear field: abort when the mismatch exceeds the tolerance
                 bn_poly = ey*Bx[0] + (0.5*ey*ex + ey*x0)*Bx[1] +
                           (ey*(y0 + 0.5*ey))*Bx[2] +
                           -ex*By[0] + (-ex*(x0 + 0.5*ex))*By[1] +
                           (-ex*(y0 + 0.5*ey))*By[2];

                 bn_poly /= edge_len;

                 if(rk_step == RK_STEP)
                     bn_ref = fg_side_B(tri)[s];
                 else
                     bn_ref = midsoln[tri->id].edge_Bn[s][rk_step];

                 if(fabs(bn_poly - bn_ref) > 6.0e-13)
                 {
                     printf("ERROR: verify_edge_B(), Bn =  %15.13g, reconstruct Bn = %15.13g\n",
                                bn_ref, bn_poly);
                     clean_up(ERROR);
                 }
             }
         }
}


/*
*       Mag_p1_3edge_central_sten_new():
*
*       Fills the stencil with the three sides of tri itself:
*       tris[k] = tri and Edge_side[k] = k for k = 0, 1, 2.
*
*       Returns the number of entries written (always 3).
*/
LOCAL int Mag_p1_3edge_central_sten_new(
         TRI       *tri,
         TRI       *tris[],
         int       *Edge_side)
{
         int       k;

         for(k = 0; k < 3; k++)
         {
             tris[k] = tri;
             Edge_side[k] = k;
         }

         return k;
}


/*
*       Mag_p1_5edge_central_sten():
*
*       Builds a 5-entry stencil of (triangle, local side) pairs:
*         entries 0-1 : sides 0 and 1 of tri;
*         entries 2-4 : for each side s of tri, the neighbour across s paired
*                       with its side (b+2)%3, where b is the neighbour's side
*                       that faces back to tri.
*
*       Returns the number of entries written (5).
*/
LOCAL int Mag_p1_5edge_central_sten(
         TRI       *tri,
         TRI       *tris[],
         int       *Edge_side)
{
         int       cnt, s, back;
         TRI       *nb;

         /// two sides of the centre triangle
         tris[0] = tri;
         Edge_side[0] = 0;
         tris[1] = tri;
         Edge_side[1] = 1;
         cnt = 2;

         /// one side from each of the three neighbours
         for(s = 0; s < 3; s++)
         {
             nb = Tri_on_side(tri,s);

             /// locate the side of nb that faces tri (back == 3 if none does)
             back = 0;
             while(back < 3 && tri != Tri_on_side(nb,back))
                 back++;

             tris[cnt] = nb;
             Edge_side[cnt] = (back+2)%3;
             cnt++;
         }

         return cnt;
}

/*
*       Mag_p2_6edge_central_sten_new():
*
*       Builds a 6-entry stencil of (triangle, local side) pairs:
*         entries 0-2 : the three sides of tri;
*         entries 3-5 : for each side s of tri, the neighbour across s paired
*                       with its side (b+2)%3, where b is the neighbour's side
*                       that faces back to tri.
*
*       Returns the number of entries written (6).
*/
LOCAL int Mag_p2_6edge_central_sten_new(
         TRI       *tri,
         TRI       *tris[],
         int       *Edge_side)
{
         int       cnt = 0, s, back;
         TRI       *nb;

         /// all three sides of the centre triangle
         for(s = 0; s < 3; s++)
         {
             tris[cnt] = tri;
             Edge_side[cnt] = s;
             cnt++;
         }

         /// one side from each of the three neighbours
         for(s = 0; s < 3; s++)
         {
             nb = Tri_on_side(tri,s);

             /// locate the side of nb that faces tri (back == 3 if none does)
             back = 0;
             while(back < 3 && tri != Tri_on_side(nb,back))
                 back++;

             tris[cnt] = nb;
             Edge_side[cnt] = (back+2)%3;
             cnt++;
         }

         return cnt;
}

/*
*       Mag_p2_5edge_reverse_sten_5pt_vertex_new():
*
*       Builds a 5-entry stencil of (triangle, local side) pairs:
*         entries 0-2 : sides (side+1)%3, (side+2)%3 and side of tri;
*         entry  3    : let nb be the neighbour of tri across (side+2)%3 and
*                       b the side of nb facing tri; the triangle across side
*                       (b+1)%3 of nb is stored with its side facing nb;
*         entry  4    : same construction through the neighbour across
*                       (side+1)%3, stepping over its side (b+2)%3.
*
*       tri        centre triangle
*       side       local side index of tri selecting the stencil orientation
*       tris       output, triangle of each stencil entry
*       Edge_side  output, local side index of each stencil entry
*
*       Returns the number of entries written (5).
*
*       The unreachable "OLD version" that used to follow the return statement
*       has been removed; the executed path is unchanged.
*/
LOCAL int Mag_p2_5edge_reverse_sten_5pt_vertex_new(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *Edge_side)
{
         int       N_edge = 0, tmp_side, n_side;
         TRI       *nbtri, *tmp_tri;

         /// edge 0
         tris[N_edge] = tri;
         Edge_side[N_edge] = (side+1)%3;
         N_edge++;

         /// edge 1
         tris[N_edge] = tri;
         Edge_side[N_edge] = (side+2)%3;
         N_edge++;

         /// edge 2
         tris[N_edge] = tri;
         Edge_side[N_edge] = (side+3)%3;
         N_edge++;

         /// tri 1: reached through the neighbour across side (side+2)%3
         nbtri = Tri_on_side(tri,(side+2)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }
         n_side = (tmp_side+1)%3;
         tmp_tri = Tri_on_side(nbtri,n_side);

         /// side of tmp_tri that faces nbtri
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                 break;
         }

         /// edge 3
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = tmp_side;
         N_edge++;

         /// tri 2: reached through the neighbour across side (side+1)%3
         nbtri = Tri_on_side(tri,(side+1)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }
         n_side = (tmp_side+2)%3;
         tmp_tri = Tri_on_side(nbtri,n_side);

         /// side of tmp_tri that faces nbtri
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                 break;
         }

         /// edge 4
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = tmp_side;
         N_edge++;

         return N_edge;
}


/*
*       Mag_p1_5edge_reverse_sten_5pt_vertex():
*
*       Builds a 5-entry stencil of (triangle, local side) pairs:
*         entries 0-1 : sides (side+1)%3 and (side+2)%3 of tri;
*         entries 2-3 : let nb be the neighbour of tri across (side+2)%3 and
*                       b the side of nb facing tri; the triangle across side
*                       (b+1)%3 of nb contributes its two sides that do not
*                       face nb;
*         entry  4    : through the neighbour across (side+1)%3, stepping over
*                       its side (b+2)%3; the triangle reached contributes
*                       side (k+1)%3, k being its side facing that neighbour.
*
*       Returns the number of entries written (5).
*/
LOCAL int Mag_p1_5edge_reverse_sten_5pt_vertex(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *Edge_side)
{
         int       cnt = 0, k;
         TRI       *across, *far_tri;

         /// two sides of the centre triangle
         tris[cnt] = tri;
         Edge_side[cnt] = (side+1)%3;
         cnt++;
         tris[cnt] = tri;
         Edge_side[cnt] = (side+2)%3;
         cnt++;

         /// first walk: over side (side+2)%3, then one step further
         across = Tri_on_side(tri,(side+2)%3);
         k = 0;
         while(k < 3 && tri != Tri_on_side(across,k))
             k++;
         far_tri = Tri_on_side(across,(k+1)%3);

         k = 0;
         while(k < 3 && across != Tri_on_side(far_tri,k))
             k++;

         tris[cnt] = far_tri;
         Edge_side[cnt] = (k+1)%3;
         cnt++;
         tris[cnt] = far_tri;
         Edge_side[cnt] = (k+2)%3;
         cnt++;

         /// second walk: over side (side+1)%3, then one step further
         across = Tri_on_side(tri,(side+1)%3);
         k = 0;
         while(k < 3 && tri != Tri_on_side(across,k))
             k++;
         far_tri = Tri_on_side(across,(k+2)%3);

         k = 0;
         while(k < 3 && across != Tri_on_side(far_tri,k))
             k++;

         tris[cnt] = far_tri;
         Edge_side[cnt] = (k+1)%3;
         cnt++;

         return cnt;
}

/*
*       Mag_p2_9edge_reverse_sten_5pt_vertex():
*
*       Collects a stencil of (triangle, local side) pairs into tris[] and
*       Edge_side[] by walking from tri across neighbouring triangles with
*       Tri_on_side().  Both branches at the end add two entries, so nine
*       entries are written on every path.
*
*       The caller visible in this file selects this variant when
*       count_num_tris_vertex() reports five triangles at vertex (side+2)%3
*       of tri.
*
*       tri        starting triangle
*       side       local side index of tri selecting the stencil orientation
*       tris       output, triangle of each stencil entry
*       Edge_side  output, local side index of each stencil entry
*
*       Returns the number of entries written.
*
*       NOTE(review): the neighbour searches leave tmp_side == 3 when no side
*       matches, and neighbours are not checked against NULL.
*/
LOCAL int Mag_p2_9edge_reverse_sten_5pt_vertex(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *Edge_side)
{
         /* n_side is reused as scratch; old_tmp_side, old_tmp_tri, side4 and
          * tri4 are not read by the active code. */
         int       N_edge = 0, tmp_side, n_side, old_tmp_side, side3, side4, side5;
         TRI       *nbtri, *tmp_tri, *old_tmp_tri, *tri1, *tri3, *tri4, *tri5;

         /// edge 0 
         N_edge = 0;
         tris[N_edge] = tri;
         Edge_side[N_edge] = (side+1)%3;
         N_edge++;

         /// edge 1
         tris[N_edge] = tri;
         Edge_side[N_edge] = (side+2)%3;
         N_edge++;

         /// tri 1
         /// nbtri is the neighbour of tri across (side+2)%3; tmp_side becomes
         /// the side of nbtri that faces tri.
         nbtri = Tri_on_side(tri,(side+2)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }
         n_side = (tmp_side+1)%3;
         tri1 = tmp_tri = Tri_on_side(nbtri,n_side);

         /// tmp_side becomes the side of tri1 that faces nbtri.
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                 break;
         }

         /// edge 2
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = (tmp_side+1)%3;
         N_edge++;

         /// edge 3
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = (tmp_side+2)%3;
         N_edge++;

         /// tri 2
         /// same construction through the neighbour across (side+1)%3, but
         /// stepping over side (tmp_side+2)%3 of that neighbour.
         nbtri = Tri_on_side(tri,(side+1)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }
         n_side = (tmp_side+2)%3;
         tmp_tri = Tri_on_side(nbtri,n_side);

         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                 break;
         }

         //// edge 4
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = (tmp_side+1)%3;
         N_edge++;

         //// tri 3
         /// tri3 lies across the side just stored as edge 4.
         tri3 = nbtri = Tri_on_side(tmp_tri,(tmp_side+1)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tmp_tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }

         //// edge 5
         tris[N_edge] = nbtri;
         Edge_side[N_edge] = (tmp_side+2)%3;
         N_edge++;
         /// remaining side of tri3, used later for edge 8
         side3 = (tmp_side+1)%3;

         //// tri 4
         /// tri4 lies across the side just stored as edge 5.
         tri4 = tmp_tri = Tri_on_side(nbtri,(tmp_side+2)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                 break;
         }

         //// edge 6
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = (tmp_side+2)%3;
         N_edge++;
         /// only referenced by the disabled "edge 9" code below
         side4 = (tmp_side+1)%3;


         //// tri 5
         /// tri5 lies across the side just stored as edge 6.
         tri5 = nbtri = Tri_on_side(tmp_tri,(tmp_side+2)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tmp_tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }
         side5 = (tmp_side+1)%3;

         /// The walk from tri 2 has either stopped short of tri1 or landed
         /// on it; edge 7 is chosen differently in the two cases.
         if(tri5 != tri1)
         {
             //// edge 7
             /// a side of tri5 itself
             tris[N_edge] = nbtri;
             Edge_side[N_edge] = (tmp_side+2)%3;
             N_edge++;

             /// neighbour of tri3 across side3
             nbtri = Tri_on_side(tri3,side3);
             for(tmp_side = 0; tmp_side < 3; tmp_side++)
             {
                 if(tri3 == Tri_on_side(nbtri,tmp_side))
                     break;
             }
             //// edge 8
             tris[N_edge] = nbtri;
             Edge_side[N_edge] = (tmp_side+2)%3;
             N_edge++;

             /***
             nbtri = Tri_on_side(tri4,side4);
              for(tmp_side = 0; tmp_side < 3; tmp_side++)
             {
                 if(tri4 == Tri_on_side(nbtri,tmp_side))
                     break;
             }
             //// edge 9
             tris[N_edge] = nbtri;
             Edge_side[N_edge] = (tmp_side+1)%3;
             N_edge++;
             ***/
         }
         else 
         {
             //// edge 7
             /// tri5 coincides with tri1: take a side of the neighbour of
             /// tri5 across side5 instead.
             nbtri = Tri_on_side(tri5,side5);
             for(tmp_side = 0; tmp_side < 3; tmp_side++)
             {
                 if(tri5 == Tri_on_side(nbtri,tmp_side))
                     break;
             }
             tris[N_edge] = nbtri;
             Edge_side[N_edge] = (tmp_side+1)%3;
             N_edge++;

             /// neighbour of tri3 across side3
             nbtri = Tri_on_side(tri3,side3);
             for(tmp_side = 0; tmp_side < 3; tmp_side++)
             {
                 if(tri3 == Tri_on_side(nbtri,tmp_side))
                     break;
             }
             //// edge 8
             tris[N_edge] = nbtri;
             Edge_side[N_edge] = (tmp_side+2)%3;
             N_edge++;

             /****
             nbtri = Tri_on_side(tri4,side4);
              for(tmp_side = 0; tmp_side < 3; tmp_side++)
             {
                 if(tri4 == Tri_on_side(nbtri,tmp_side))
                     break;
             }
             //// edge 9
             tris[N_edge] = nbtri;
             Edge_side[N_edge] = (tmp_side+1)%3;
             N_edge++;
             ***/
         }

         return N_edge;
}

/*
*       Mag_p2_5edge_reverse_sten_new():
*
*       Builds a 5-entry stencil of (triangle, local side) pairs:
*         entries 0-2 : sides (side+1)%3, (side+2)%3 and side of tri;
*         entry  3    : let nb be the neighbour of tri across (side+2)%3 and
*                       b the side of nb facing tri; the triangle across side
*                       (b+1)%3 of nb is stored with its side facing nb;
*         entry  4    : same construction through the neighbour across
*                       (side+1)%3, stepping over its side (b+2)%3.
*
*       Returns the number of entries written (5).
*/
LOCAL int Mag_p2_5edge_reverse_sten_new(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *Edge_side)
{
         int       cnt, k;
         TRI       *across, *far_tri;

         /// the three sides of the centre triangle, starting at side+1
         for(cnt = 0; cnt < 3; cnt++)
         {
             tris[cnt] = tri;
             Edge_side[cnt] = (side+1+cnt)%3;
         }

         /// first walk: over side (side+2)%3, then one step further
         across = Tri_on_side(tri,(side+2)%3);
         k = 0;
         while(k < 3 && tri != Tri_on_side(across,k))
             k++;
         far_tri = Tri_on_side(across,(k+1)%3);

         k = 0;
         while(k < 3 && across != Tri_on_side(far_tri,k))
             k++;

         tris[cnt] = far_tri;
         Edge_side[cnt] = k;
         cnt++;

         /// second walk: over side (side+1)%3, then one step further
         across = Tri_on_side(tri,(side+1)%3);
         k = 0;
         while(k < 3 && tri != Tri_on_side(across,k))
             k++;
         far_tri = Tri_on_side(across,(k+2)%3);

         k = 0;
         while(k < 3 && across != Tri_on_side(far_tri,k))
             k++;

         tris[cnt] = far_tri;
         Edge_side[cnt] = k;
         cnt++;

         return cnt;
}

/*
*       Mag_p2_6edge_reverse_sten_new():
*
*       Builds a 6-entry stencil of (triangle, local side) pairs:
*         entries 0-2 : sides (side+1)%3, (side+2)%3 and side of tri;
*         entries 3-4 : let nb be the neighbour of tri across (side+2)%3 and
*                       b the side of nb facing tri; the triangle across side
*                       (b+1)%3 of nb contributes its two sides that do not
*                       face nb;
*         entry  5    : through the neighbour across (side+1)%3, stepping over
*                       its side (b+2)%3; the triangle reached contributes
*                       side (k+2)%3, k being its side facing that neighbour.
*
*       Returns the number of entries written (6).
*/
LOCAL int Mag_p2_6edge_reverse_sten_new(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *Edge_side)
{
         int       cnt, k;
         TRI       *across, *far_tri;

         /// the three sides of the centre triangle, starting at side+1
         for(cnt = 0; cnt < 3; cnt++)
         {
             tris[cnt] = tri;
             Edge_side[cnt] = (side+1+cnt)%3;
         }

         /// first walk: over side (side+2)%3, then one step further
         across = Tri_on_side(tri,(side+2)%3);
         k = 0;
         while(k < 3 && tri != Tri_on_side(across,k))
             k++;
         far_tri = Tri_on_side(across,(k+1)%3);

         k = 0;
         while(k < 3 && across != Tri_on_side(far_tri,k))
             k++;

         tris[cnt] = far_tri;
         Edge_side[cnt] = (k+1)%3;
         cnt++;
         tris[cnt] = far_tri;
         Edge_side[cnt] = (k+2)%3;
         cnt++;

         /// second walk: over side (side+1)%3, then one step further
         across = Tri_on_side(tri,(side+1)%3);
         k = 0;
         while(k < 3 && tri != Tri_on_side(across,k))
             k++;
         far_tri = Tri_on_side(across,(k+2)%3);

         k = 0;
         while(k < 3 && across != Tri_on_side(far_tri,k))
             k++;

         /// single side from the second far triangle
         tris[cnt] = far_tri;
         Edge_side[cnt] = (k+2)%3;
         cnt++;

         return cnt;
}


/*
*       Mag_p1_5edge_reverse_sten():
*
*       Builds a 5-entry stencil of (triangle, local side) pairs:
*         entries 0-1 : sides (side+1)%3 and (side+2)%3 of tri;
*         entries 2-3 : let nb be the neighbour of tri across (side+2)%3 and
*                       b the side of nb facing tri; the triangle across side
*                       (b+1)%3 of nb contributes its two sides that do not
*                       face nb;
*         entry  4    : through the neighbour across (side+1)%3, stepping over
*                       its side (b+2)%3; the triangle reached contributes
*                       side (k+2)%3, k being its side facing that neighbour.
*
*       Returns the number of entries written (5).
*/
LOCAL int Mag_p1_5edge_reverse_sten(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *Edge_side)
{
         int       cnt = 0, k;
         TRI       *across, *far_tri;

         /// two sides of the centre triangle
         tris[cnt] = tri;
         Edge_side[cnt] = (side+1)%3;
         cnt++;
         tris[cnt] = tri;
         Edge_side[cnt] = (side+2)%3;
         cnt++;

         /// first walk: over side (side+2)%3, then one step further
         across = Tri_on_side(tri,(side+2)%3);
         k = 0;
         while(k < 3 && tri != Tri_on_side(across,k))
             k++;
         far_tri = Tri_on_side(across,(k+1)%3);

         k = 0;
         while(k < 3 && across != Tri_on_side(far_tri,k))
             k++;

         tris[cnt] = far_tri;
         Edge_side[cnt] = (k+1)%3;
         cnt++;
         tris[cnt] = far_tri;
         Edge_side[cnt] = (k+2)%3;
         cnt++;

         /// second walk: over side (side+1)%3, then one step further
         across = Tri_on_side(tri,(side+1)%3);
         k = 0;
         while(k < 3 && tri != Tri_on_side(across,k))
             k++;
         far_tri = Tri_on_side(across,(k+2)%3);

         k = 0;
         while(k < 3 && across != Tri_on_side(far_tri,k))
             k++;

         /// single side from the second far triangle
         tris[cnt] = far_tri;
         Edge_side[cnt] = (k+2)%3;
         cnt++;

         return cnt;
}

/*
*       Mag_p2_9edge_reverse_sten():
*
*       Collects nine (triangle, local side) pairs into tris[] and
*       Edge_side[] by walking from tri across neighbouring triangles with
*       Tri_on_side().
*
*       The caller visible in this file uses this variant when
*       count_num_tris_vertex() reports more than five triangles at vertex
*       (side+2)%3 of tri.
*
*       tri        starting triangle
*       side       local side index of tri selecting the stencil orientation
*       tris       output, triangle of each stencil entry
*       Edge_side  output, local side index of each stencil entry
*
*       Returns the number of entries written (9).
*
*       NOTE(review): the neighbour searches leave tmp_side == 3 when no side
*       matches, and neighbours are not checked against NULL.
*/
LOCAL int Mag_p2_9edge_reverse_sten(
         TRI       *tri,   
         int       side, 
         TRI       *tris[],
         int       *Edge_side)
{
         int       N_edge = 0, tmp_side, n_side, old_tmp_side;
         TRI       *nbtri, *tmp_tri, *old_tmp_tri;

         /// edge 0 
         N_edge = 0; 
         tris[N_edge] = tri;
         Edge_side[N_edge] = (side+1)%3;
         N_edge++;

         /// edge 1
         tris[N_edge] = tri;
         Edge_side[N_edge] = (side+2)%3;
         N_edge++;

         /// tri 1
         /// nbtri is the neighbour of tri across (side+2)%3; tmp_side becomes
         /// the side of nbtri that faces tri.
         nbtri = Tri_on_side(tri,(side+2)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }
         n_side = (tmp_side+1)%3;
         tmp_tri = Tri_on_side(nbtri,n_side);

         /// tmp_side becomes the side of tmp_tri that faces nbtri.
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                 break; 
         }
         
         /// edge 2
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = (tmp_side+1)%3;
         N_edge++;

         /// edge 3
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = (tmp_side+2)%3;
         N_edge++;

         /// tri 2
         /// step across the side just stored as edge 2 and remember both the
         /// triangle reached and its side facing back; they are picked up
         /// again at "tri 4" after the second walk from tri.
         n_side = (tmp_side+1)%3;
         nbtri = Tri_on_side(tmp_tri,n_side);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tmp_tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }
         old_tmp_side = tmp_side;
         old_tmp_tri = nbtri;

         /// tri 3
         /// second walk from tri: over side (side+1)%3, then over side
         /// (tmp_side+2)%3 of that neighbour.
         nbtri = Tri_on_side(tri,(side+1)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }
         n_side = (tmp_side+2)%3;
         tmp_tri = Tri_on_side(nbtri,n_side);

         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                 break;
         }
 
         //// edge 4
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = (tmp_side+1)%3;
         N_edge++;

         /// edge 5
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = (tmp_side+2)%3;
         N_edge++;

         //// tri 4
         /// resume from the triangle saved at "tri 2"
         n_side = (old_tmp_side+2)%3;
         nbtri = Tri_on_side(old_tmp_tri,n_side);

         //// tri 5
         /// tmp_side becomes the side of nbtri that faces old_tmp_tri
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(old_tmp_tri == Tri_on_side(nbtri,tmp_side))
                 break;
         }

         tmp_tri = Tri_on_side(nbtri,(tmp_side+2)%3);
         /// old_tmp_side now refers to nbtri; it is read again for "tri 6"
         old_tmp_side = tmp_side;
         
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                 break;
         }

         /// edge 6
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = (tmp_side+2)%3;
         N_edge++;

         /// edge 7
         /// the side of tmp_tri that faces nbtri
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = tmp_side;
         N_edge++;

         //// tri 6
         /// the other neighbour of nbtri, across (old_tmp_side+1)%3
         tmp_tri = Tri_on_side(nbtri,(old_tmp_side+1)%3);
         for(tmp_side = 0; tmp_side < 3; tmp_side++)
         {
             if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                 break;
         }

         //// edge 8
         tris[N_edge] = tmp_tri;
         Edge_side[N_edge] = (tmp_side+1)%3;
         N_edge++;
 
         return N_edge;
}

/*
*       count_num_tris_vertex():
*
*       Counts the triangles reached by walking around the vertex pt of tri
*       through Tri_on_side() links, tri itself included.
*
*       The first walk leaves tri across the side whose index equals the
*       position of pt in Point_of_tri(tri) and continues until it either
*       returns to tri or meets a NULL neighbour.  In the latter case a second
*       walk leaves tri in the opposite rotational sense and adds the
*       triangles on the other side.
*
*       tri   triangle that has pt as one of its vertices
*       pt    vertex to circle
*       N     output, number of triangles counted
*
*       Returns YES when the first walk closed on tri, NO when a NULL
*       neighbour was met.  If pt is not a vertex of tri an error is printed
*       and clean_up(ERROR) is called.
*
*       Only the last triangle of the first walk is kept, so the count is not
*       limited by any fixed-size buffer.
*/
EXPORT int count_num_tris_vertex(
        TRI      *tri,
        POINT    *pt,
        int      *N)
{
        int      side, tri_N, tmp_side, old_side;
        TRI      *curr_t, *next_t;
        TRI      *last_clock_t;

        for(side = 0; side < 3; side++)
        {
            if(pt == Point_of_tri(tri)[side])
                break;
        }

        if(side == 3)
        {
            printf("ERROR: count_num_tris_vertex(), pt not on tri\n");
            clean_up(ERROR);
        }

        old_side = side;
        tri_N = 1;      /// tri itself

        //// first do clockwise direction
        curr_t = tri;
        for(; ;)
        {
            next_t = Tri_on_side(curr_t,side);
            if(next_t == NULL || next_t == tri)
                break;

            tri_N++;

            /// side of next_t that faces the triangle we came from
            for(tmp_side = 0; tmp_side < 3; tmp_side++)
            {
                if(curr_t == Tri_on_side(next_t,tmp_side))
                    break;
            }

            side = (tmp_side+1)%3;
            curr_t = next_t;
        }

        *N = tri_N;
        if(next_t != NULL)
            return YES;

        /// The first walk stopped at a missing neighbour; curr_t is the last
        /// triangle it visited.
        last_clock_t = curr_t;

        /// now do counter-clockwise direction to get tri the other side.
        curr_t = tri;
        side = (old_side+2)%3;
        for(; ;)
        {
            next_t = Tri_on_side(curr_t,side);
            if(next_t == NULL || next_t == last_clock_t)
                break;

            tri_N++;

            for(tmp_side = 0; tmp_side < 3; tmp_side++)
            {
                if(curr_t == Tri_on_side(next_t,tmp_side))
                    break;
            }
            side = (tmp_side+2)%3;
            curr_t = next_t;
        }
        *N = tri_N;

        return NO;
}

// J.C.P. 1998, 144, 194-212. WENO schemes for the interpolation of mean values
// on unstructured grids.  use OI(P)_4
/*
*                       weno_weight_P2():
*
*       Returns the square root of a sum of five quadratic forms in the
*       coefficients u[1..5], each weighted by entries of the first row of
*       tri->Lmass_matrix.  Following the original comments, the five terms
*       correspond to the x, y, xx, yy and xy derivatives; the two
*       first-derivative terms are divided by sqr(fg_diam(tri)).
*       u[0] is not used.
*/
EXPORT double weno_weight_P2(
        TRI     *tri,
        double  *u)
{
        double  **mass = tri->Lmass_matrix;
        double  h = fg_diam(tri);
        double  *m0 = mass[0];          /* first row of the mass matrix */
        double  d_x, d_y, d_xx, d_yy, d_xy;

        /// first derivative in x
        d_x = sqr(u[1])*m0[0] + 4.0*sqr(u[3])*m0[3] + sqr(u[4])*m0[5] +
              4.0*u[1]*u[3]*m0[1] + 2.0*u[1]*u[4]*m0[2] + 4.0*u[3]*u[4]*m0[4];
        d_x /= sqr(h);

        /// first derivative in y
        d_y = sqr(u[2])*m0[0] + sqr(u[4])*m0[3] + 4.0*sqr(u[5])*m0[5] +
              2.0*u[2]*u[4]*m0[1] + 4.0*u[2]*u[5]*m0[2] + 4.0*u[4]*u[5]*m0[4];
        d_y /= sqr(h);

        /// second derivatives: xx, yy, xy
        d_xx = 4.0*sqr(u[3])*m0[0];
        d_yy = 4.0*sqr(u[5])*m0[0];
        d_xy = sqr(u[4])*m0[0];

        return sqrt(d_x + d_y + d_xx + d_yy + d_xy);
}

//// case 1: 7 edges
/*
*                       Mag_p2_edge_sten():
*
*       Fills tris[] / Edge_side[] with seven (triangle, side index) pairs and
*       returns that count.  Entries 0 and 1 are sides `side` and (side+1)%3
*       of tri.  Entries 2-4 come from a walk that starts at the neighbor
*       across `side` and leaves it through the side after the one facing
*       tri; entries 5-6 come from a second walk that leaves the same
*       neighbor through the side before the one facing tri.
*
*       Every neighbor that is visited is dereferenced without a NULL check.
*/
LOCAL int Mag_p2_edge_sten(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *Edge_side)
{
         int       cnt = 0, back, nb_back, leave;
         TRI       *nb, *prev, *cur;

         /// edges 0 and 1: both on tri
         tris[cnt] = tri;
         Edge_side[cnt++] = side;
         tris[cnt] = tri;
         Edge_side[cnt++] = (side+1)%3;

         /* neighbor across `side`, and its side that faces tri */
         nb = Tri_on_side(tri,side);
         nb_back = 0;
         while(nb_back < 3 && Tri_on_side(nb,nb_back) != tri)
             nb_back++;

         /// edge 2
         leave = (nb_back+1)%3;
         tris[cnt] = nb;
         Edge_side[cnt++] = leave;

         /// edge 3
         prev = nb;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         leave = (back+2)%3;
         tris[cnt] = cur;
         Edge_side[cnt++] = leave;

         /// edge 4
         prev = cur;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         tris[cnt] = cur;
         Edge_side[cnt++] = (back+1)%3;

         /// edge 5: restart from the first neighbor, other direction
         prev = nb;
         cur = Tri_on_side(prev,(nb_back+2)%3);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         tris[cnt] = cur;
         Edge_side[cnt++] = (back+2)%3;
         leave = (back+1)%3;

         /// edge 6
         prev = cur;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         tris[cnt] = cur;
         Edge_side[cnt++] = (back+2)%3;

         return cnt;
}

/*
*                       Mag_p2_11edge_central_sten():
*
*       Fills tris[] / Edge_side[] with eleven (triangle, side index) pairs
*       and returns that count.  Entries 0 and 1 are sides 0 and 1 of tri.
*       For each of the three neighbors of tri, three more entries follow:
*       one side of the neighbor itself, then one side on each of the two
*       triangles across the neighbor's other two sides.
*
*       Neighbors are dereferenced without a NULL check.  The function
*       writes nothing to stdout.
*/
LOCAL int Mag_p2_11edge_central_sten(
         TRI       *tri,
         TRI       *tris[],
         int       *Edge_side)
{
         int       N_edge = 0, tmp_side, n_side, side, n_side2;
         TRI       *nbtri, *tmp_tri;

         /// edge 0
         tris[N_edge] = tri;
         Edge_side[N_edge] = 0;
         N_edge++;

         /// edge 1
         tris[N_edge] = tri;
         Edge_side[N_edge] = 1;
         N_edge++;

         for(side = 0; side < 3; side++)
         {
             /* neighbor across `side` and its side facing back to tri */
             nbtri = Tri_on_side(tri,side);
             for(tmp_side = 0; tmp_side < 3; tmp_side++)
             {
                 if(tri == Tri_on_side(nbtri,tmp_side))
                     break;
             }

             /// edge on the neighbor itself
             n_side = (tmp_side+2)%3;
             n_side2 = (tmp_side+1)%3;
             tris[N_edge] = nbtri;
             Edge_side[N_edge] = n_side;
             N_edge++;

             /// edge on the triangle across n_side of the neighbor
             tmp_tri = Tri_on_side(nbtri,n_side);
             for(tmp_side = 0; tmp_side < 3; tmp_side++)
             {
                 if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                     break;
             }
             tris[N_edge] = tmp_tri;
             Edge_side[N_edge] = (tmp_side+2)%3;
             N_edge++;

             /// edge on the triangle across n_side2 of the neighbor
             tmp_tri = Tri_on_side(nbtri,n_side2);
             for(tmp_side = 0; tmp_side < 3; tmp_side++)
             {
                 if(nbtri == Tri_on_side(tmp_tri,tmp_side))
                     break;
             }
             tris[N_edge] = tmp_tri;
             Edge_side[N_edge] = (tmp_side+1)%3;
             N_edge++;
         }

         return N_edge;
}

//// 9 edges-case 1, one-sided case
/*
*                       Mag_p2_9edge_sten_1():
*
*       Fills tris[] / Edge_side[] with nine (triangle, side index) pairs and
*       returns that count.  Entries 0 and 1 are sides `side` and (side+1)%3
*       of tri.  Entries 2-5 come from a walk that starts at the neighbor
*       across `side` and leaves it through the side after the one facing
*       tri; entries 6-8 come from a second walk that leaves the same
*       neighbor through the side before the one facing tri.
*
*       Every neighbor that is visited is dereferenced without a NULL check.
*/
LOCAL int Mag_p2_9edge_sten_1(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *Edge_side)
{
         int       cnt = 0, back, nb_back, leave;
         TRI       *nb, *prev, *cur;

         /// edges 0 and 1: both on tri
         tris[cnt] = tri;
         Edge_side[cnt++] = side;
         tris[cnt] = tri;
         Edge_side[cnt++] = (side+1)%3;

         /* neighbor across `side`, and its side that faces tri */
         nb = Tri_on_side(tri,side);
         nb_back = 0;
         while(nb_back < 3 && Tri_on_side(nb,nb_back) != tri)
             nb_back++;

         /// edge 2
         leave = (nb_back+1)%3;
         tris[cnt] = nb;
         Edge_side[cnt++] = leave;

         /// edge 3
         prev = nb;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         leave = (back+2)%3;
         tris[cnt] = cur;
         Edge_side[cnt++] = leave;

         /// edge 4
         prev = cur;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         leave = (back+1)%3;
         tris[cnt] = cur;
         Edge_side[cnt++] = leave;

         /// edge 5
         prev = cur;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         tris[cnt] = cur;
         Edge_side[cnt++] = (back+1)%3;

         /// edge 6: restart from the first neighbor, other direction
         prev = nb;
         cur = Tri_on_side(prev,(nb_back+2)%3);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         tris[cnt] = cur;
         Edge_side[cnt++] = (back+2)%3;
         leave = (back+1)%3;

         /// edge 7
         prev = cur;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         leave = (back+2)%3;
         tris[cnt] = cur;
         Edge_side[cnt++] = leave;

         /// edge 8
         prev = cur;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         tris[cnt] = cur;
         Edge_side[cnt++] = (back+2)%3;

         return cnt;
}

//// 9 edges-case 2, one-sided case
/*
*                       Mag_p2_9edge_sten_2():
*
*       Fills tris[] / Edge_side[] with nine (triangle, side index) pairs and
*       returns that count.  Entries 0-6 are gathered exactly as in
*       Mag_p2_9edge_sten_1().  The second walk then crosses one more
*       triangle ("TRI 7") without recording a side on it, and records one
*       side on each of the next two triangles (entries 7 and 8).
*
*       Every neighbor that is visited is dereferenced without a NULL check.
*/
LOCAL int Mag_p2_9edge_sten_2(
         TRI       *tri,
         int       side,
         TRI       *tris[],
         int       *Edge_side)
{
         int       cnt = 0, back, nb_back, leave;
         TRI       *nb, *prev, *cur;

         /// edges 0 and 1: both on tri
         tris[cnt] = tri;
         Edge_side[cnt++] = side;
         tris[cnt] = tri;
         Edge_side[cnt++] = (side+1)%3;

         /* neighbor across `side`, and its side that faces tri */
         nb = Tri_on_side(tri,side);
         nb_back = 0;
         while(nb_back < 3 && Tri_on_side(nb,nb_back) != tri)
             nb_back++;

         /// edge 2
         leave = (nb_back+1)%3;
         tris[cnt] = nb;
         Edge_side[cnt++] = leave;

         /// edge 3
         prev = nb;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         leave = (back+2)%3;
         tris[cnt] = cur;
         Edge_side[cnt++] = leave;

         /// edge 4
         prev = cur;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         leave = (back+1)%3;
         tris[cnt] = cur;
         Edge_side[cnt++] = leave;

         /// edge 5
         prev = cur;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         tris[cnt] = cur;
         Edge_side[cnt++] = (back+1)%3;

         /// edge 6: restart from the first neighbor, other direction
         prev = nb;
         cur = Tri_on_side(prev,(nb_back+2)%3);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         tris[cnt] = cur;
         Edge_side[cnt++] = (back+2)%3;
         leave = (back+1)%3;

         /// TRI 7: crossed only, no edge recorded on it
         prev = cur;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         leave = (back+2)%3;

         /// edge 7 on TRI8
         prev = cur;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         leave = (back+2)%3;
         tris[cnt] = cur;
         Edge_side[cnt++] = leave;

         /// edge 8 on TRI9
         prev = cur;
         cur = Tri_on_side(prev,leave);
         back = 0;
         while(back < 3 && Tri_on_side(cur,back) != prev)
             back++;
         tris[cnt] = cur;
         Edge_side[cnt++] = (back+2)%3;

         return cnt;
}

//// Construct the polynomial by constrained least squares.
//// Conservation is enforced exactly only on tri and its three neighbors.
//// On the other tris, conservation is enforced in the least-squares sense.
/*
*               tri_comput_P2_polynomials_from_avg():
*
*       Fits, for each of four conserved quantities (density, two momentum
*       components, energy), MAX_N_COEF polynomial coefficients on tri with
*       the LAPACK routine dgglse (equality-constrained least squares).
*
*       tri         triangle receiving the coefficients
*       tris        nn_num stencil triangles; each gives one least-squares
*                   row (first mass-matrix row about fg_centroid(tri),
*                   normalized by its leading entry)
*       nn_num      number of entries in tris; limited by the local work
*                   arrays (see the size check below)
*       midsoln     averages are always read from midsoln[id].st[0]
*       limit_store destination for the per-CV copy when rk_step != RK_STEP
*       rk_step     RK_STEP: results go to tri->st and tri->cv_soln, then
*                   map_poly_SV_to_CV_p2(tri) is called; otherwise they go
*                   to midsoln[tri->id].st[rk_step] and
*                   limit_store[rk_step][tri->id], then
*                   map_poly_SV_to_CV_ver2_p2() is called
*
*       The four equality constraints are the averages on tri and on its
*       three side neighbors; those neighbors are dereferenced without a
*       NULL check.  A non-zero INFO from dgglse is reported on stdout and
*       processing continues.
*/
LOCAL void tri_comput_P2_polynomials_from_avg(
         TRI       *tri,
         TRI       *tris[],
         int       nn_num,
         Mid_soln *midsoln,
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *Bnbtri[3];
        double    Ab[4][20], Bb[4][4], A[20][MAX_N_COEF], B[4][MAX_N_COEF]; /// B[#cv][mass_matrix_term for each coeff]
        double    AA[300], BB[300], CC[300], DD[300], XX[300], work[300];
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, indx, debug = NO, cv_indx;
        int       n_eqn;
        Locstate  st, nbst[3];
        double    **Lmass_matrix;

        /* A and Ab hold one row/column per stencil triangle; AA is the packed copy of A */
        if(nn_num > (int)(sizeof(A)/sizeof(A[0])) ||
           nn_num*MAX_N_COEF > (int)(sizeof(AA)/sizeof(AA[0])))
        {
            printf("ERROR: tri_comput_P2_polynomials_from_avg(), nn_num = %d exceeds work arrays\n", nn_num);
            clean_up(ERROR);
        }

        for(i = 0; i < 3; i++)
            Bnbtri[i] = Tri_on_side(tri,i);

        //// first assemble far neighboring tris, which satisfy conservation in the least square sense.
        for(cv_indx = 0; cv_indx < nn_num; cv_indx++)
        {
             comp_mass_matrix_1st_row(MAX_N_COEF,tris[cv_indx],dim,fg_centroid(tri),mass_1st_row);
             for(i = 0; i < MAX_N_COEF; i++)
                 A[cv_indx][i] = mass_1st_row[0][i]/mass_1st_row[0][0];
        }

        //// set constraint eqns: row 0 is tri, rows 1-3 are its side neighbors
        Lmass_matrix = tri->Lmass_matrix;
        for(i = 0; i < MAX_N_COEF; i++)
            B[0][i] = Lmass_matrix[0][i]/Lmass_matrix[0][0];
        for(cv_indx = 0; cv_indx < 3; cv_indx++)
        {
             comp_mass_matrix_1st_row(MAX_N_COEF,Bnbtri[cv_indx],dim,fg_centroid(tri),mass_1st_row);
             for(i = 0; i < MAX_N_COEF; i++)
                 B[cv_indx+1][i] = mass_1st_row[0][i]/mass_1st_row[0][0];
        }

        /* averages (right-hand sides) always come from stage 0 of midsoln */
        st = midsoln[tri->id].st[0];
        for(i = 0; i < 3; i++)
            nbst[i] = midsoln[Bnbtri[i]->id].st[0];

        Bb[0][0] = Dens(st);
        Bb[1][0] = Mom(st)[0];
        Bb[2][0] = Mom(st)[1];
        Bb[3][0] = Energy(st);
        for(i = 0; i < 3; i++)
        {
            Bb[0][i+1] = Dens(nbst[i]);
            Bb[1][i+1] = Mom(nbst[i])[0];
            Bb[2][i+1] = Mom(nbst[i])[1];
            Bb[3][i+1] = Energy(nbst[i]);
        }

        if(debug == YES)
        {
            printf("Bb[%g, %g, %g, %g]\n", Bb[0][0], Bb[0][1], Bb[0][2], Bb[0][3]);
        }

        for(i = 0; i < nn_num; i++)
        {
            Ab[0][i] = Dens(midsoln[tris[i]->id].st[0]);
            Ab[1][i] = Mom(midsoln[tris[i]->id].st[0])[0];
            Ab[2][i] = Mom(midsoln[tris[i]->id].st[0])[1];
            Ab[3][i] = Energy(midsoln[tris[i]->id].st[0]);
        }

        if(debug == YES)
        {
            for(i = 0; i < nn_num; i++)
                printf("Ab[%d] = %g, from tri %d\n", i, Ab[0][i], tris[i]->id);
        }

        /* destination state for the fitted coefficients */
        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        /* Only four right-hand sides are stored in Ab/Bb, so never go past them. */
        n_eqn = (N_EQN < 4) ? N_EQN : 4;
        for(k = 0; k < n_eqn; k++)
        {
            M = nn_num; N = MAX_N_COEF; P = 4; LDA = M; LDB = P; LWORK = M+N+P;

            /* dgglse overwrites its inputs, so repack (column-major) for every k */
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < M; i++)
                {
                    AA[l] = A[i][j];
                    l++;
                }
            }
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < P; i++)
                {
                    BB[l] = B[i][j];
                    l++;
                }
            }
            for(i = 0; i < M; i++)  // right side for the least square part
                CC[i] = Ab[k][i];
            for(i = 0; i < P; i++)  // right side for the constrained part
                DD[i] = Bb[k][i];

            FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                 BB, &LDB, CC, DD, XX, ///// double array B, int LDB, double array C, D, X,
                                 work, &LWORK, &INFO);
            if(INFO != 0)
            {
                printf("WARNING: tri_comput_P2_polynomials_from_avg(), dgglse INFO = %d on tri[%d], eqn %d\n",
                       INFO, tri->id, k);
            }

            ///// save fitted soln
            switch(k)
            {
            case 0:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Dens(st)[i] = XX[i];
            break;
            case 1:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[0][i] = XX[i];
            break;
            case 2:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[1][i] = XX[i];
            break;
            case 3:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Energy(st)[i] = XX[i];
            break;
            }
        }

        // Assign to CV and map polynomial to one defined on CV center
        if(rk_step == RK_STEP)
        {
            for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
            {
                for(indx = 0; indx < MAX_N_COEF; indx++)
                {
                    tri->cv_soln->cv_dg_rho[cv_indx][indx] = dg_Dens(st)[indx];
                    tri->cv_soln->cv_dg_e[cv_indx][indx] = dg_Energy(st)[indx];
                    tri->cv_soln->cv_dg_m[0][cv_indx][indx] = dg_Mom(st)[0][indx];
                    tri->cv_soln->cv_dg_m[1][cv_indx][indx] = dg_Mom(st)[1][indx];
                }
            }
            map_poly_SV_to_CV_p2(tri);
        }
        else
        {
            for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
            {
                for(indx = 0; indx < MAX_N_COEF; indx++)
                {
                    limit_store[rk_step][tri->id].cv_dg_rho[cv_indx][indx] = dg_Dens(st)[indx];
                    limit_store[rk_step][tri->id].cv_dg_e[cv_indx][indx] = dg_Energy(st)[indx];
                    limit_store[rk_step][tri->id].cv_dg_m[0][cv_indx][indx] = dg_Mom(st)[0][indx];
                    limit_store[rk_step][tri->id].cv_dg_m[1][cv_indx][indx] = dg_Mom(st)[1][indx];
                }
            }
            map_poly_SV_to_CV_ver2_p2(tri, limit_store[rk_step]);
        }
}

/*
*       tri_comput_P2_polynomials_from_avg_MHD_6sten_one_side():
*
*       Computes MAX_N_COEF polynomial coefficients on tri for six quantities
*       (density, three momentum components, energy, Mag[2]) by calling
*       solve_by_LU() on a matrix with one row per stencil triangle.
*
*       tri         triangle receiving the coefficients
*       tris        nn_num stencil triangles; row i of the matrix is the
*                   first mass-matrix row of tris[i] about fg_centroid(tri),
*                   normalized by its leading entry
*       nn_num      number of stencil triangles; at most 6, the row count of
*                   the static work matrix
*       midsoln     when NULL, averages are read from tris[i]->st, otherwise
*                   from midsoln[id].st[0]
*       limit_store not used by this function
*       rk_step     RK_STEP selects tri->st as destination; any other value
*                   selects midsoln[tri->id].st[rk_step], or tri->st when
*                   midsoln is NULL
*       side        index into the per-triangle matrix cache in
*                   alltri_HR_sten (only used when that array is non-NULL)
*       con_u       when non-NULL, receives copies of the coefficients:
*                   [0] density, [1..3] momentum, [4] energy, [5..7] dg_B
*
*       dg_B(st)[0] and dg_B(st)[1] are not computed here; con_u[5] and
*       con_u[6] receive whatever the destination state already holds.
*/
EXPORT void tri_comput_P2_polynomials_from_avg_MHD_6sten_one_side(
         TRI       *tri,
         TRI       *tris[],
         int       nn_num,
         Mid_soln *midsoln,
         Limiting_store **limit_store,
         int      rk_step,
         int      side,
         double   **con_u)
{
        double    Ab[8][20], XX[300];
        static double   **A = NULL, **loc_mass_1st_row = NULL;
        int       dim = 2, i, j, k, indx, cv_indx;
        int       max_rows = 6;         /* rows allocated for the work matrix A */
        Locstate  st;

        if(A == NULL)
        {
            matrix(&(A), max_rows, MAX_N_COEF, sizeof(double));
            matrix(&loc_mass_1st_row, 1, MAX_N_COEF,sizeof(double));
        }

        if(nn_num > max_rows)
        {
            printf("ERROR: tri_comput_P2_polynomials_from_avg_MHD_6sten_one_side(), nn_num = %d exceeds %d rows\n",
                   nn_num, max_rows);
            clean_up(ERROR);
        }

        //// assemble tris
        if(alltri_HR_sten == NULL)
        {
            /* no cache available: build the matrix from scratch */
            for(cv_indx = 0; cv_indx < nn_num; cv_indx++)
            {
                comp_mass_matrix_1st_row(MAX_N_COEF,tris[cv_indx],dim,fg_centroid(tri),loc_mass_1st_row);
                for(i = 0; i < MAX_N_COEF; i++)
                    A[cv_indx][i] = loc_mass_1st_row[0][i]/loc_mass_1st_row[0][0];
            }
        }
        else if(alltri_HR_sten[tri->id].WENO_side_set[side] == NO)
        {
            /* first visit of this (tri, side): build the matrix and cache it */
            alltri_HR_sten[tri->id].WENO_side_set[side] = YES;
            matrix(&(alltri_HR_sten[tri->id].WENO_Aside[side]), nn_num, MAX_N_COEF, sizeof(double));
            for(cv_indx = 0; cv_indx < nn_num; cv_indx++)
            {
                comp_mass_matrix_1st_row(MAX_N_COEF,tris[cv_indx],dim,fg_centroid(tri),loc_mass_1st_row);
                for(i = 0; i < MAX_N_COEF; i++)
                    alltri_HR_sten[tri->id].WENO_Aside[side][cv_indx][i] = A[cv_indx][i] =
                          loc_mass_1st_row[0][i]/loc_mass_1st_row[0][0];
            }
        }
        else if(alltri_HR_sten[tri->id].WENO_side_set[side] == YES)
        {
            /* reuse the cached matrix */
            for(cv_indx = 0; cv_indx < nn_num; cv_indx++)
            {
                for(i = 0; i < MAX_N_COEF; i++)
                    A[cv_indx][i] = alltri_HR_sten[tri->id].WENO_Aside[side][cv_indx][i];
            }
        }

        /* right-hand sides: averages on the stencil triangles */
        if(NULL == midsoln)
        {
            for(indx = 0; indx < nn_num; indx++)
            {
                Ab[0][indx] = Dens(tris[indx]->st);
                Ab[1][indx] = Mom(tris[indx]->st)[0];
                Ab[2][indx] = Mom(tris[indx]->st)[1];
                Ab[3][indx] = Mom(tris[indx]->st)[2];
                Ab[4][indx] = Energy(tris[indx]->st);
                Ab[5][indx] = Mag(tris[indx]->st)[2];
            }
        }
        else
        {
            for(indx = 0; indx < nn_num; indx++)
            {
                Ab[0][indx] = Dens(midsoln[tris[indx]->id].st[0]);
                Ab[1][indx] = Mom(midsoln[tris[indx]->id].st[0])[0];
                Ab[2][indx] = Mom(midsoln[tris[indx]->id].st[0])[1];
                Ab[3][indx] = Mom(midsoln[tris[indx]->id].st[0])[2];
                Ab[4][indx] = Energy(midsoln[tris[indx]->id].st[0]);
                Ab[5][indx] = Mag(midsoln[tris[indx]->id].st[0])[2];
            }
        }

        /* destination state; without midsoln the only state available is tri->st */
        if(rk_step == RK_STEP || NULL == midsoln)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(k = 0; k < 6; k++)
        {
            solve_by_LU(A,MAX_N_COEF,Ab[k],XX);

            switch(k)
            {
            case 0:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Dens(st)[i] = XX[i];
            break;
            case 1:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[0][i] = XX[i];
            break;
            case 2:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[1][i] = XX[i];
            break;
            case 3:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[2][i] = XX[i];
            break;
            case 4:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Energy(st)[i] = XX[i];
            break;
            case 5:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_B(st)[2][i] = XX[i];
            break;
            }
        }

        if(con_u != NULL)
        {
            for(i = 0; i < MAX_N_COEF; i++)
            {
                con_u[0][i] = dg_Dens(st)[i];
                con_u[4][i] = dg_Energy(st)[i];
                for(j = 0; j < 3; j++)
                {
                    con_u[j+1][i] = dg_Mom(st)[j][i];
                    con_u[j+5][i] = dg_B(st)[j][i];
                }
            }
        }
}

//// Construct the polynomial by constrained least squares.
//// Conservation is enforced exactly only on tri.
//// On the other tris, conservation is enforced in the least-squares sense.
//// NOTE: tris[0] = tri;
/*
*       tri_comput_P2_polynomials_from_avg_MHD_7sten_reverse():
*
*       Fits MAX_N_COEF polynomial coefficients on tri for six quantities
*       (density, three momentum components, energy, Mag[2]) with the LAPACK
*       routine dgglse.  The single equality constraint is the average on
*       tri itself; tris[1..nn_num-1] supply the least-squares rows (tris[0]
*       is skipped; the header note states tris[0] = tri).
*
*       tri         triangle receiving the coefficients
*       tris        stencil triangles, nn_num entries
*       nn_num      stencil size including tris[0]; limited by the local
*                   work arrays (see the size check below)
*       midsoln     when NULL, averages are read from ->st of the triangles,
*                   otherwise from midsoln[id].st[0]
*       limit_store not used by this function
*       rk_step     RK_STEP selects tri->st as destination; any other value
*                   selects midsoln[tri->id].st[rk_step], or tri->st when
*                   midsoln is NULL
*       side        index into the per-triangle matrix cache in
*                   alltri_HR_sten (only used when that array is non-NULL)
*       con_u       when non-NULL, receives copies of the coefficients:
*                   [0] density, [1..3] momentum, [4] energy, [5..7] dg_B
*
*       dg_B(st)[0] and dg_B(st)[1] are not computed here; con_u[5] and
*       con_u[6] receive whatever the destination state already holds.
*       A non-zero INFO from dgglse is reported on stdout and processing
*       continues.
*/
EXPORT void tri_comput_P2_polynomials_from_avg_MHD_7sten_reverse(
         TRI       *tri,
         TRI       *tris[],
         int       nn_num,
         Mid_soln *midsoln,
         Limiting_store **limit_store,
         int      rk_step,
         int      side,
         double   **con_u)
{
        double    Ab[8][20], Bb[8][6], A[20][MAX_N_COEF], B[8][MAX_N_COEF]; /// B[#cv][mass_matrix_term for each coeff]
        double    AA[300], BB[300], CC[300], DD[300], XX[300], work[300];
        static double   **loc_mass_1st_row = NULL;
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, cv_indx;
        Locstate  st;
        double    **Lmass_matrix;

        if(loc_mass_1st_row == NULL)
            matrix(&loc_mass_1st_row, 1, MAX_N_COEF,sizeof(double));

        /* A and Ab hold one row/column per least-squares triangle; AA is the packed copy of A */
        if(nn_num-1 > (int)(sizeof(A)/sizeof(A[0])) ||
           (nn_num-1)*MAX_N_COEF > (int)(sizeof(AA)/sizeof(AA[0])))
        {
            printf("ERROR: tri_comput_P2_polynomials_from_avg_MHD_7sten_reverse(), nn_num = %d exceeds work arrays\n",
                   nn_num);
            clean_up(ERROR);
        }

        //// first assemble neighboring tris, which satisfy conservation in the least square sense.
        if(alltri_HR_sten == NULL)
        {
            /* no cache available: build the rows from scratch */
            for(cv_indx = 1; cv_indx < nn_num; cv_indx++)
            {
                comp_mass_matrix_1st_row(MAX_N_COEF,tris[cv_indx],dim,fg_centroid(tri),loc_mass_1st_row);
                for(i = 0; i < MAX_N_COEF; i++)
                    A[cv_indx-1][i] = loc_mass_1st_row[0][i]/loc_mass_1st_row[0][0];
            }
        }
        else if(alltri_HR_sten[tri->id].WENO_rev_set[side] == NO)
        {
            /* first visit of this (tri, side): build the rows and cache them */
            alltri_HR_sten[tri->id].WENO_rev_set[side] = YES;
            matrix(&(alltri_HR_sten[tri->id].WENO_Arev[side]), nn_num, MAX_N_COEF, sizeof(double));
            for(cv_indx = 1; cv_indx < nn_num; cv_indx++)
            {
                comp_mass_matrix_1st_row(MAX_N_COEF,tris[cv_indx],dim,fg_centroid(tri),loc_mass_1st_row);
                for(i = 0; i < MAX_N_COEF; i++)
                    alltri_HR_sten[tri->id].WENO_Arev[side][cv_indx-1][i] = A[cv_indx-1][i] =
                                loc_mass_1st_row[0][i]/loc_mass_1st_row[0][0];
            }
        }
        else if(alltri_HR_sten[tri->id].WENO_rev_set[side] == YES)
        {
            /* reuse the cached rows */
            for(cv_indx = 1; cv_indx < nn_num; cv_indx++)
            {
                for(i = 0; i < MAX_N_COEF; i++)
                    A[cv_indx-1][i] = alltri_HR_sten[tri->id].WENO_Arev[side][cv_indx-1][i];
            }
        }

        //// set constraint eqns
        Lmass_matrix = tri->Lmass_matrix;
        for(i = 0; i < MAX_N_COEF; i++)
            B[0][i] = Lmass_matrix[0][i]/Lmass_matrix[0][0];

        if(NULL == midsoln)
            st = tri->st;
        else
            st = midsoln[tri->id].st[0];

        /// The conservation constraint part
        Bb[0][0] = Dens(st);
        Bb[1][0] = Mom(st)[0];
        Bb[2][0] = Mom(st)[1];
        Bb[3][0] = Mom(st)[2];
        Bb[4][0] = Energy(st);
        Bb[5][0] = Mag(st)[2];

        /* least-squares right-hand sides: averages on tris[1..nn_num-1] */
        if(NULL == midsoln)
        {
            for(i = 1; i < nn_num; i++)
            {
                Ab[0][i-1] = Dens(tris[i]->st);
                Ab[1][i-1] = Mom(tris[i]->st)[0];
                Ab[2][i-1] = Mom(tris[i]->st)[1];
                Ab[3][i-1] = Mom(tris[i]->st)[2];
                Ab[4][i-1] = Energy(tris[i]->st);
                Ab[5][i-1] = Mag(tris[i]->st)[2];
            }
        }
        else
        {
            for(i = 1; i < nn_num; i++)
            {
                Ab[0][i-1] = Dens(midsoln[tris[i]->id].st[0]);
                Ab[1][i-1] = Mom(midsoln[tris[i]->id].st[0])[0];
                Ab[2][i-1] = Mom(midsoln[tris[i]->id].st[0])[1];
                Ab[3][i-1] = Mom(midsoln[tris[i]->id].st[0])[2];
                Ab[4][i-1] = Energy(midsoln[tris[i]->id].st[0]);
                Ab[5][i-1] = Mag(midsoln[tris[i]->id].st[0])[2];
            }
        }

        /* destination state; without midsoln the only state available is tri->st */
        if(rk_step == RK_STEP || NULL == midsoln)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(k = 0; k < 6; k++)
        {
            M = nn_num-1; N = MAX_N_COEF; P = 1; LDA = M; LDB = P; LWORK = M+N+P;

            /* dgglse overwrites its inputs, so repack (column-major) for every k */
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < M; i++)
                {
                    AA[l] = A[i][j];
                    l++;
                }
            }
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < P; i++)
                {
                    BB[l] = B[i][j];
                    l++;
                }
            }

            for(i = 0; i < M; i++)  // right side for the least square part
                CC[i] = Ab[k][i];
            for(i = 0; i < P; i++)  // right side for the constrained part
                DD[i] = Bb[k][i];

            FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                 BB, &LDB, CC, DD, XX, ///// double array B, int LDB, double array C, D, X,
                                 work, &LWORK, &INFO);
            if(INFO != 0)
            {
                printf("WARNING: tri_comput_P2_polynomials_from_avg_MHD_7sten_reverse(), dgglse INFO = %d on tri[%d], eqn %d\n",
                       INFO, tri->id, k);
            }

            ///// save fitted soln
            switch(k)
            {
            case 0:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Dens(st)[i] = XX[i];
            break;
            case 1:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[0][i] = XX[i];
            break;
            case 2:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[1][i] = XX[i];
            break;
            case 3:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[2][i] = XX[i];
            break;
            case 4:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Energy(st)[i] = XX[i];
            break;
            case 5:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_B(st)[2][i] = XX[i];
            break;
            }
        }

        if(con_u != NULL)
        {
            for(i = 0; i < MAX_N_COEF; i++)
            {
                con_u[0][i] = dg_Dens(st)[i];
                con_u[4][i] = dg_Energy(st)[i];
                for(j = 0; j < 3; j++)
                {
                    con_u[j+1][i] = dg_Mom(st)[j][i];
                    con_u[j+5][i] = dg_B(st)[j][i];
                }
            }
        }
}




//// Construct the polynomial (MAX_N_COEF coefficients) of each conserved MHD
//// variable on tri from cell averages by constrained least squares, using
//// LAPACK dgglse:
////         minimize || c - A x ||_2   subject to   B x = d.
//// The central stencil is used. See Hu and Shu, WENO on triangles, for the
//// preliminary reconstruction.
////
//// Least-squares rows (A, c): one row per stencil triangle tris[0..nn_num-1].
////     Row cv is the first mass-matrix row of tris[cv] returned by
////     comp_mass_matrix_1st_row() (called with fg_centroid(tri)), divided by
////     its first entry; c holds the average of that triangle.
//// Constraint (B, d): a single row (P = 1) built from tri->Lmass_matrix[0],
////     divided by its first entry, so the average on tri itself is matched
////     exactly. The edge-neighbour constraints that used to be added here are
////     disabled; the neighbours only enter through the least-squares rows.
////
//// tri          triangle being reconstructed; tri->id indexes midsoln[] and
////              alltri_HR_sten[].
//// tris, nn_num stencil triangles.  nn_num must not exceed the 20 rows of
////              A/Ab and nn_num*MAX_N_COEF must not exceed 300 (size of AA);
////              this is NOT checked here.  dgglse also needs
////              MAX_N_COEF <= nn_num + 1.
//// midsoln      if NULL the averages are read from ->st of each triangle,
////              otherwise from midsoln[id].st[0].
//// limit_store  unused.
//// rk_step      if equal to RK_STEP the coefficients are written to tri->st,
////              otherwise to midsoln[tri->id].st[rk_step] (midsoln must then
////              be non-NULL).
//// con_u        optional output.  If non-NULL, rows 0..7 receive the
////              MAX_N_COEF coefficients of: 0 density, 1-3 momentum,
////              4 energy, 5-7 dg_B[0..2].
////
//// Only six variables are fitted here: density, the three momentum components,
//// energy and Mag[2].  dg_B(st)[0] and dg_B(st)[1] are NOT computed by this
//// function; con_u[5] and con_u[6] simply copy whatever st already holds.
////
//// If alltri_HR_sten is non-NULL the normalized rows of A and B are cached per
//// triangle (WENO_Acent / WENO_Bcent) on first use and reused afterwards, so
//// the stencil passed for a given tri is assumed not to change between calls.
////
//// If dgglse reports failure (INFO != 0) the solution vector is not valid.
//// A warning is printed and the variable falls back to the constant
//// polynomial (coefficient 0 = cell average, others 0), which still satisfies
//// the constraint because B[0][0] == 1 after normalization.
EXPORT void tri_comput_P2_polynomials_from_avg_MHD(
         TRI       *tri,   
         TRI       *tris[],
         int       nn_num,
         Mid_soln *midsoln,  
         Limiting_store **limit_store,
         int      rk_step,
	 double   **con_u)
{
        double    Ab[8][20], Bb[8][6], A[20][MAX_N_COEF], B[8][MAX_N_COEF]; /// B[#cv][mass_matrix_term for each coeff]
        double    AA[300], BB[300], CC[300], DD[300], XX[300], work[300];
        static double   **loc_mass_1st_row = NULL;
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, debug = NO, cv_indx;
        int       use_cache, fill_cache;
        Locstate  st, avg_st, nb_st;
        double    **Lmass_matrix;

        //// one-time scratch row for comp_mass_matrix_1st_row(); never freed
        if(loc_mass_1st_row == NULL)
            matrix(&loc_mass_1st_row, 1, MAX_N_COEF,sizeof(double));

        use_cache = NO;
        fill_cache = NO;
        if(alltri_HR_sten != NULL)
        {
            if(alltri_HR_sten[tri->id].WENO_cent_set == YES)
                use_cache = YES;
            else
                fill_cache = YES;
        }

        if(use_cache == YES)
        {
            //// reuse the normalized rows stored on an earlier call
            for(cv_indx = 0; cv_indx < nn_num; cv_indx++)
            {
                for(i = 0; i < MAX_N_COEF; i++)
                    A[cv_indx][i] = alltri_HR_sten[tri->id].WENO_Acent[cv_indx][i];
            }
            for(i = 0; i < MAX_N_COEF; i++)
                B[0][i] = alltri_HR_sten[tri->id].WENO_Bcent[0][i];
        }
        else
        {
            if(fill_cache == YES)
            {
                alltri_HR_sten[tri->id].WENO_cent_set = YES;
                matrix(&(alltri_HR_sten[tri->id].WENO_Acent), nn_num, MAX_N_COEF, sizeof(double));
                //// 4 rows are allocated although only row 0 is filled here
                matrix(&(alltri_HR_sten[tri->id].WENO_Bcent), 4, MAX_N_COEF, sizeof(double));
            }

            //// stencil triangles: conservation holds in the least square sense
            for(cv_indx = 0; cv_indx < nn_num; cv_indx++)
            {
                comp_mass_matrix_1st_row(MAX_N_COEF,tris[cv_indx],dim,fg_centroid(tri),loc_mass_1st_row);
                for(i = 0; i < MAX_N_COEF; i++)
                {
                    A[cv_indx][i] = loc_mass_1st_row[0][i]/loc_mass_1st_row[0][0];
                    if(fill_cache == YES)
                        alltri_HR_sten[tri->id].WENO_Acent[cv_indx][i] = A[cv_indx][i];
                }
            }

            //// constraint eqn: exact conservation on tri itself
            Lmass_matrix = tri->Lmass_matrix;
            for(i = 0; i < MAX_N_COEF; i++)
            {
                B[0][i] = Lmass_matrix[0][i]/Lmass_matrix[0][0];
                if(fill_cache == YES)
                    alltri_HR_sten[tri->id].WENO_Bcent[0][i] = B[0][i];
            }
        }

        //// right hand side of the constraint: averages on tri
        if(NULL == midsoln)
            avg_st = tri->st;
        else
            avg_st = midsoln[tri->id].st[0];

        Bb[0][0] = Dens(avg_st);
        Bb[1][0] = Mom(avg_st)[0];
        Bb[2][0] = Mom(avg_st)[1];
        Bb[3][0] = Mom(avg_st)[2];
        Bb[4][0] = Energy(avg_st);
        Bb[5][0] = Mag(avg_st)[2];

        if(debug == YES)
        {
            //// only column 0 of Bb is filled (single constraint)
            printf("Bb[%g]\n", Bb[0][0]);
        }

        //// right hand side of the least square part: averages on the stencil
        for(i = 0; i < nn_num; i++)
        {
            if(NULL == midsoln)
                nb_st = tris[i]->st;
            else
                nb_st = midsoln[tris[i]->id].st[0];

            Ab[0][i] = Dens(nb_st);
            Ab[1][i] = Mom(nb_st)[0];
            Ab[2][i] = Mom(nb_st)[1];
            Ab[3][i] = Mom(nb_st)[2];
            Ab[4][i] = Energy(nb_st);
            Ab[5][i] = Mag(nb_st)[2];
        }

        if(debug == YES)
        {
            for(i = 0; i < nn_num; i++)
                printf("Ab[%d] = %g, from tri %d\n", i, Ab[0][i], tris[i]->id);
        }

        //// state that receives the fitted coefficients
        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(k = 0; k < 6; k++)
        {
            M = nn_num; N = MAX_N_COEF; P = 1; LDA = M; LDB = P; LWORK = M+N+P;

            //// dgglse overwrites A, B, c and d, so they are repacked for
            //// every variable.  AA is column-major with leading dimension M.
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < M; i++)
                    AA[l++] = A[i][j];
            }
            //// with P == 1 the column-major B is just its single row
            for(j = 0; j < N; j++)
                BB[j] = B[0][j];
            for(i = 0; i < M; i++)  // right side for the least square part
                CC[i] = Ab[k][i];
            DD[0] = Bb[k][0];       // right side for the constrained part

            FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                 BB, &LDB, CC, DD, XX, ///// double array B, int LDB, double array C, D, X,
                                 work, &LWORK, &INFO);

            if(INFO != 0)
            {
                //// XX is not a solution; fall back to the cell average
                printf("WARNING in tri_comput_P2_polynomials_from_avg_MHD(), "
                       "dgglse returned INFO = %d for variable %d on tri[%d], "
                       "using the cell average\n", INFO, k, tri->id);
                XX[0] = Bb[k][0];
                for(i = 1; i < MAX_N_COEF; i++)
                    XX[i] = 0.0;
            }

            ///// save fitted soln
            switch(k)
            {
            case 0:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Dens(st)[i] = XX[i];
            break;
            case 1:
            case 2:
            case 3:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[k-1][i] = XX[i];
            break;
            case 4:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Energy(st)[i] = XX[i];
            break;
            case 5:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_B(st)[2][i] = XX[i];
            break;
            default:
            break;
            }
        }

        if(con_u != NULL)
        {
            for(i = 0; i < MAX_N_COEF; i++)
            {
                con_u[0][i] = dg_Dens(st)[i];
                con_u[4][i] = dg_Energy(st)[i];
                for(j = 0; j < 3; j++)
                {
                    con_u[j+1][i] = dg_Mom(st)[j][i];
                    con_u[j+5][i] = dg_B(st)[j][i];
                }
            }
        }

        //// NOTE: the reconstructed state is not validated here; the
        //// g_invalid_MHD_state() check and the SV-to-CV mapping that used to
        //// follow are disabled.
}

//// Construct the polynomial (MAX_N_COEF coefficients) of each conserved MHD
//// variable on tri by constrained least squares (LAPACK dgglse):
////         minimize || c - A x ||_2   subject to   B x = d.
//// Conservation is enforced exactly on tri only (one constraint row taken
//// from tri->Lmass_matrix[0]).  On the stencil triangles tris[0..nn_num-1]
//// conservation is enforced in the least-squares sense; their rows come from
//// comp_mass_matrix_1st_row() through the file-level buffer mass_1st_row.
//// Every row is divided by its first entry.
////
//// tri          triangle being reconstructed; tri->id indexes midsoln[].
//// tris, nn_num stencil triangles.  nn_num is checked against the capacity of
////              the local work arrays; exceeding it is a fatal error.
//// midsoln      if NULL averages are read from ->st, else from
////              midsoln[id].st[0].
//// limit_store  unused.
//// rk_step      if equal to RK_STEP the coefficients are written to tri->st,
////              otherwise to midsoln[tri->id].st[rk_step] (midsoln must then
////              be non-NULL).
////
//// Six variables are fitted: density, three momentum components, energy and
//// Mag[2].  dg_B(st)[0] and dg_B(st)[1] are not touched.
//// A dgglse failure (INFO != 0) or an invalid reconstructed state, as reported
//// by g_invalid_MHD_state(), ends the run through clean_up(ERROR).
LOCAL void tri_comput_P2_polynomials_from_avg_MHD_ver2(
         TRI       *tri,   
         TRI       *tris[],
         int       nn_num,
         Mid_soln *midsoln,  
         Limiting_store **limit_store,
         int      rk_step)
{
        double    Ab[8][20], Bb[8][6], A[20][MAX_N_COEF], B[8][MAX_N_COEF]; /// B[#cv][mass_matrix_term for each coeff]
        double    AA[300], BB[300], CC[300], DD[300], XX[300], work[300];
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, debug = NO, cv_indx;
        int       max_rows, max_pack;
        Locstate  st, avg_st, nb_st;
        double    **Lmass_matrix, row0;

        //// the stencil must fit into A/Ab (rows) and into the packed copy AA
        max_rows = (int)(sizeof(A)/sizeof(A[0]));
        max_pack = (int)(sizeof(AA)/sizeof(AA[0]));
        if(nn_num > max_rows || nn_num*MAX_N_COEF > max_pack)
        {
            printf("ERROR: IN tri_comput_P2_polynomials_from_avg_MHD_ver2() on tri[%d], "
                   "stencil size %d exceeds work array capacity %d\n",
                   tri->id, nn_num, max_rows);
            clean_up(ERROR);
        }

        //// stencil triangles: conservation holds in the least square sense.
        //// Rows are normalized straight from mass_1st_row.
        for(cv_indx = 0; cv_indx < nn_num; cv_indx++)
        {
             comp_mass_matrix_1st_row(MAX_N_COEF,tris[cv_indx],dim,fg_centroid(tri),mass_1st_row);
             row0 = mass_1st_row[0][0];
             for(i = 0; i < MAX_N_COEF; i++)
                 A[cv_indx][i] = mass_1st_row[0][i]/row0;
        }

        //// set constraint eqn: exact conservation on tri
        Lmass_matrix = tri->Lmass_matrix;
        for(i = 0; i < MAX_N_COEF; i++)
            B[0][i] = Lmass_matrix[0][i]/Lmass_matrix[0][0];

        /// The conservation constraint part: averages on tri
        if(NULL == midsoln)
            avg_st = tri->st;
        else
            avg_st = midsoln[tri->id].st[0];

        Bb[0][0] = Dens(avg_st);
        Bb[1][0] = Mom(avg_st)[0];
        Bb[2][0] = Mom(avg_st)[1];
        Bb[3][0] = Mom(avg_st)[2];
        Bb[4][0] = Energy(avg_st);
        Bb[5][0] = Mag(avg_st)[2];

        if(debug == YES)
        {
            //// only column 0 of Bb is filled (single constraint)
            printf("Bb[%g]\n", Bb[0][0]);
        }

        //// right hand side of the least square part: averages on the stencil
        for(i = 0; i < nn_num; i++)
        {
            if(NULL == midsoln)
                nb_st = tris[i]->st;
            else
                nb_st = midsoln[tris[i]->id].st[0];

            Ab[0][i] = Dens(nb_st);
            Ab[1][i] = Mom(nb_st)[0];
            Ab[2][i] = Mom(nb_st)[1];
            Ab[3][i] = Mom(nb_st)[2];
            Ab[4][i] = Energy(nb_st);
            Ab[5][i] = Mag(nb_st)[2];
        }

        if(debug == YES)
        {
            for(i = 0; i < nn_num; i++)
                printf("Ab[%d] = %g, from tri %d\n", i, Ab[0][i], tris[i]->id);
        }

        //// state that receives the fitted coefficients
        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(k = 0; k < 6; k++)
        {
            M = nn_num; N = MAX_N_COEF; P = 1; LDA = M; LDB = P; LWORK = M+N+P;

            //// dgglse overwrites its inputs, so they are repacked for every
            //// variable.  AA is column-major with leading dimension M.
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < M; i++)
                    AA[l++] = A[i][j];
            }
            //// with P == 1 the column-major B is just its single row
            for(j = 0; j < N; j++)
                BB[j] = B[0][j];
            for(i = 0; i < M; i++)  // right side for the least square part
                CC[i] = Ab[k][i];
            DD[0] = Bb[k][0];       // right side for the constrained part

            FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                 BB, &LDB, CC, DD, XX, ///// double array B, int LDB, double array C, D, X,
                                 work, &LWORK, &INFO);

            if(INFO != 0)
            {
                //// XX holds no solution; do not store it
                printf("ERROR: IN tri_comput_P2_polynomials_from_avg_MHD_ver2() on tri[%d], "
                       "dgglse returned INFO = %d for variable %d\n", tri->id, INFO, k);
                clean_up(ERROR);
            }

            ///// save fitted soln
            switch(k)
            {
            case 0:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Dens(st)[i] = XX[i];
            break;
            case 1:
            case 2:
            case 3:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[k-1][i] = XX[i];
            break;
            case 4:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Energy(st)[i] = XX[i];
            break;
            case 5:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_B(st)[2][i] = XX[i];
            break;
            default:
            break;
            }
        }

        if(g_invalid_MHD_state("tri_comput_P2_polynomials_from_avg_MHD_ver2()",st,YES))
        {
            printf("ERROR: IN tri_comput_P2_polynomials_from_avg_MHD_ver2() on tri[%d] iteration %d\n", tri->id, rk_step);
            print_tri_crds(tri);
            verbose_print_state("state",st);
            clean_up(ERROR);
        }
}

//// Construct the polynomial (MAX_N_COEF coefficients) of each conserved MHD
//// variable on tri by constrained least squares (LAPACK dgglse):
////         minimize || c - A x ||_2   subject to   B x = d.
//// Conservation is enforced exactly on tri only (one constraint row taken
//// from tri->Lmass_matrix[0]).  Conservation is enforced in the least-squares
//// sense on nn_num + 3 triangles: the stencil triangles tris[0..nn_num-1]
//// followed by the three edge neighbours Tri_on_side(tri,0..2).  Their rows
//// come from comp_mass_matrix_1st_row() through the file-level buffer
//// mass_1st_row.  Every row is divided by its first entry.
////
//// tri          triangle being reconstructed; tri->id indexes midsoln[].
////              All three edge neighbours are dereferenced, so they must
////              exist.
//// tris, nn_num stencil triangles.  nn_num + 3 is checked against the
////              capacity of the local work arrays; exceeding it is fatal.
//// midsoln      if NULL averages are read from ->st, else from
////              midsoln[id].st[0].
//// limit_store  unused.
//// rk_step      if equal to RK_STEP the coefficients are written to tri->st,
////              otherwise to midsoln[tri->id].st[rk_step] (midsoln must then
////              be non-NULL).
////
//// Six variables are fitted: density, three momentum components, energy and
//// Mag[2].  dg_B(st)[0] and dg_B(st)[1] are not touched.
//// A dgglse failure (INFO != 0) or an invalid reconstructed state, as reported
//// by g_invalid_MHD_state(), ends the run through clean_up(ERROR).
LOCAL void tri_comput_P2_polynomials_from_avg_MHD_ver1(
         TRI       *tri,   
         TRI       *tris[],
         int       nn_num,
         Mid_soln *midsoln,  
         Limiting_store **limit_store,
         int      rk_step)
{
        TRI       *Bnbtri[3];
        double    Ab[8][20], Bb[8][6], A[20][MAX_N_COEF], B[8][MAX_N_COEF]; /// B[#cv][mass_matrix_term for each coeff]
        double    AA[300], BB[300], CC[300], DD[300], XX[300], work[300];
        int       dim = 2, i, j, k, M, N, P, l, LDA, LDB, LWORK, INFO, debug = NO, cv_indx;
        int       max_rows, max_pack;
        Locstate  st, avg_st, nb_st, nbst[3];
        double    **Lmass_matrix, row0;

        //// stencil plus the 3 edge neighbours must fit into A/Ab (rows) and
        //// into the packed copy AA
        max_rows = (int)(sizeof(A)/sizeof(A[0]));
        max_pack = (int)(sizeof(AA)/sizeof(AA[0]));
        if(nn_num + 3 > max_rows || (nn_num + 3)*MAX_N_COEF > max_pack)
        {
            printf("ERROR: IN tri_comput_P2_polynomials_from_avg_MHD_ver1() on tri[%d], "
                   "stencil size %d (+3 edge neighbors) exceeds work array capacity %d\n",
                   tri->id, nn_num, max_rows);
            clean_up(ERROR);
        }

        for(i = 0; i < 3; i++)
            Bnbtri[i] = Tri_on_side(tri,i);

        //// first assemble far neighboring tris, which satisfy conservation in
        //// the least square sense.  Rows are normalized straight from
        //// mass_1st_row.
        for(cv_indx = 0; cv_indx < nn_num; cv_indx++)
        {
             comp_mass_matrix_1st_row(MAX_N_COEF,tris[cv_indx],dim,fg_centroid(tri),mass_1st_row);
             row0 = mass_1st_row[0][0];
             for(i = 0; i < MAX_N_COEF; i++)
                 A[cv_indx][i] = mass_1st_row[0][i]/row0;
        }
        //// assemble edge neighboring tris, which satisfy conservation in the
        //// least square sense as well; they occupy rows nn_num..nn_num+2.
        for(cv_indx = 0; cv_indx < 3; cv_indx++)
        {
             comp_mass_matrix_1st_row(MAX_N_COEF,Bnbtri[cv_indx],dim,fg_centroid(tri),mass_1st_row);
             row0 = mass_1st_row[0][0];
             for(i = 0; i < MAX_N_COEF; i++)
                 A[cv_indx + nn_num][i] = mass_1st_row[0][i]/row0;
        }

        //// set constraint eqn: exact conservation on tri
        Lmass_matrix = tri->Lmass_matrix;
        for(i = 0; i < MAX_N_COEF; i++)
            B[0][i] = Lmass_matrix[0][i]/Lmass_matrix[0][0];

        //// states holding the averages on tri and on its edge neighbours
        if(NULL == midsoln)
        {
            avg_st = tri->st;
            for(i = 0; i < 3; i++)
                nbst[i] = Bnbtri[i]->st;
        }
        else
        {
            avg_st = midsoln[tri->id].st[0];
            for(i = 0; i < 3; i++)
                nbst[i] = midsoln[Bnbtri[i]->id].st[0];
        }

        /// The conservation constraint part
        Bb[0][0] = Dens(avg_st);
        Bb[1][0] = Mom(avg_st)[0];
        Bb[2][0] = Mom(avg_st)[1];
        Bb[3][0] = Mom(avg_st)[2];
        Bb[4][0] = Energy(avg_st);
        Bb[5][0] = Mag(avg_st)[2];

        if(debug == YES)
        {
            //// only column 0 of Bb is filled (single constraint)
            printf("Bb[%g]\n", Bb[0][0]);
        }

        //// right hand side of the least square part: stencil averages
        for(i = 0; i < nn_num; i++)
        {
            if(NULL == midsoln)
                nb_st = tris[i]->st;
            else
                nb_st = midsoln[tris[i]->id].st[0];

            Ab[0][i] = Dens(nb_st);
            Ab[1][i] = Mom(nb_st)[0];
            Ab[2][i] = Mom(nb_st)[1];
            Ab[3][i] = Mom(nb_st)[2];
            Ab[4][i] = Energy(nb_st);
            Ab[5][i] = Mag(nb_st)[2];
        }
        /// add Ab from edge neighboring cells
        for(i = 0; i < 3; i++)
        {
            Ab[0][i+nn_num] = Dens(nbst[i]);
            Ab[1][i+nn_num] = Mom(nbst[i])[0];
            Ab[2][i+nn_num] = Mom(nbst[i])[1];
            Ab[3][i+nn_num] = Mom(nbst[i])[2];
            Ab[4][i+nn_num] = Energy(nbst[i]);
            Ab[5][i+nn_num] = Mag(nbst[i])[2];
        }

        if(debug == YES)
        {
            for(i = 0; i < nn_num; i++)
                printf("Ab[%d] = %g, from tri %d\n", i, Ab[0][i], tris[i]->id);
        }

        //// state that receives the fitted coefficients
        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(k = 0; k < 6; k++)
        {
            M = nn_num+3; N = MAX_N_COEF; P = 1; LDA = M; LDB = P; LWORK = M+N+P;

            //// dgglse overwrites its inputs, so they are repacked for every
            //// variable.  AA is column-major with leading dimension M.
            l = 0;
            for(j = 0; j < N; j++)
            {
                for(i = 0; i < M; i++)
                    AA[l++] = A[i][j];
            }
            //// with P == 1 the column-major B is just its single row
            for(j = 0; j < N; j++)
                BB[j] = B[0][j];
            for(i = 0; i < M; i++)  // right side for the least square part
                CC[i] = Ab[k][i];
            DD[0] = Bb[k][0];       // right side for the constrained part

            FORTRAN_NAME(dgglse)(&M, &N, &P, AA, &LDA, ///// int M,int N,int P, double array A, int LDA
                                 BB, &LDB, CC, DD, XX, ///// double array B, int LDB, double array C, D, X,
                                 work, &LWORK, &INFO);

            if(INFO != 0)
            {
                //// XX holds no solution; do not store it
                printf("ERROR: IN tri_comput_P2_polynomials_from_avg_MHD_ver1() on tri[%d], "
                       "dgglse returned INFO = %d for variable %d\n", tri->id, INFO, k);
                clean_up(ERROR);
            }

            ///// save fitted soln
            switch(k)
            {
            case 0:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Dens(st)[i] = XX[i];
            break;
            case 1:
            case 2:
            case 3:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[k-1][i] = XX[i];
            break;
            case 4:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Energy(st)[i] = XX[i];
            break;
            case 5:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_B(st)[2][i] = XX[i];
            break;
            default:
            break;
            }
        }

        if(g_invalid_MHD_state("tri_comput_P2_polynomials_from_avg_MHD_ver1()",st,YES))
        {
            printf("ERROR: IN tri_comput_P2_polynomials_from_avg_MHD_ver1() on tri[%d] iteration %d\n", tri->id, rk_step);
            print_tri_crds(tri);
            verbose_print_state("state",st);
            clean_up(ERROR);
        }

        //// NOTE: the mapping of the polynomial from the SV to the CV centers
        //// that used to follow here is disabled.
}


//// Construct poly by minimization.
//// Conservation is only enforced on tri itself.
//// On neighboring tris, conservation is enforced by least squares.
/*
*               tri_comput_P3_polynomials_from_avg_by_min():
*
*       For each of the N_EQN fields (density, x-momentum, y-momentum,
*       energy) this routine solves one (MAX_N_COEF+1)x(MAX_N_COEF+1)
*       linear system for MAX_N_COEF polynomial coefficients plus one
*       Lagrange multiplier:
*
*         - rows 0..MAX_N_COEF-1 are the normal equations of the least
*           squares fit to the values of the nn_num neighbors tris[],
*           built from the first mass-matrix row of every neighbor as
*           returned by comp_mass_matrix_1st_row() with the centroid of tri;
*         - row MAX_N_COEF is the constraint built from the first row of
*           tri->Lmass_matrix, with tri's own value as right hand side.
*
*       The cell values are always read from midsoln[].st[0].  The
*       coefficients are written to tri->st when rk_step == RK_STEP and to
*       midsoln[tri->id].st[rk_step] otherwise.  They are then copied to
*       the 4 control-volume slots (tri->cv_soln, or
*       limit_store[rk_step][tri->id]) and handed to the matching
*       map_poly_SV_to_CV_*_p3() routine.
*
*       At most 39 neighbors are supported (Bb needs one extra column for
*       tri itself); a larger nn_num is reported and the run is aborted.
*/
LOCAL void tri_comput_P3_polynomials_from_avg_by_min(
         TRI       *tri,
         TRI       *tris[],
         int       nn_num,
         Mid_soln *midsoln,
         Limiting_store **limit_store,
         int      rk_step)
{
        Locstate  st;
        int       i, dim = 2, n_eqn;
        double    tmp, Ab[4][40], Bb[4][40];
        int       cv_indx, k, l, indx;
        static double   **ALmass_matrix = NULL, **A;
        double          **Lmass_matrix, XX[300];
        int       debug = NO;
        /* Column nn_num of Bb stores the value on tri itself. */
        const int max_nb = (int)(sizeof(Bb[0])/sizeof(Bb[0][0])) - 1;

        if(nn_num < 0 || nn_num > max_nb)
        {
            printf("ERROR: tri_comput_P3_polynomials_from_avg_by_min(), "
                   "tri[%d] nn_num = %d, at most %d neighbors are supported\n",
                   tri->id, nn_num, max_nb);
            clean_up(ERROR);
        }

        if(ALmass_matrix == NULL)
        {
            matrix(&(ALmass_matrix), 40, MAX_N_COEF, sizeof(double));
            matrix(&A, MAX_N_COEF+1, MAX_N_COEF+1, sizeof(double));
        }

        /// First mass-matrix row of every neighbor, one row per neighbor.
        for(cv_indx = 0; cv_indx < nn_num; cv_indx++)
        {
             comp_mass_matrix_1st_row(MAX_N_COEF,tris[cv_indx],dim,fg_centroid(tri),mass_1st_row);
             for(i = 0; i < MAX_N_COEF; i++)
                 ALmass_matrix[cv_indx][i] = mass_1st_row[0][i];
        }

        Lmass_matrix = tri->Lmass_matrix;

        /// Values to fit: neighbors in columns 0..nn_num-1, tri in column nn_num.
        st = midsoln[tri->id].st[0];
        for(indx = 0; indx < nn_num; indx++)
        {
            Bb[0][indx] = Dens(midsoln[tris[indx]->id].st[0]);
            Bb[1][indx] = Mom(midsoln[tris[indx]->id].st[0])[0];
            Bb[2][indx] = Mom(midsoln[tris[indx]->id].st[0])[1];
            Bb[3][indx] = Energy(midsoln[tris[indx]->id].st[0]);
        }
        Bb[0][nn_num] = Dens(st);
        Bb[1][nn_num] = Mom(st)[0];
        Bb[2][nn_num] = Mom(st)[1];
        Bb[3][nn_num] = Energy(st);

        /// set minimization eqn
        for(n_eqn = 0; n_eqn < MAX_N_COEF; n_eqn++)
        {
            for(l = 0; l < MAX_N_COEF; l++)
            {
                tmp = 0.0;
                for(cv_indx = 0; cv_indx < nn_num; cv_indx++)
                    tmp += ALmass_matrix[cv_indx][l]*ALmass_matrix[cv_indx][n_eqn]/sqr(ALmass_matrix[cv_indx][0]);
                A[n_eqn][l] = tmp;
            }
            /// Lagrange multiplier
            A[n_eqn][MAX_N_COEF] = -Lmass_matrix[0][n_eqn]/Lmass_matrix[0][0];
        }
        /// set constraint eqn
        for(l = 0; l < MAX_N_COEF; l++)
            A[MAX_N_COEF][l] = Lmass_matrix[0][l]/Lmass_matrix[0][0];
        /// The multiplier does not appear in the constraint itself.  This
        /// entry was never assigned before; set it so the system does not
        /// depend on whatever the static matrix happens to contain.
        A[MAX_N_COEF][MAX_N_COEF] = 0.0;

        /// set right hand side of minimization eqn.
        for(k = 0; k < N_EQN; k++)
        {
            for(n_eqn = 0; n_eqn < MAX_N_COEF; n_eqn++)
            {
                tmp = 0.0;
                for(cv_indx = 0; cv_indx < nn_num; cv_indx++)
                    tmp += Bb[k][cv_indx]*ALmass_matrix[cv_indx][n_eqn]/ALmass_matrix[cv_indx][0];
                Ab[k][n_eqn] = tmp;
            }
        }
        /// set right hand side of constraint eqn.
        for(k = 0; k < N_EQN; k++)
            Ab[k][MAX_N_COEF] = Bb[k][nn_num];

        /// Destination state of the fitted coefficients.
        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(k = 0; k < N_EQN; k++)
        {
            solve_by_LU(A,MAX_N_COEF+1,Ab[k],XX);
            ///// save fitted soln; XX[MAX_N_COEF] (the multiplier) is not stored
            switch(k)
            {
            case 0:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Dens(st)[i] = XX[i];
            break;
            case 1:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[0][i] = XX[i];
            break;
            case 2:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[1][i] = XX[i];
            break;
            case 3:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Energy(st)[i] = XX[i];
            break;
            default:
            break;
            }
        }

        if(debug == YES)
        {
            printf("tri[%d] reconstructed state\n", tri->id);
            g_verbose_print_state(st);
        }

        // Assign to CV and map polynomial to one defined on CV center
        if(rk_step == RK_STEP)
        {
            for(cv_indx = 0; cv_indx < 4; cv_indx++)
            {
                for(indx = 0; indx < MAX_N_COEF; indx++)
                {
                    tri->cv_soln->cv_dg_rho[cv_indx][indx] = dg_Dens(st)[indx];
                    tri->cv_soln->cv_dg_e[cv_indx][indx] = dg_Energy(st)[indx];
                    tri->cv_soln->cv_dg_m[0][cv_indx][indx] = dg_Mom(st)[0][indx];
                    tri->cv_soln->cv_dg_m[1][cv_indx][indx] = dg_Mom(st)[1][indx];
                }
            }
            map_poly_SV_to_CV_p3(tri);
        }
        else
        {
            for(cv_indx = 0; cv_indx < 4; cv_indx++)
            {
                for(indx = 0; indx < MAX_N_COEF; indx++)
                {
                    limit_store[rk_step][tri->id].cv_dg_rho[cv_indx][indx] = dg_Dens(st)[indx];
                    limit_store[rk_step][tri->id].cv_dg_e[cv_indx][indx] = dg_Energy(st)[indx];
                    limit_store[rk_step][tri->id].cv_dg_m[0][cv_indx][indx] = dg_Mom(st)[0][indx];
                    limit_store[rk_step][tri->id].cv_dg_m[1][cv_indx][indx] = dg_Mom(st)[1][indx];
                }
            }
            map_poly_SV_to_CV_ver2_p3(tri, limit_store[rk_step]);
        }
}


/*
*                   tri_comput_P3_polynomials_from_avg():
*
*       Computes MAX_N_COEF polynomial coefficients per field (density,
*       x-momentum, y-momentum, energy) by solving a square
*       MAX_N_COEF x MAX_N_COEF system with solve_by_LU().  Row i < nn_num
*       is the first mass-matrix row of neighbor tris[i] (from
*       comp_mass_matrix_1st_row() with the centroid of tri), row nn_num is
*       the first row of tri->Lmass_matrix; every row is divided by its
*       first entry.  The right hand sides are the cell values read from
*       midsoln[].st[0].
*
*       The coefficients are written to tri->st when rk_step == RK_STEP and
*       to midsoln[tri->id].st[rk_step] otherwise, then copied to the 4
*       control-volume slots (tri->cv_soln, or
*       limit_store[rk_step][tri->id]) and passed to the matching
*       map_poly_SV_to_CV_*_p3() routine.
*
*       nn_num+1 rows are written into matrices that only have MAX_N_COEF
*       rows, so nn_num >= MAX_N_COEF is reported and the run is aborted.
*       NOTE(review): since the solve always uses MAX_N_COEF rows, callers
*       presumably pass nn_num == MAX_N_COEF-1; with fewer neighbors the
*       trailing rows of A and Ab are not filled here -- confirm against
*       the callers.
*/
LOCAL void tri_comput_P3_polynomials_from_avg(
         TRI       *tri,
         TRI       *tris[],
         int       nn_num,
         Mid_soln *midsoln,
         Limiting_store **limit_store,
         int      rk_step)
{
        Locstate  st;
        int       i, dim = 2;
        double    Ab[4][20];
        int       cv_indx, k, indx;
        static double   **ALmass_matrix = NULL, **A;
        double          **Lmass_matrix, XX[300];
        int       debug = NO;
        const int max_rhs = (int)(sizeof(Ab[0])/sizeof(Ab[0][0]));

        /* Rows 0..nn_num of ALmass_matrix/A and Ab[k][0..nn_num] are written. */
        if(nn_num < 0 || nn_num + 1 > MAX_N_COEF || nn_num >= max_rhs)
        {
            printf("ERROR: tri_comput_P3_polynomials_from_avg(), "
                   "tri[%d] nn_num = %d does not fit a %d x %d system\n",
                   tri->id, nn_num, MAX_N_COEF, MAX_N_COEF);
            clean_up(ERROR);
        }

        if(ALmass_matrix == NULL)
        {
            matrix(&(ALmass_matrix), MAX_N_COEF, MAX_N_COEF, sizeof(double));
            matrix(&A, MAX_N_COEF, MAX_N_COEF, sizeof(double));
        }

        //// first assemble neighboring tris, then assemble the current tri itself
        for(cv_indx = 0; cv_indx < nn_num; cv_indx++)
        {
             comp_mass_matrix_1st_row(MAX_N_COEF,tris[cv_indx],dim,fg_centroid(tri),mass_1st_row);
             for(i = 0; i < MAX_N_COEF; i++)
                 ALmass_matrix[cv_indx][i] = mass_1st_row[0][i];
        }

        Lmass_matrix = tri->Lmass_matrix;
        for(i = 0; i < MAX_N_COEF; i++)
            ALmass_matrix[nn_num][i] = Lmass_matrix[0][i];

        /// Divide every row by its first entry.
        for(cv_indx = 0; cv_indx < nn_num + 1; cv_indx++)
        {
            for(i = 0; i < MAX_N_COEF; i++)
            {
                A[cv_indx][i] = ALmass_matrix[cv_indx][i]/ALmass_matrix[cv_indx][0];
            }
        }

        /// Right hand sides: neighbor values first, then the value on tri.
        st = midsoln[tri->id].st[0];
        for(indx = 0; indx < nn_num; indx++)
        {
            Ab[0][indx] = Dens(midsoln[tris[indx]->id].st[0]);
            Ab[1][indx] = Mom(midsoln[tris[indx]->id].st[0])[0];
            Ab[2][indx] = Mom(midsoln[tris[indx]->id].st[0])[1];
            Ab[3][indx] = Energy(midsoln[tris[indx]->id].st[0]);
        }
        Ab[0][nn_num] = Dens(st);
        Ab[1][nn_num] = Mom(st)[0];
        Ab[2][nn_num] = Mom(st)[1];
        Ab[3][nn_num] = Energy(st);

        /// Destination state of the computed coefficients.
        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(k = 0; k < N_EQN; k++)
        {
            solve_by_LU(A,MAX_N_COEF,Ab[k],XX);
            ///// save fitted soln
            switch(k)
            {
            case 0:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Dens(st)[i] = XX[i];
            break;
            case 1:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[0][i] = XX[i];
            break;
            case 2:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Mom(st)[1][i] = XX[i];
            break;
            case 3:
                for(i = 0; i < MAX_N_COEF; i++)
                    dg_Energy(st)[i] = XX[i];
            break;
            default:
            break;
            }
        }

        if(debug == YES)
        {
            printf("tri[%d] reconstructed state\n", tri->id);
            g_verbose_print_state(st);
        }

        // Assign to CV and map polynomial to one defined on CV center
        if(rk_step == RK_STEP)
        {
            for(cv_indx = 0; cv_indx < 4; cv_indx++)
            {
                for(indx = 0; indx < MAX_N_COEF; indx++)
                {
                    tri->cv_soln->cv_dg_rho[cv_indx][indx] = dg_Dens(st)[indx];
                    tri->cv_soln->cv_dg_e[cv_indx][indx] = dg_Energy(st)[indx];
                    tri->cv_soln->cv_dg_m[0][cv_indx][indx] = dg_Mom(st)[0][indx];
                    tri->cv_soln->cv_dg_m[1][cv_indx][indx] = dg_Mom(st)[1][indx];
                }
            }
            map_poly_SV_to_CV_p3(tri);
        }
        else
        {
            for(cv_indx = 0; cv_indx < 4; cv_indx++)
            {
                for(indx = 0; indx < MAX_N_COEF; indx++)
                {
                    limit_store[rk_step][tri->id].cv_dg_rho[cv_indx][indx] = dg_Dens(st)[indx];
                    limit_store[rk_step][tri->id].cv_dg_e[cv_indx][indx] = dg_Energy(st)[indx];
                    limit_store[rk_step][tri->id].cv_dg_m[0][cv_indx][indx] = dg_Mom(st)[0][indx];
                    limit_store[rk_step][tri->id].cv_dg_m[1][cv_indx][indx] = dg_Mom(st)[1][indx];
                }
            }
            map_poly_SV_to_CV_ver2_p3(tri, limit_store[rk_step]);
        }
}

//ok
LOCAL void set_RK_time_for_bdry(
	Front    *nfr,
	Front    *fr,
        int      rk_iter,
        float    dt)
{
        double t1, t2; 

        if(RK_STEP == 1)
        {
            RK_bdry_time = nfr->time = fr->time; // time for evolving state for boundary condition
            current_time = fr->time + dt; // time for reconstruction which involves boundary element
                                          // This is after one step time evolution.
        }
        else if(RK_STEP == 2)
        {
            if(rk_iter == 0)
            {
                RK_bdry_time = nfr->time = fr->time; // time for evolving state for boundary condition
                current_time = fr->time + dt; // time for reconstruction which involves boundary element
            }
            else
            {
                RK_bdry_time = nfr->time = fr->time + dt;
                current_time = fr->time + dt;
            }
        }
        else if(RK_STEP == 3)
        {
            if(rk_iter == 0)
            {
                nfr->time = fr->time; // time for evolving state for boundary condition
                current_time = fr->time + dt; // time for reconstruction which involves boundary element
            }
            else if(rk_iter == 1)
            {
                nfr->time = fr->time + dt;
                current_time = fr->time + 0.5*dt;
            }
            else
            {
                nfr->time = fr->time + 0.5*dt;
                current_time = fr->time + dt;
            }

            // printf("IN set_RK_time_for_bdry():: time = %g dt = %g\n", fr->time, dt);

        }
        else if(RK_STEP == 4)
        {
            if(rk_iter == 0)
            {
                RK_bdry_time = nfr->time = fr->time;
                current_time = fr->time + 0.5*dt;
            }
            else if(rk_iter == 1)
            {
                RK_bdry_time = nfr->time = fr->time + 0.5*dt;
                current_time = (649.0/1600.0)*(-dt*10890423.0*1600.0/(25193600.0*649.0)+fr->time)+
                     (951.0/1600.0)*(dt*5000.0*1600.0/(7873.0*951.0) + fr->time + 0.5*dt);
            }
            else if(rk_iter == 2)
            {
                t1 = fr->time + 0.5*dt;
                t2 = (649.0/1600.0)*(-dt*10890423.0*1600.0/(25193600.0*649.0)+fr->time)+
                     (951.0/1600.0)*(dt*5000.0*1600.0/(7873.0*951.0) + fr->time + 0.5*dt);

                RK_bdry_time = nfr->time = t2;
                current_time = (53989.0/2500000.0)*(-dt*(2500000.0/53989.0)*(102261.0/5000000.0)+fr->time)+
                 (4806213.0/20000000.0)*(-dt*(20000000.0/4806213.0)*5121.0/20000.0+t1)+
                 (23619.0/32000.0)*(dt*(32000.0/23619.0)*7873.0/10000.0+t2);; 
            }
            else
            { 
                t1 = fr->time + 0.5*dt;
                t2 = (649.0/1600.0)*(-dt*10890423.0*1600.0/(25193600.0*649.0)+fr->time)+
                     (951.0/1600.0)*(dt*5000.0*1600.0/(7873.0*951.0) + fr->time + 0.5*dt);

                RK_bdry_time = nfr->time = (53989.0/2500000.0)*(-dt*(2500000.0/53989.0)*(102261.0/5000000.0)+fr->time)+
                 (4806213.0/20000000.0)*(-dt*(20000000.0/4806213.0)*5121.0/20000.0+t1)+
                 (23619.0/32000.0)*(dt*(32000.0/23619.0)*7873.0/10000.0+t2);
                current_time = fr->time + dt;
            }
        }
        else
        {
            printf("ERROR: set_RK_time_for_bdry, implement RK_STEP = %d\n", RK_STEP);
            clean_up(ERROR);
        }
}

//ok
/*
*                   attach_cell_averages_in_buffer():
*
*       First refreshes the buffer data with the update routine selected by
*       the active debugging() keys, then walks every triangle of every
*       surface of fr->mesh and, for triangles that are on a boundary
*       (Boundary_tri() or tri_on_phy_bdry()) and are not SUBDOMAIN
*       triangles, calls the problem specific attach routine with step
*       index RK_STEP+1 and no limiting store.
*/
LOCAL void   attach_cell_averages_in_buffer(
	Front    *fr,
	Mid_soln *midsoln)
{
        SURFACE   **sp;
        TRI       *cur;

        if(debugging("Lax") || debugging("Sod"))
            update_buffer_x_ref_for_constraint(fr,midsoln);
        else if(debugging("v_evo") || debugging("acc_vert") || debugging("db_Mach") ||
                debugging("g_sine") || debugging("Mach_step") || debugging("mag_acc_vert") ||
                debugging("rotor") || debugging("decay_alfven"))
        {
#if defined(__MPI__)
            BLOCK_SIZE = GetHypPPBlockSize();
            update_db_Mach_buffer(fr,midsoln,0,NULL);
#else
            /* serial run: these three problems take no constraint update */
            if(!(debugging("db_Mach") || debugging("g_sine") || debugging("Mach_step")))
                update_buffer_for_constraint(fr,midsoln);
#endif // if defined(__MPI__)
        }

        for(sp = fr->mesh->surfaces; sp && *sp; sp++)
        {
            for(cur = first_tri(*sp); !at_end_of_tri_list(cur,*sp); cur = cur->next)
            {
                /* only non-SUBDOMAIN triangles on a boundary are treated */
                if(cur->BC_type == SUBDOMAIN)
                    continue;
                if(!(Boundary_tri(cur) || tri_on_phy_bdry(cur)))
                    continue;

                if(debugging("g_sine") || debugging("mag_g_sine"))
                {
                    attach_g_sine_buffer_states_ver2(fr,midsoln,RK_STEP+1,cur,NULL);
                }
                else if(debugging("Burgers"))
                {
                    ///// implemented, need to check
                    if(!debugging("diag_tri"))
                        attach_Burgers_buffer_states_ver2(fr,midsoln,RK_STEP+1,cur,NULL);
                }
                else if(debugging("Sod"))
                {
                    shock_vort_attach_buffer_states(fr,midsoln,RK_STEP+1,cur,NULL);
                }
                else if(debugging("db_Mach"))
                {
                    db_Mach_attach_buffer_states(fr,midsoln,RK_STEP+1,cur,NULL);
                }
                else if(debugging("Mach_step"))
                {
                    Mach_step_attach_buffer_states(fr,midsoln,RK_STEP+1,cur,NULL);
                }
            }
        }
}

/*
*               attach_cell_averages_in_buffer_at_step():
*
*       Same traversal as attach_cell_averages_in_buffer(), but the caller
*       supplies the step index rk_step that is forwarded to the buffer
*       update and to the problem specific attach routines.  Only
*       non-SUBDOMAIN triangles for which Boundary_tri() or
*       tri_on_phy_bdry() holds are passed to an attach routine.
*/
LOCAL void   attach_cell_averages_in_buffer_at_step(
	Front    *fr,
    int      rk_step,
	Mid_soln *midsoln)
{
        SURFACE   **sp;
        TRI       *cur;

        if(debugging("Lax") || debugging("Sod"))
            update_buffer_x_ref_for_constraint(fr,midsoln);
        else if(debugging("v_evo") || debugging("acc_vert") || debugging("db_Mach") ||
                debugging("g_sine") || debugging("Mach_step") || debugging("mag_acc_vert") ||
                debugging("rotor") || debugging("decay_alfven"))
        {
            /* nothing is updated here unless built with __MPI__ */
#if defined(__MPI__)
            BLOCK_SIZE = GetHypPPBlockSize();
            update_db_Mach_buffer(fr,midsoln,rk_step,NULL);
#endif // if defined(__MPI__)
        }

        for(sp = fr->mesh->surfaces; sp && *sp; sp++)
        {
            for(cur = first_tri(*sp); !at_end_of_tri_list(cur,*sp); cur = cur->next)
            {
                if(cur->BC_type == SUBDOMAIN)
                    continue;
                if(!(Boundary_tri(cur) || tri_on_phy_bdry(cur)))
                    continue;

                if(debugging("g_sine") || debugging("mag_g_sine"))
                {
                    attach_g_sine_buffer_states_ver2(fr,midsoln,rk_step,cur,NULL);
                }
                else if(debugging("Burgers"))
                {
                    ///// implemented, need to check
                    if(!debugging("diag_tri"))
                        attach_Burgers_buffer_states_ver2(fr,midsoln,rk_step,cur,NULL);
                }
                else if(debugging("Sod"))
                {
                    shock_vort_attach_buffer_states(fr,midsoln,rk_step,cur,NULL);
                }
                else if(debugging("db_Mach"))
                {
                    db_Mach_attach_buffer_states(fr,midsoln,rk_step,cur,NULL);
                }
                else if(debugging("Mach_step"))
                {
                    Mach_step_attach_buffer_states(fr,midsoln,rk_step,cur,NULL);
                }
            }
        }
}

/*
*                       print_bottom_tris_state():
*
*       Diagnostic output.  Prints centroid, id, density, pressure and the
*       two velocity components of every triangle whose centroid has
*       50.5 < y < 54.5 and an x coordinate strictly between
*       fr->rect_grid->L[0] and fr->rect_grid->U[0].
*/
LOCAL void   print_bottom_tris_state(
	Front    *fr)
{
        SURFACE      **sp;
        TRI          *cur;
        double       cx, cy;

        for(sp = fr->mesh->surfaces; sp && *sp; sp++)
        {
            for(cur = first_tri(*sp); !at_end_of_tri_list(cur,*sp); cur = cur->next)
            {
                cx = fg_centroid(cur)[0];
                cy = fg_centroid(cur)[1];

                /* y band first, the x range is only looked at inside it */
                if(!(cy < 54.5 && cy > 50.5))
                    continue;
                if(!(cx > fr->rect_grid->L[0] && cx < fr->rect_grid->U[0]))
                    continue;

                printf("cent[%g %g] tri(%d):\n", cx, cy, cur->id);
                printf("den, p, v [%12.11g %12.11g %12.11g %12.11g]\n", Dens(cur->st),
                      pressure(cur->st), vel(0, cur->st), vel(1, cur->st));
            }
        }
}

// The ghost states are all piecewise const.
/*
*                           attach_buffer():
*
*       For every side of tri that has no neighbor, hooks ghost[side] in as
*       the neighbor on that side and fills in its geometry and state.
*
*       Input:
*           fr       - front; supplies sizest, interf and time
*           midsoln  - intermediate solutions, read when rk_step != RK_STEP
*           rk_step  - selects the source state: tri->st when equal to
*                      RK_STEP, midsoln[tri->id].st[rk_step] otherwise
*           tri      - triangle whose missing neighbors are replaced
*           ghost    - one pre-allocated ghost triangle per side
*       Output:
*           g_flag   - g_flag[side] is YES where a ghost was attached,
*                      NO where a real neighbor exists
*
*       Geometry: the ghost shares the two end points of the side (in
*       reversed order) and its third vertex is the third vertex of tri
*       passed through i_reflect_point().  Mass matrix, its inverse and the
*       triangle geometry of the ghost are recomputed afterwards.
*
*       State: the ghost state starts as a copy of the source state.  For
*       "g_sine" it is set by g_sine_evolution_initializer().  Otherwise
*       all dg coefficients are zeroed and the state is set from the edge
*       type of the side: CONST_P (pre/post shock values), NEUMANN
*       (reflect_state()) or OUT_FLOW (copy kept).  Any other edge type is
*       reported and the run is aborted.
*/
LOCAL void attach_buffer(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
	TRI      *tri,
        TRI      **ghost,
        int      *g_flag)
{
	TRI       *nbtri[3];
        int       side, i, dim = 2;
        int       use_pre_shock;
        float     nor[3], t[3], u, crds[3], fcent[3];
        POINT     *gp[3], *p[3];
        Locstate  st, gst;
        float     dirx[2] = {1.0, 0.0}, ans;
        size_t    sizest = fr->sizest;
        double   *cent;

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        /* Record the neighbors before any ghost is hooked in. */
        for(side = 0; side < 3; side++)
        {
            nbtri[side] = Tri_on_side(tri,side);
            g_flag[side] = (nbtri[side] == NULL) ? YES : NO;
        }

        for(side = 0; side < 3; side++)
        {
            if(nbtri[side] != NULL)
                continue;

            Tri_on_side(tri,side) = ghost[side];
            gst = ghost[side]->st;

            /* Normal of the side: side vector rotated by -90 degrees. */
            for(i = 0; i < dim; i++)
                t[i] = fg_side_vector(tri)[side][i];
            nor[0] = t[1];
            nor[1] = -t[0];

            /* p[0], p[1] are the end points of the side, p[2] is opposite. */
            for(i = 0; i < 3; i++)
            {
                gp[i] = Point_of_tri(ghost[side])[i];
                p[i] = Point_of_tri(tri)[(side+i)%3];
            }

            // Let ghost side 0 be the adjcent side
            for(i = 0; i < dim; i++)
            {
                Coords(gp[0])[i] = Coords(p[1])[i];
                Coords(gp[1])[i] = Coords(p[0])[i];
                Coords(gp[2])[i] = Coords(p[2])[i];
            }

            /*
            *  crds is a point on the line through p[0] and p[1].
            *  NOTE(review): u divides the dot product by
            *  fg_length_side(), not by the squared length, so crds is
            *  the foot of the perpendicular from p[2] only if that
            *  field stores a squared length -- confirm.
            */
            u = ((Coords(p[2])[0]-Coords(p[0])[0])*(Coords(p[1])[0]-Coords(p[0])[0]) +
                 (Coords(p[2])[1]-Coords(p[0])[1])*(Coords(p[1])[1]-Coords(p[0])[1]) )
                    /fg_length_side(tri)[side];
            crds[0] = Coords(p[0])[0] + u*(Coords(p[1])[0]-Coords(p[0])[0]);
            crds[1] = Coords(p[0])[1] + u*(Coords(p[1])[1]-Coords(p[0])[1]);

            i_reflect_point(gp[2], crds, nor, fr->interf);

            comp_mass_matrix(MAX_N_COEF,ghost[side],2,ghost[side]->Lmass_matrix);
            inverse_matrix(ghost[side]->Lmass_matrix,MAX_N_COEF,ghost[side]->mass_inv);

            set_comput_tri_geom_flag(YES);
            set_normal_of_tri(ghost[side]);
            set_comput_tri_geom_flag(NO);
            cent = fg_centroid(ghost[side]);
            for(i = 0; i < dim; i++)
                fcent[i] = cent[i];

            /* Start from a copy of the interior state. */
            assign(gst, st, sizest);

            if(debugging("g_sine"))
            {
                set_state(gst,GAS_STATE,gst);
                g_sine_evolution_initializer(ghost[side],gst,fr->time);
                continue;
            }

            set_state(gst,TGAS_STATE,gst);
            for(i = 0; i < MAX_N_COEF; i++)
            {
                dg_Dens(gst)[i] = 0.0;
                dg_Mom(gst)[0][i] = 0.0;
                dg_Mom(gst)[1][i] = 0.0;
                dg_Energy(gst)[i] = 0.0;
            }

            if(fg_e_type(tri)[side] == CONST_P)
            {
                /*
                *  Component of the normal along dirx.  The second term
                *  used to repeat nor[0]*dirx[0]; with dirx = (1,0) the
                *  branch taken is unchanged by the correction.
                */
                ans = fabs(nor[0]*dirx[0] + nor[1]*dirx[1]);

                /*
                *  Left side (normal pointing in -x), Mach 10 shock:
                *  pre shock condition at and above sk_y_pos.  Right
                *  side, bottom side and the left side below sk_y_pos
                *  all take the post shock condition.
                */
                use_pre_shock = (ans > 0.5 && nor[0] < -0.5 &&
                                 !(cent[1] < sk_y_pos));
                if(use_pre_shock)
                {
                    Dens(gst) = pre_sk_st[0];
                    Vel(gst)[0] = pre_sk_st[1];
                    Vel(gst)[1] = pre_sk_st[2];
                    Press(gst) = pre_sk_st[3];
                }
                else
                {
                    Dens(gst) = post_sk_st[0];
                    Vel(gst)[0] = post_sk_st[1]*cos(radians(60.0));
                    Vel(gst)[1] = post_sk_st[1]*sin(radians(60.0));
                    Press(gst) = post_sk_st[3];
                }
                set_state(gst,GAS_STATE,gst);
            }
            else if(fg_e_type(tri)[side] == NEUMANN)
            {
                reflect_state(gst, fr->interf, fcent, crds, nor);
                set_state(gst,GAS_STATE,gst);
            }
            else if(fg_e_type(tri)[side] == OUT_FLOW)
            {
                /* keep the copied interior state */
                set_state(gst,GAS_STATE,gst);
            }
            else
            {
                printf("ERROR attach_buffer %d, implement type %d\n",
                             tri->id, fg_e_type(tri)[side]);
                clean_up(ERROR);
            }
        }

}

// Set the ghost states in the buffer (BC_type == SUBDOMAIN) neighbors of tri.
//
// Parameters:
//   fr      - only fr->sizest is read; it is the size handed to assign().
//   midsoln - per-tri intermediate states, indexed as midsoln[id].st[stage].
//   rk_step - when equal to RK_STEP the states stored on the tris (->st) are
//             used; otherwise midsoln[id].st[rk_step] is used.
//   tri     - the tri whose three side neighbors are examined.
//   store   - optional limiter storage, indexed as store[stage][id]; when
//             NULL the control-volume bookkeeping at the end is skipped.
//
// For each buffer neighbor the ghost state starts as a copy of tri's state,
// then receives (through con_u_to_state()) the conserved variables that
// con_u_at_pt() returns for the midpoint of the shared edge, and finally has
// zero_moments() applied.  The result is also copied into stage 0 of midsoln
// for the ghost tri.
//
// NOTE(review): the evaluation point used to be the plain sum of the two
// edge end points (twice the midpoint).  It is now scaled by 0.5 so that it
// lies on the shared edge; this changes the numbers written to the ghost
// state and should be confirmed against a reference run.
LIB_LOCAL void attach_buffer_states(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        TRI      *tri,
        Limiting_store **store)
{
        TRI       *nbtri[3];
        int       side, i, dim = 2, indx, cv_indx;
        Locstate  st, gst;
        size_t    sizest = fr->sizest;
        double    pt[MAXD], con_u[10];

        // Interior state for the requested stage.
        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(side = 0; side < 3; side++)
            nbtri[side] = Tri_on_side(tri,side);

        for(side = 0; side < 3; side++)
        {
            // A missing neighbor is skipped instead of being dereferenced.
            if(nbtri[side] == NULL || nbtri[side]->BC_type != SUBDOMAIN)
                continue;

            // Midpoint of the edge between vertex `side` and the next vertex.
            for(i = 0; i < dim; i++)
                pt[i] = 0.5*(Coords(Point_of_tri(tri)[side])[i] +
                             Coords(Point_of_tri(tri)[(side+1)%3])[i]);
            con_u_at_pt(st, pt, fg_centroid(tri), sqrt(fg_area(tri)), con_u);

            if(rk_step == RK_STEP)
                gst = nbtri[side]->st;
            else
                gst = midsoln[nbtri[side]->id].st[rk_step];

            // Copy first so every field of the ghost state is defined, then
            // replace the conserved variables and clear the moments.
            assign(gst, st, sizest);
            con_u_to_state(con_u, dim, gst);
            zero_moments(gst);

            // The subdomain zero level state, for reconstruction purpose
            // (P1 projection).
            assign(midsoln[nbtri[side]->id].st[0], gst, sizest);
        }

        if(store == NULL)
            return;

        for(side = 0; side < 3; side++)
        {
            if(nbtri[side] == NULL || nbtri[side]->BC_type != SUBDOMAIN)
                continue;

            if(rk_step == RK_STEP)
            {
                // Final stage: the coefficients go to the tri's own cv_soln.
                gst = nbtri[side]->st;
                for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
                {
                    for(indx = 0; indx < MAX_N_COEF; indx++)
                    {
                        nbtri[side]->cv_soln->cv_dg_rho[cv_indx][indx] = dg_Dens(gst)[indx];
                        nbtri[side]->cv_soln->cv_dg_e[cv_indx][indx] = dg_Energy(gst)[indx];
                        nbtri[side]->cv_soln->cv_dg_m[0][cv_indx][indx] = dg_Mom(gst)[0][indx];
                        nbtri[side]->cv_soln->cv_dg_m[1][cv_indx][indx] = dg_Mom(gst)[1][indx];
                    }
                }
                if(6 == MAX_N_COEF)
                    map_poly_SV_to_CV_p2(nbtri[side]);
                else if(10 == MAX_N_COEF)
                    map_poly_SV_to_CV_p3(nbtri[side]);
            }
            else
            {
                // Intermediate stage: the coefficients go to store[rk_step].
                gst = midsoln[nbtri[side]->id].st[rk_step];
                for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
                {
                    for(indx = 0; indx < MAX_N_COEF; indx++)
                    {
                        store[rk_step][nbtri[side]->id].cv_dg_rho[cv_indx][indx] =
                                      dg_Dens(gst)[indx];
                        store[rk_step][nbtri[side]->id].cv_dg_e[cv_indx][indx] =
                                      dg_Energy(gst)[indx];
                        store[rk_step][nbtri[side]->id].cv_dg_m[0][cv_indx][indx] =
                                      dg_Mom(gst)[0][indx];
                        store[rk_step][nbtri[side]->id].cv_dg_m[1][cv_indx][indx] =
                                      dg_Mom(gst)[1][indx];
                    }
                }
                if(6 == MAX_N_COEF)
                    map_poly_SV_to_CV_ver2_p2(nbtri[side], store[rk_step]);
                else if(10 == MAX_N_COEF)
                    map_poly_SV_to_CV_ver2_p3(nbtri[side], store[rk_step]);
            }

            // The subdomain zero level state, for reconstruction purpose
            // (P1 projection): the same coefficients are always mirrored
            // into store[0] as well.
            for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
            {
                for(indx = 0; indx < MAX_N_COEF; indx++)
                {
                    store[0][nbtri[side]->id].cv_dg_rho[cv_indx][indx] =
                                  dg_Dens(gst)[indx];
                    store[0][nbtri[side]->id].cv_dg_e[cv_indx][indx] =
                                  dg_Energy(gst)[indx];
                    store[0][nbtri[side]->id].cv_dg_m[0][cv_indx][indx] =
                                  dg_Mom(gst)[0][indx];
                    store[0][nbtri[side]->id].cv_dg_m[1][cv_indx][indx] =
                                  dg_Mom(gst)[1][indx];
                }
            }
            if(6 == MAX_N_COEF)
                map_poly_SV_to_CV_ver2_p2(nbtri[side], store[0]);
            else if(10 == MAX_N_COEF)
                map_poly_SV_to_CV_ver2_p3(nbtri[side], store[0]);
        }
}

// The ghost states are set in buffer tris.
LIB_LOCAL void shock_vort_attach_buffer_states(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
	TRI      *tri,
        Limiting_store **store)
{
	TRI       *nbtri[3], *gst_tri;
        int       side, i, dim = 2, indx, cv_indx;
        float     nor[3], t[3], u, crds[3], fcent[3];
        Locstate  st, gst;
        size_t    sizest = fr->sizest;
        double   *cent, pt[MAXD], con_u[10], **Lmass_matrix; 
        float     dirx[2] = {1.0, 0.0}, ans;
        GRID_DIRECTION Gside;

        for(side = 0; side < 3; side++)
            nbtri[side] = Tri_on_side(tri,side);

        if((RK_STEP+1) == rk_step)
        {
            // for constraint DG
            st = midsoln[tri->id].st[0];
            for(side = 0; side < 3; side++)
            {
                if(nbtri[side]->BC_type == SUBDOMAIN &&
                   (fg_e_type(tri)[side] == IN_FLOW ||
                    fg_e_type(tri)[side] == OUT_FLOW))
                {
                    gst = midsoln[nbtri[side]->id].st[0];
                    assign(gst, st, sizest);
                    if(debugging("Shu_Osher") && (fg_e_type(tri)[side] == IN_FLOW))
                    {
                        zero_moments(gst);
                        Dens(gst) = 3.857143;
                        Mom(gst)[0] = 0.0;
                        Mom(gst)[1] = 3.857143*2.629369;
                        Energy(gst) = 10.333333/0.4 + 0.5*Dens(gst)*(sqr(2.629369));
                    }
                }
            }
        }
        else
        {

            if(rk_step == RK_STEP)
                st = tri->st;
            else
                st = midsoln[tri->id].st[rk_step];

            for(side = 0; side < 3; side++)
            {
                if(nbtri[side]->BC_type == SUBDOMAIN &&
                   (fg_e_type(tri)[side] == IN_FLOW ||  
                    fg_e_type(tri)[side] == OUT_FLOW))
                {
                    /// NEW
                    for(indx = 0; indx < 3; indx++)
                    {
                        if(tri ==  Tri_on_side(nbtri[side],indx))
                            break;
                    }
                    for(i = 0; i < dim; i++)
                        t[i] = fg_side_vector(nbtri[side])[indx][i];
                    nor[0] = t[1];
                    nor[1] = -t[0];
                    // Convert to x-y coord.
                    ans = fabs(nor[0]*dirx[0] + nor[1]*dirx[1]);
                    if(ans > 0.5 && nor[0] < -0.5) // right side
                        Gside = EAST;
                    else if(ans > 0.5 && nor[0] > 0.5) //  left side
                        Gside = WEST;
                    else if(ans < 0.5 && nor[1] < -0.5) // top
                        Gside = NORTH;
                    else // bottom
                        Gside = SOUTH;
                    if(rk_step == RK_STEP)
                        gst = nbtri[side]->st;
                    else
                        gst = midsoln[nbtri[side]->id].st[rk_step];
                    assign(gst, st, sizest);
                    Lmass_matrix = nbtri[side]->Lmass_matrix;
                    switch(Gside)
                    {
                    case EAST:
                    case WEST:
                    dg_Dens(gst)[1] *= -1.0;
                    dg_Dens(gst)[4] *= -1.0;
                    if(MAX_N_COEF ==  10)
                    {
                        dg_Dens(gst)[6] *= -1.0;
                        dg_Dens(gst)[8] *= -1.0;

                        dg_Energy(gst)[6] *= -1.0;
                        dg_Energy(gst)[8] *= -1.0;

                        dg_Mom(gst)[1][6] *= -1.0;
                        dg_Mom(gst)[1][8] *= -1.0;

                        dg_Mom(gst)[0][6] *= -1.0;
                        dg_Mom(gst)[0][8] *= -1.0;
                    }
                    dg_Energy(gst)[1] *= -1.0;
                    dg_Energy(gst)[4] *= -1.0;

                    // dg_Energy(gst)[6] *= -1.0;
                    // dg_Energy(gst)[8] *= -1.0;

                    dg_Mom(gst)[1][1] *= -1.0;
                    dg_Mom(gst)[1][4] *= -1.0;

                    // dg_Mom(gst)[1][6] *= -1.0;
                    // dg_Mom(gst)[1][8] *= -1.0;

                    dg_Mom(gst)[0][1] *= -1.0;
                    dg_Mom(gst)[0][4] *= -1.0;
  
                    // dg_Mom(gst)[0][6] *= -1.0;
                    // dg_Mom(gst)[0][8] *= -1.0;
                    break;
                    case NORTH:
                    case SOUTH:
                    dg_Dens(gst)[2] *= -1.0;
                    dg_Dens(gst)[4] *= -1.0;
                    if(MAX_N_COEF ==  10)
                    {
                        dg_Dens(gst)[7] *= -1.0;
                        dg_Dens(gst)[9] *= -1.0;
                        
                        dg_Energy(gst)[7] *= -1.0;
                        dg_Energy(gst)[9] *= -1.0;

                        dg_Mom(gst)[1][7] *= -1.0;
                        dg_Mom(gst)[1][9] *= -1.0;

                        dg_Mom(gst)[0][7] *= -1.0;
                        dg_Mom(gst)[0][9] *= -1.0;
                    }

                    dg_Energy(gst)[2] *= -1.0;
                    dg_Energy(gst)[4] *= -1.0;

                    // dg_Energy(gst)[7] *= -1.0;
                    // dg_Energy(gst)[9] *= -1.0;

                    dg_Mom(gst)[1][2] *= -1.0;
                    dg_Mom(gst)[1][4] *= -1.0;

                    // dg_Mom(gst)[1][7] *= -1.0;
                    // dg_Mom(gst)[1][9] *= -1.0;

                    dg_Mom(gst)[0][2] *= -1.0;
                    dg_Mom(gst)[0][4] *= -1.0;

                    // dg_Mom(gst)[0][7] *= -1.0;
                    // dg_Mom(gst)[0][9] *= -1.0;
                    break;
                    }
                    // Compute average soln
                    Dens(gst) = 0.0;
                    Mom(gst)[0] = 0.0;
                    Mom(gst)[1] = 0.0;
                    Energy(gst) = 0.0;
                    for(indx = 0; indx < MAX_N_COEF; indx++)
                    {
                        Dens(gst) += dg_Dens(gst)[indx]*Lmass_matrix[0][indx];
                        Mom(gst)[0] += dg_Mom(gst)[0][indx]*Lmass_matrix[0][indx];
                        Mom(gst)[1] += dg_Mom(gst)[1][indx]*Lmass_matrix[0][indx];
                        Energy(gst) += dg_Energy(gst)[indx]*Lmass_matrix[0][indx];
                    }
                    Dens(gst) /= Lmass_matrix[0][0];
                    Mom(gst)[0] /= Lmass_matrix[0][0];
                    Mom(gst)[1] /= Lmass_matrix[0][0];
                    Energy(gst) /= Lmass_matrix[0][0];

                    zero_moments(gst); /// tmp, to test Shu-Osher problem

                    if(debugging("Shu_Osher") && (fg_e_type(tri)[side] == IN_FLOW))
                    {
                        zero_moments(gst);
                        Dens(gst) = 3.857143;
                        Mom(gst)[0] = 0.0;
                        Mom(gst)[1] = 3.857143*2.629369;
                        Energy(gst) = 10.333333/0.4 + 0.5*Dens(gst)*(sqr(2.629369));
                    }

                    // The subdomain zero level state, for reconstruction purpose (P1 projection)
                    assign(midsoln[nbtri[side]->id].st[0], gst, sizest);
                    /// END NEW
                    /** 
                    for(i = 0; i < dim; i++)
                        pt[i] = Coords(Point_of_tri(tri)[side])[i] +
                        Coords(Point_of_tri(tri)[(side+1)%3])[i];
                    con_u_at_pt(st, pt, fg_centroid(tri), con_u);

                    if(rk_step == RK_STEP)
                        gst = nbtri[side]->st;
                    else
                        gst = midsoln[nbtri[side]->id].st[rk_step];

                    assign(gst, st, sizest);
                    // The subdomain zero level state, for reconstruction purpose (P1 projection)
                    // zero_moments(gst); // submission 1 and 2
                    // p1_L2projection_ver2(tri,gst,gst);

                    /// New, submission 3
                    con_u_to_state(con_u, dim, gst);
                    zero_moments(gst);
                    /// End New, submission 3
                    assign(midsoln[nbtri[side]->id].st[0], gst, sizest);

                    // TMP
                    if(nbtri[side]->id == 2400)
                    {
                        printf("print ghost tri state\n");
                        print_tri_crds(nbtri[side]);
                        verbose_print_state("ghost",gst);
                        verbose_print_state("interior",st);
                        printf("--------------------\n\n");
                    }
                    **/
                } // END: if(nbtri[side]->BC_type == SUBDOMAIN &&
                  //        (fg_e_type(tri)[side] == IN_FLOW ||
                  //         fg_e_type(tri)[side] == OUT_FLOW))
            }
        }


        /// for P3 finite volume, tri has two subdomain type neighbors that
        /// need to be filled. 
        /// Here tri is the neighbor of the physical boundary tri. !!!!
        if((RK_STEP+1) == rk_step)
        {
            st = midsoln[tri->id].st[0];
            for(side = 0; side < 3; side++)
            {
                if(nbtri[side]->BC_type == SUBDOMAIN)
                {
                    gst_tri = nbtri[side];
                    break;
                }
            }
            for(side = 0; side < 3; side++)
            {
                nbtri[side] = Tri_on_side(gst_tri,side);
                if(nbtri[side] != NULL && nbtri[side]->BC_type == SUBDOMAIN)
                {
                    gst = midsoln[nbtri[side]->id].st[0];
                    assign(gst, midsoln[gst_tri->id].st[0], sizest);
                    if(debugging("Shu_Osher") && (fg_e_type(tri)[side] == IN_FLOW))
                    {
                        zero_moments(gst);
                        Dens(gst) = 3.857143;
                        Mom(gst)[0] = 0.0;
                        Mom(gst)[1] = 3.857143*2.629369;
                        Energy(gst) = 10.333333/0.4 + 0.5*Dens(gst)*(sqr(2.629369));
                    }
                }
            }
        }

        if(store != NULL)
        {
            for(side = 0; side < 3; side++)
            {
                if(nbtri[side]->BC_type == SUBDOMAIN &&
                   (fg_e_type(tri)[side] == IN_FLOW ||
                    fg_e_type(tri)[side] == OUT_FLOW))
                {
                    if(rk_step == RK_STEP)
                    {
                        gst = nbtri[side]->st;
                        for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
                        {
                            for(indx = 0; indx < MAX_N_COEF; indx++)
                            {
                                nbtri[side]->cv_soln->cv_dg_rho[cv_indx][indx] = dg_Dens(gst)[indx];
                                nbtri[side]->cv_soln->cv_dg_e[cv_indx][indx] = dg_Energy(gst)[indx];
                                nbtri[side]->cv_soln->cv_dg_m[0][cv_indx][indx] = dg_Mom(gst)[0][indx];
                                nbtri[side]->cv_soln->cv_dg_m[1][cv_indx][indx] = dg_Mom(gst)[1][indx];
                            }
                        }
                        // map_poly_SV_to_CV(nbtri[side]);
                        if(MAX_N_COEF ==  6)
                            map_poly_SV_to_CV_p2(nbtri[side]);
                        else
                            map_poly_SV_to_CV_p3(nbtri[side]);
                    }
                    else
                    {
                        gst = midsoln[nbtri[side]->id].st[rk_step];
                        for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
                        {
                            for(indx = 0; indx < MAX_N_COEF; indx++)
                            {
                                store[rk_step][nbtri[side]->id].cv_dg_rho[cv_indx][indx] =
                                              dg_Dens(gst)[indx];
                                store[rk_step][nbtri[side]->id].cv_dg_e[cv_indx][indx] =
                                              dg_Energy(gst)[indx];
                                store[rk_step][nbtri[side]->id].cv_dg_m[0][cv_indx][indx] =
                                              dg_Mom(gst)[0][indx];
                                store[rk_step][nbtri[side]->id].cv_dg_m[1][cv_indx][indx] =
                                              dg_Mom(gst)[1][indx];
                            }
                        }
                        if(MAX_N_COEF ==  6)
                            map_poly_SV_to_CV_ver2_p2(nbtri[side], store[rk_step]);
                        else
                            map_poly_SV_to_CV_ver2_p3(nbtri[side], store[rk_step]);
                    }
                    // The subdomain zero level state, for reconstruction purpose (P1 projection)
                    for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
                    {
                        for(indx = 0; indx < MAX_N_COEF; indx++)
                        {
                            store[0][nbtri[side]->id].cv_dg_rho[cv_indx][indx] =
                                          dg_Dens(gst)[indx];
                            store[0][nbtri[side]->id].cv_dg_e[cv_indx][indx] =
                                          dg_Energy(gst)[indx];
                            store[0][nbtri[side]->id].cv_dg_m[0][cv_indx][indx] =
                                          dg_Mom(gst)[0][indx];
                            store[0][nbtri[side]->id].cv_dg_m[1][cv_indx][indx] =
                                          dg_Mom(gst)[1][indx];
                        }
                    }
                    // map_poly_SV_to_CV_ver2(nbtri[side], store[0]);
                    if(MAX_N_COEF ==  6)
                        map_poly_SV_to_CV_ver2_p2(nbtri[side], store[0]);
                    else
                        map_poly_SV_to_CV_ver2_p3(nbtri[side], store[0]);
                }
            }
        }
}

LIB_LOCAL void Mach_step_attach_buffer_states(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        TRI      *tri,
        Limiting_store **store)
{
        TRI       *nbtri[3];
        int       side, i, dim = 2, indx, cv_indx;
        float     nor[3], t[3], u, crds[3], fcent[3];
        Locstate  st, gst;
        float     dirx[2] = {1.0, 0.0}, ans;
        size_t    sizest = fr->sizest;
        double    *cent = fg_centroid(tri), *nbcent;
        double    **Lmass_matrix;
        double    pt[MAXD], con_u[4];

        for(side = 0; side < 3; side++)
            nbtri[side] = Tri_on_side(tri,side);

        if((RK_STEP+1) == rk_step)
        {
        /////////////////////////////////////
        // for constraint DG, only avg. value
        /////////////////////////////////////
            st = midsoln[tri->id].st[0];
            for(side = 0; side < 3; side++)
            {
                if(nbtri[side]->BC_type == SUBDOMAIN)
                {
                    if(fg_e_type(tri)[side] == NEUMANN)
                    {
                        gst = midsoln[nbtri[side]->id].st[0];
                        assign(gst, st, sizest);
                        for(i = 0; i < dim; i++)
                            t[i] = fg_side_vector(tri)[side][i];
                        nor[0] = t[1];
                        nor[1] = -t[0];
                        ans = fabs(nor[0]*dirx[0] + nor[1]*dirx[1]);

                        if(ans > 0.5 && nor[0] > 0.5) // right side
                            Mom(gst)[0] *= -1.0;
                        else if(ans > 0.5 && nor[0] < -0.5) // left side
                            Mom(gst)[0] *= -1.0;
                        else if(ans < 0.5 && nor[1] > 0.5) // top step side
                            Mom(gst)[1] *= -1.0;
                        else
                        {
                            printf("ERROR: Mach_step_attach_buffer_states(), unknown case\n");
                            clean_up(ERROR);
                        }
                    }
                    else
                    {
#if defined(__MPI__)
                        if(YES == tri_outside_Mach_step_boundary(nbtri[side]))
                        {
                            gst = midsoln[nbtri[side]->id].st[0];
                            assign(gst, st, sizest);
                            if(fg_e_type(tri)[side] == OUT_FLOW)
                            {
                                NULL;
                            }
                            else if(fg_e_type(tri)[side] == IN_FLOW)
                            {
                                set_state(gst,TGAS_STATE,gst);
                                Dens(gst) = 1.4;
                                Press(gst) = 1.0;
                                Vel(gst)[0] = 0.0; Vel(gst)[1] = 3.0;
                                set_state(gst,GAS_STATE,gst);
                            }
                            else
                            {
                                printf("ERROR: Mach_step_attach_buffer_states(), unknown case 2\n");
                                clean_up(ERROR);
                            }
                        }
#else
                        gst = midsoln[nbtri[side]->id].st[0];
                        assign(gst, st, sizest);
                        if(fg_e_type(tri)[side] == OUT_FLOW)
                        {
                            NULL;
                        }
                        else if(fg_e_type(tri)[side] == IN_FLOW)
                        {
                            set_state(gst,TGAS_STATE,gst);
                            Dens(gst) = 1.4;
                            Press(gst) = 1.0;
                            Vel(gst)[0] = 0.0; Vel(gst)[1] = 3.0;
                            set_state(gst,GAS_STATE,gst);
                        }
                        else
                        {
                            printf("ERROR: Mach_step_attach_buffer_states(), unknown case 3\n");
                            clean_up(ERROR);
                        }
#endif // if defined(__MPI__)
                    }
                }
            }
            return;
        }

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(side = 0; side < 3; side++)
        {
            if(nbtri[side]->BC_type == SUBDOMAIN)
            {
                if(fg_e_type(tri)[side] == NEUMANN)
                {
                    if(rk_step == RK_STEP)
                        gst = nbtri[side]->st;
                    else
                        gst = midsoln[nbtri[side]->id].st[rk_step];
                    assign(gst, st, sizest);
                    Lmass_matrix = nbtri[side]->Lmass_matrix;
                    for(i = 0; i < dim; i++)
                        t[i] = fg_side_vector(tri)[side][i];
                    nor[0] = t[1];
                    nor[1] = -t[0];
                    ans = fabs(nor[0]*dirx[0] + nor[1]*dirx[1]);

                    if((ans > 0.5 && nor[0] > 0.5) ||
                       (ans > 0.5 && nor[0] < -0.5)
                      ) // right side and left side
                    {
                        dg_Dens(gst)[1] *= -1.0;
                        dg_Dens(gst)[4] *= -1.0;
                        dg_Energy(gst)[1] *= -1.0;
                        dg_Energy(gst)[4] *= -1.0;
                        dg_Mom(gst)[1][1] *= -1.0;
                        dg_Mom(gst)[1][4] *= -1.0;
                        dg_Mom(gst)[0][0] *= -1.0;
                        dg_Mom(gst)[0][2] *= -1.0;
                        dg_Mom(gst)[0][3] *= -1.0;
                        dg_Mom(gst)[0][5] *= -1.0;
                    }
                    else if(ans < 0.5 && nor[1] > 0.5) // top step side
                    {
                        dg_Dens(gst)[2] *= -1.0;
                        dg_Dens(gst)[4] *= -1.0;
                        dg_Energy(gst)[2] *= -1.0;
                        dg_Energy(gst)[4] *= -1.0;
                        dg_Mom(gst)[0][2] *= -1.0;
                        dg_Mom(gst)[0][4] *= -1.0;
                        dg_Mom(gst)[1][0] *= -1.0;
                        dg_Mom(gst)[1][1] *= -1.0;
                        dg_Mom(gst)[1][3] *= -1.0;
                        dg_Mom(gst)[1][5] *= -1.0;
                    }

                    // Compute average soln
                    Dens(gst) = 0.0;
                    Mom(gst)[0] = 0.0;
                    Mom(gst)[1] = 0.0;
                    Energy(gst) = 0.0;
                    for(indx = 0; indx < MAX_N_COEF; indx++)
                    {
                        Dens(gst) += dg_Dens(gst)[indx]*Lmass_matrix[0][indx];
                        Mom(gst)[0] += dg_Mom(gst)[0][indx]*Lmass_matrix[0][indx];
                        Mom(gst)[1] += dg_Mom(gst)[1][indx]*Lmass_matrix[0][indx];
                        Energy(gst) += dg_Energy(gst)[indx]*Lmass_matrix[0][indx];
                    }
                    Dens(gst) /= Lmass_matrix[0][0];
                    Mom(gst)[0] /= Lmass_matrix[0][0];
                    Mom(gst)[1] /= Lmass_matrix[0][0];
                    Energy(gst) /= Lmass_matrix[0][0];

                    // The subdomain zero level state, for reconstruction purpose (P1 projection)
                    assign(midsoln[nbtri[side]->id].st[0], gst, sizest);
                }
                else
                {
#if defined(__MPI__)
                    if(YES == tri_outside_Mach_step_boundary(nbtri[side]))
                    {
                        if(rk_step == RK_STEP)
                            gst = nbtri[side]->st;
                        else
                            gst = midsoln[nbtri[side]->id].st[rk_step];

                        assign(gst, st, sizest);
                        if(fg_e_type(tri)[side] == OUT_FLOW)
                        {
                            NULL;
                        }
                        else if(fg_e_type(tri)[side] == IN_FLOW)
                        {
                            set_state(gst,TGAS_STATE,gst);
                            Dens(gst) = 1.4;
                            Press(gst) = 1.0;
                            Vel(gst)[0] = 0.0; Vel(gst)[1] = 3.0;
                            set_state(gst,GAS_STATE,gst);
                        }
                        zero_moments(gst);
                        assign(midsoln[nbtri[side]->id].st[0], gst, sizest);
                    }
#else
                    if(rk_step == RK_STEP)
                        gst = nbtri[side]->st;
                    else
                        gst = midsoln[nbtri[side]->id].st[rk_step];
                    assign(gst, st, sizest);
                    if(fg_e_type(tri)[side] == OUT_FLOW)
                    {
                        NULL;
                    }
                    else if(fg_e_type(tri)[side] == IN_FLOW)
                    {
                        set_state(gst,TGAS_STATE,gst);
                        Dens(gst) = 1.4;
                        Press(gst) = 1.0;
                        Vel(gst)[0] = 0.0; Vel(gst)[1] = 3.0;
                        set_state(gst,GAS_STATE,gst);
                    }
                    zero_moments(gst);
                    assign(midsoln[nbtri[side]->id].st[0], gst, sizest);
#endif // if defined(__MPI__)
                }
            }
        }

        if(store != NULL)
        {
            for(side = 0; side < 3; side++)
            {
                if(nbtri[side]->BC_type == SUBDOMAIN)
                {
                    if(rk_step == RK_STEP)
                        gst = nbtri[side]->st;
                    else
                        gst = midsoln[nbtri[side]->id].st[rk_step];

                    if(fg_e_type(tri)[side] == NEUMANN)
                    {
                        if(rk_step == RK_STEP)
                        {
                            // gst = nbtri[side]->st;
                            for(cv_indx = 0; cv_indx < 4; cv_indx++)
                            {
                                for(indx = 0; indx < MAX_N_COEF; indx++)
                                {
                                    nbtri[side]->cv_soln->cv_dg_rho[cv_indx][indx] = dg_Dens(gst)[indx];
                                    nbtri[side]->cv_soln->cv_dg_e[cv_indx][indx] = dg_Energy(gst)[indx];
                                    nbtri[side]->cv_soln->cv_dg_m[0][cv_indx][indx] = dg_Mom(gst)[0][indx];
                                    nbtri[side]->cv_soln->cv_dg_m[1][cv_indx][indx] = dg_Mom(gst)[1][indx];
                                }
                            }
                            if(6 == MAX_N_COEF)
                                map_poly_SV_to_CV_p2(nbtri[side]);
                            else if(10 == MAX_N_COEF)
                                map_poly_SV_to_CV_p3(nbtri[side]);
                        }
                        else
                        {
                            // gst = midsoln[nbtri[side]->id].st[rk_step];
                            for(cv_indx = 0; cv_indx < 4; cv_indx++)
                            {
                                for(indx = 0; indx < MAX_N_COEF; indx++)
                                {
                                    store[rk_step][nbtri[side]->id].cv_dg_rho[cv_indx][indx] =
                                              dg_Dens(gst)[indx];
                                    store[rk_step][nbtri[side]->id].cv_dg_e[cv_indx][indx] =
                                              dg_Energy(gst)[indx];
                                    store[rk_step][nbtri[side]->id].cv_dg_m[0][cv_indx][indx] =
                                              dg_Mom(gst)[0][indx];
                                    store[rk_step][nbtri[side]->id].cv_dg_m[1][cv_indx][indx] =
                                              dg_Mom(gst)[1][indx];
                                }
                            }
                            if(6 == MAX_N_COEF)
                                map_poly_SV_to_CV_ver2_p2(nbtri[side], store[rk_step]);
                            else if(10 == MAX_N_COEF)
                                map_poly_SV_to_CV_ver2_p3(nbtri[side], store[rk_step]);
                        }
                    } // end: if(fg_e_type(tri)[side] == NEUMANN)
                    else
                    {
/////// need to make cases for tris in subdomain buffer (DO NOT NEED to do anything)
/////// and tris on the outside part of the inflow, outflow boundary
#if defined(__MPI__)
                        if(YES == tri_outside_Mach_step_boundary(nbtri[side]))
                        {
                            if(rk_step == RK_STEP)
                            {
                                // gst = nbtri[side]->st;
                                for(cv_indx = 0; cv_indx < 4; cv_indx++)
                                {
                                    for(indx = 0; indx < MAX_N_COEF; indx++)
                                    {
                                        nbtri[side]->cv_soln->cv_dg_rho[cv_indx][indx] = dg_Dens(gst)[indx];
                                        nbtri[side]->cv_soln->cv_dg_e[cv_indx][indx] = dg_Energy(gst)[indx];
                                        nbtri[side]->cv_soln->cv_dg_m[0][cv_indx][indx] = dg_Mom(gst)[0][indx];
                                        nbtri[side]->cv_soln->cv_dg_m[1][cv_indx][indx] = dg_Mom(gst)[1][indx];
                                    }
                                }
                                if(6 == MAX_N_COEF)
                                    map_poly_SV_to_CV_p2(nbtri[side]);
                                else if(10 == MAX_N_COEF)
                                    map_poly_SV_to_CV_p3(nbtri[side]);
                            }
                            else
                            {
                                // gst = midsoln[nbtri[side]->id].st[rk_step];
                                for(cv_indx = 0; cv_indx < 4; cv_indx++)
                                {
                                    for(indx = 0; indx < MAX_N_COEF; indx++)
                                    {
                                        store[rk_step][nbtri[side]->id].cv_dg_rho[cv_indx][indx] =
                                              dg_Dens(gst)[indx];
                                        store[rk_step][nbtri[side]->id].cv_dg_e[cv_indx][indx] =
                                              dg_Energy(gst)[indx];
                                        store[rk_step][nbtri[side]->id].cv_dg_m[0][cv_indx][indx] =
                                              dg_Mom(gst)[0][indx];
                                        store[rk_step][nbtri[side]->id].cv_dg_m[1][cv_indx][indx] =
                                              dg_Mom(gst)[1][indx];
                                    }
                                }
                                if(6 == MAX_N_COEF)
                                    map_poly_SV_to_CV_ver2_p2(nbtri[side], store[rk_step]);
                                else if(10 == MAX_N_COEF)
                                    map_poly_SV_to_CV_ver2_p3(nbtri[side], store[rk_step]);
                            }
                        }
#else
                        if(rk_step == RK_STEP)
                        {
                            // gst = nbtri[side]->st;
                            for(cv_indx = 0; cv_indx < 4; cv_indx++)
                            {
                                for(indx = 0; indx < MAX_N_COEF; indx++)
                                {
                                    nbtri[side]->cv_soln->cv_dg_rho[cv_indx][indx] = dg_Dens(gst)[indx];
                                    nbtri[side]->cv_soln->cv_dg_e[cv_indx][indx] = dg_Energy(gst)[indx];
                                    nbtri[side]->cv_soln->cv_dg_m[0][cv_indx][indx] = dg_Mom(gst)[0][indx];
                                    nbtri[side]->cv_soln->cv_dg_m[1][cv_indx][indx] = dg_Mom(gst)[1][indx];
                                }
                            }
                            if(6 == MAX_N_COEF)
                                map_poly_SV_to_CV_p2(nbtri[side]);
                            else if(10 == MAX_N_COEF)
                                map_poly_SV_to_CV_p3(nbtri[side]);
                        }
                        else
                        {
                            // gst = midsoln[nbtri[side]->id].st[rk_step];
                            for(cv_indx = 0; cv_indx < 4; cv_indx++)
                            {
                                for(indx = 0; indx < MAX_N_COEF; indx++)
                                {
                                    store[rk_step][nbtri[side]->id].cv_dg_rho[cv_indx][indx] =
                                              dg_Dens(gst)[indx];
                                    store[rk_step][nbtri[side]->id].cv_dg_e[cv_indx][indx] =
                                              dg_Energy(gst)[indx];
                                    store[rk_step][nbtri[side]->id].cv_dg_m[0][cv_indx][indx] =
                                              dg_Mom(gst)[0][indx];
                                    store[rk_step][nbtri[side]->id].cv_dg_m[1][cv_indx][indx] =
                                              dg_Mom(gst)[1][indx];
                                }
                            }
                            if(6 == MAX_N_COEF)
                                map_poly_SV_to_CV_ver2_p2(nbtri[side], store[rk_step]);
                            else if(10 == MAX_N_COEF)
                                map_poly_SV_to_CV_ver2_p3(nbtri[side], store[rk_step]);
                        }
#endif // if defined(__MPI__)
                    } // end: if(){if(fg_e_type(tri)[side] == NEUMANN)};  else(){};
                } // end: if(nbtri[side]->BC_type == SUBDOMAIN)
            } // end: for(side = 0; side < 3; side++)
        } // end: if(store != NULL)

}

// The ghost states are set in buffer tris.
LIB_LOCAL void db_Mach_attach_buffer_states(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
	TRI      *tri,
        Limiting_store **store)
{
	TRI       *nbtri[3];
        int       side, i, dim = 2, indx, cv_indx;
        float     nor[3], t[3], u, crds[3], fcent[3];
        Locstate  st, gst;
        float     dirx[2] = {1.0, 0.0}, ans;
        size_t    sizest = fr->sizest;
        double   **Lmass_matrix, *cent = fg_centroid(tri), *nbcent;; 
        double   pt[MAXD], con_u[4];


        for(side = 0; side < 3; side++)
            nbtri[side] = Tri_on_side(tri,side);

        if((RK_STEP+1) == rk_step)
        {
        /////////////////////////////////////
        // for constraint DG, only avg. value 
        /////////////////////////////////////
            st = midsoln[tri->id].st[0];
            for(side = 0; side < 3; side++)
            {
                if(nbtri[side]->BC_type == SUBDOMAIN)
                {
                    if(fg_e_type(tri)[side] == NEUMANN)
                    {
                        gst = midsoln[nbtri[side]->id].st[0];
                        assign(gst, st, sizest);
                        Mom(gst)[0] *= -1.0;
                    }
                }
                else
                {
#if defined(__MPI__)
                    if(YES == tri_outside_db_Mach_boundary(nbtri[side]))
                    {
                        gst = midsoln[nbtri[side]->id].st[0];
                        assign(gst, st, sizest);
                    }
#else
                    gst = midsoln[nbtri[side]->id].st[0];
                    assign(gst, st, sizest);
#endif // if defined(__MPI__)
                }
            }
            return; 
        }

        if(rk_step == RK_STEP)
            st = tri->st;
        else
            st = midsoln[tri->id].st[rk_step];

        for(side = 0; side < 3; side++)
        {
            if(nbtri[side]->BC_type == SUBDOMAIN)
            {
                if(fg_e_type(tri)[side] == NEUMANN)
                {
                    if(rk_step == RK_STEP)
                        gst = nbtri[side]->st;
                    else
                        gst = midsoln[nbtri[side]->id].st[rk_step];
                    assign(gst, st, sizest);
                    Lmass_matrix = nbtri[side]->Lmass_matrix;
                    dg_Dens(gst)[1] *= -1.0; 
                    dg_Dens(gst)[4] *= -1.0; 
                    // new P3
                    dg_Dens(gst)[6] *= -1.0;
                    dg_Dens(gst)[8] *= -1.0;

                    dg_Energy(gst)[1] *= -1.0; 
                    dg_Energy(gst)[4] *= -1.0; 
                    // new P3
                    dg_Energy(gst)[6] *= -1.0;
                    dg_Energy(gst)[8] *= -1.0;

                    dg_Mom(gst)[1][1] *= -1.0; 
                    dg_Mom(gst)[1][4] *= -1.0; 
                    // new P3
                    dg_Mom(gst)[1][6] *= -1.0;
                    dg_Mom(gst)[1][8] *= -1.0;

                    dg_Mom(gst)[0][0] *= -1.0; 
                    dg_Mom(gst)[0][2] *= -1.0; 
                    dg_Mom(gst)[0][3] *= -1.0; 
                    dg_Mom(gst)[0][5] *= -1.0; 
                    // new P3
                    dg_Mom(gst)[0][7] *= -1.0;
                    dg_Mom(gst)[0][9] *= -1.0;

                    // Compute average soln
                    Dens(gst) = 0.0;
                    Mom(gst)[0] = 0.0;
                    Mom(gst)[1] = 0.0;
                    Energy(gst) = 0.0;
                    for(indx = 0; indx < MAX_N_COEF; indx++)
                    {
                        Dens(gst) += dg_Dens(gst)[indx]*Lmass_matrix[0][indx];
                        Mom(gst)[0] += dg_Mom(gst)[0][indx]*Lmass_matrix[0][indx];
                        Mom(gst)[1] += dg_Mom(gst)[1][indx]*Lmass_matrix[0][indx];
                        Energy(gst) += dg_Energy(gst)[indx]*Lmass_matrix[0][indx];
                    }
                    Dens(gst) /= Lmass_matrix[0][0];
                    Mom(gst)[0] /= Lmass_matrix[0][0];
                    Mom(gst)[1] /= Lmass_matrix[0][0];
                    Energy(gst) /= Lmass_matrix[0][0];

                    // The subdomain zero level state, for reconstruction purpose (P1 projection)
                    assign(midsoln[nbtri[side]->id].st[0], gst, sizest);
                }
                else
                {
#if defined(__MPI__)
                    if(YES == tri_outside_db_Mach_boundary(nbtri[side]))
                    {
                        // for(i = 0; i < dim; i++)
                        //     pt[i] = Coords(Point_of_tri(tri)[side])[i] +
                        //         Coords(Point_of_tri(tri)[(side+1)%3])[i];
                        // con_u_at_pt(st, pt, fg_centroid(tri), con_u);

                        if(rk_step == RK_STEP)
                            gst = nbtri[side]->st;
                        else
                            gst = midsoln[nbtri[side]->id].st[rk_step];

                        assign(gst, st, sizest);
                        // p1_L2projection_ver2(tri,gst,gst); // old, submission 1,2
                        /// New, submission 3
                        // con_u_to_state(con_u, dim, gst);
                        zero_moments(gst);
                        /// End New, submission 3
                        assign(midsoln[nbtri[side]->id].st[0], gst, sizest);
                    }
#else
                    // for(i = 0; i < dim; i++)
                    //     pt[i] = Coords(Point_of_tri(tri)[side])[i] +
                    //         Coords(Point_of_tri(tri)[(side+1)%3])[i];
                    // con_u_at_pt(st, pt, fg_centroid(tri), con_u);

                    if(rk_step == RK_STEP)
                        gst = nbtri[side]->st;
                    else
                        gst = midsoln[nbtri[side]->id].st[rk_step];

                    assign(gst, st, sizest);
                    // The subdomain zero level state, for reconstruction purpose (P1 projection)
                    // zero_moments(gst);
                    // p1_L2projection_ver2(tri,gst,gst); // submission 1 and 2

                    /// New, submission 3
                    // con_u_to_state(con_u, dim, gst);
                    zero_moments(gst);
                    /// End New, submission 3
                    assign(midsoln[nbtri[side]->id].st[0], gst, sizest);
#endif // if defined(__MPI__)
                }
            }
        }

        if(store != NULL)
        {
            for(side = 0; side < 3; side++)
            {
                if(nbtri[side]->BC_type == SUBDOMAIN)
                {
                    if(rk_step == RK_STEP)
                        gst = nbtri[side]->st;
                    else
                        gst = midsoln[nbtri[side]->id].st[rk_step];

                    if(fg_e_type(tri)[side] == NEUMANN)
                    {
                        if(rk_step == RK_STEP)
                        {
                            // gst = nbtri[side]->st;
                            for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
                            {
                                for(indx = 0; indx < MAX_N_COEF; indx++)
                                {
                                    nbtri[side]->cv_soln->cv_dg_rho[cv_indx][indx] = dg_Dens(gst)[indx];
                                    nbtri[side]->cv_soln->cv_dg_e[cv_indx][indx] = dg_Energy(gst)[indx];
                                    nbtri[side]->cv_soln->cv_dg_m[0][cv_indx][indx] = dg_Mom(gst)[0][indx];
                                    nbtri[side]->cv_soln->cv_dg_m[1][cv_indx][indx] = dg_Mom(gst)[1][indx];
                                }
                            }
                            if(6 == MAX_N_COEF)
                                map_poly_SV_to_CV_p2(nbtri[side]);
                            else if(10 == MAX_N_COEF)
                                map_poly_SV_to_CV_p3(nbtri[side]);
                        }
                        else
                        {
                            // gst = midsoln[nbtri[side]->id].st[rk_step];
                            for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
                            {
                                for(indx = 0; indx < MAX_N_COEF; indx++)
                                {
                                    store[rk_step][nbtri[side]->id].cv_dg_rho[cv_indx][indx] =
                                              dg_Dens(gst)[indx];
                                    store[rk_step][nbtri[side]->id].cv_dg_e[cv_indx][indx] =
                                              dg_Energy(gst)[indx];
                                    store[rk_step][nbtri[side]->id].cv_dg_m[0][cv_indx][indx] =
                                              dg_Mom(gst)[0][indx];
                                    store[rk_step][nbtri[side]->id].cv_dg_m[1][cv_indx][indx] =
                                              dg_Mom(gst)[1][indx];
                                }
                            }
                            if(6 == MAX_N_COEF)
                                map_poly_SV_to_CV_ver2_p2(nbtri[side], store[rk_step]);
                            else if(10 == MAX_N_COEF)
                                map_poly_SV_to_CV_ver2_p3(nbtri[side], store[rk_step]);
                        }
                    } // end: if(fg_e_type(tri)[side] == NEUMANN)
                    else
                    {
/////// need to make cases for tris in subdomain buffer (DO NOT NEED to do anything)
/////// and tris on the outside part of the inflow, outflow boundary
#if defined(__MPI__)
                        if(YES == tri_outside_db_Mach_boundary(nbtri[side]))
                        {
                            if(rk_step == RK_STEP)
                            {
                                // gst = nbtri[side]->st;
                                for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
                                {
                                    for(indx = 0; indx < MAX_N_COEF; indx++)
                                    {
                                        nbtri[side]->cv_soln->cv_dg_rho[cv_indx][indx] = dg_Dens(gst)[indx];
                                        nbtri[side]->cv_soln->cv_dg_e[cv_indx][indx] = dg_Energy(gst)[indx];
                                        nbtri[side]->cv_soln->cv_dg_m[0][cv_indx][indx] = dg_Mom(gst)[0][indx];
                                        nbtri[side]->cv_soln->cv_dg_m[1][cv_indx][indx] = dg_Mom(gst)[1][indx];
                                    }
                                }
                                if(6 == MAX_N_COEF)
                                    map_poly_SV_to_CV_p2(nbtri[side]);
                                else if(10 == MAX_N_COEF)
                                    map_poly_SV_to_CV_p3(nbtri[side]);
                            }
                            else
                            {
                                // gst = midsoln[nbtri[side]->id].st[rk_step];
                                for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
                                {
                                    for(indx = 0; indx < MAX_N_COEF; indx++)
                                    {
                                        store[rk_step][nbtri[side]->id].cv_dg_rho[cv_indx][indx] =
                                              dg_Dens(gst)[indx];
                                        store[rk_step][nbtri[side]->id].cv_dg_e[cv_indx][indx] =
                                              dg_Energy(gst)[indx];
                                        store[rk_step][nbtri[side]->id].cv_dg_m[0][cv_indx][indx] =
                                              dg_Mom(gst)[0][indx];
                                        store[rk_step][nbtri[side]->id].cv_dg_m[1][cv_indx][indx] =
                                              dg_Mom(gst)[1][indx];
                                    }
                                }
                                if(6 == MAX_N_COEF)
                                    map_poly_SV_to_CV_ver2_p2(nbtri[side], store[rk_step]);
                                else if(10 == MAX_N_COEF)
                                    map_poly_SV_to_CV_ver2_p3(nbtri[side], store[rk_step]);
                            }
                        }
#else
                        if(rk_step == RK_STEP)
                        {
                            // gst = nbtri[side]->st;
                            for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
                            {
                                for(indx = 0; indx < MAX_N_COEF; indx++)
                                {
                                    nbtri[side]->cv_soln->cv_dg_rho[cv_indx][indx] = dg_Dens(gst)[indx];
                                    nbtri[side]->cv_soln->cv_dg_e[cv_indx][indx] = dg_Energy(gst)[indx];
                                    nbtri[side]->cv_soln->cv_dg_m[0][cv_indx][indx] = dg_Mom(gst)[0][indx];
                                    nbtri[side]->cv_soln->cv_dg_m[1][cv_indx][indx] = dg_Mom(gst)[1][indx];
                                }
                            }
                            if(6 == MAX_N_COEF)
                                map_poly_SV_to_CV_p2(nbtri[side]);
                            else if(10 == MAX_N_COEF)
                                map_poly_SV_to_CV_p3(nbtri[side]);
                        }
                        else
                        {
                            // gst = midsoln[nbtri[side]->id].st[rk_step];
                            for(cv_indx = 0; cv_indx < N_PART; cv_indx++)
                            {
                                for(indx = 0; indx < MAX_N_COEF; indx++)
                                {
                                    store[rk_step][nbtri[side]->id].cv_dg_rho[cv_indx][indx] =
                                              dg_Dens(gst)[indx];
                                    store[rk_step][nbtri[side]->id].cv_dg_e[cv_indx][indx] =
                                              dg_Energy(gst)[indx];
                                    store[rk_step][nbtri[side]->id].cv_dg_m[0][cv_indx][indx] =
                                              dg_Mom(gst)[0][indx];
                                    store[rk_step][nbtri[side]->id].cv_dg_m[1][cv_indx][indx] =
                                              dg_Mom(gst)[1][indx];
                                }
                            }
                            if(6 == MAX_N_COEF)
                                map_poly_SV_to_CV_ver2_p2(nbtri[side], store[rk_step]);
                            else if(10 == MAX_N_COEF)
                                map_poly_SV_to_CV_ver2_p3(nbtri[side], store[rk_step]);
                        }
#endif // if defined(__MPI__)
                    } // end: if(){if(fg_e_type(tri)[side] == NEUMANN)};  else(){};
                } // end: if(nbtri[side]->BC_type == SUBDOMAIN)
            } // end: for(side = 0; side < 3; side++)
        } // end: if(store != NULL)
}

/*
 * Limiting the soln in interior.
 *
 * Pass 1 walks every tri that is not a SUBDOMAIN (buffer) tri and applies
 * the limiting routines matching MAX_N_COEF (6 or 10).  Boundary tris
 * (Boundary_tri() or tri_on_phy_bdry()) are limited only under
 * debugging("twod_riemann"), after attach_buffer_states(); otherwise their
 * current state is just copied to midsoln[id].st[0].
 *
 * The buffer is then refreshed with the update_buffer* routine selected by
 * the active debugging key, and pass 2 writes the result back: interior
 * tris through update_coef(), boundary tris by copying midsoln[id].st[0]
 * into the current state (moments zeroed first unless one of the g_sine,
 * v_evo, Burgers or twod_riemann keys is active).
 *
 * fr           front; supplies the mesh, state size and time-step callback.
 * midsoln      per-tri intermediate states, indexed by tri id.
 * rk_step      RK_STEP selects tri->st as the current state, any other
 *              value selects midsoln[id].st[rk_step].
 * detect_extr  forwarded to limiting_3rd_degreeP3().
 * comput_time  forwarded to update_coef().
 * store        forwarded to attach_buffer_states() / update_buffer*().
 *
 * Side effects: at rk_step == RK_STEP the file-level newdt and time_on_tri
 * are updated from boundary tris in pass 2.
 */
LOCAL void limiting_soln_with_buffer_tris(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        int      detect_extr,
        int      comput_time,
        Limiting_store **store)
{
        TRI       *tri, *nbtri[3];
        SURFACE   **surf;
        int       i;
        Locstate  st;
        float     max_dt;

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
             !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(debugging("twod_riemann"))
                    {
                        attach_buffer_states(fr,midsoln,rk_step,tri,store);
                    }
                    else
                    {
                        // Boundary tris are not limited: keep the current state.
                        if(rk_step == RK_STEP)
                            assign(midsoln[tri->id].st[0],tri->st,fr->sizest);
                        else
                            assign(midsoln[tri->id].st[0],midsoln[tri->id].st[rk_step],fr->sizest);
                        continue;
                    }
                }

                for(i = 0; i < 3; i++)
                    nbtri[i] = Tri_on_side(tri,i);

                // limit from the highest degree coefficients downwards
                if(MAX_N_COEF == 10)
                {
                    limiting_3rd_degreeP3(tri,nbtri,3,midsoln,rk_step,detect_extr);
                    limiting_2nd_degreeP3(tri,nbtri,3,midsoln,rk_step,YES);
                    limiting_1st_degreeP3(tri,nbtri,3,midsoln,rk_step,NO);
                }
                else if(MAX_N_COEF == 6)
                {
                    limiting_2nd_degree(tri,nbtri,3,midsoln,rk_step);
                    limiting_1st_degree(tri,nbtri,3,midsoln,rk_step);
                }
            }
        }

        if(debugging("Sod") || debugging("Lax"))
        {
            update_buffer_x_peri(fr,midsoln,0,store);
        }
        else if(debugging("shock_vort") || debugging("shock_vortex"))
        {
            // local_limiting_soln_with_buffer_tris() keys the same buffer
            // update on "shock_vort"; the historical "shock_vortex" spelling
            // is still honoured so existing inputs keep working.
            update_buffer_x_ref(fr,midsoln,0,store);
        }
        else if(debugging("v_evo") || debugging("Burgers"))
        {
            update_buffer(fr,midsoln,0,store);
        }

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(!(debugging("g_sine") || debugging("v_evo")
                       || debugging("Burgers") || debugging("twod_riemann")))
                    {
                        zero_moments(midsoln[tri->id].st[0]);
                    }

                    // Under twod_riemann boundary tris were limited in pass 1
                    // and take the regular update_coef() path below.
                    if(!debugging("twod_riemann"))
                    {
                        if(rk_step == RK_STEP)
                            st = tri->st;
                        else
                            st = midsoln[tri->id].st[rk_step];
                        assign(st,midsoln[tri->id].st[0],fr->sizest);
                        if(rk_step == RK_STEP)
                        {
                            max_dt = (*fr->_time_step_on_tri)(fr, tri);
                            if(max_dt < newdt)
                                   time_on_tri = tri;
                            newdt = min(newdt, max_dt);
                        }
                        continue;
                    }
                }
                // update coefficient
                update_coef(tri,midsoln,rk_step,fr,fr->sizest,comput_time);
            }
        }
}

// Limiting the soln in interior
LOCAL void local_limiting_soln_with_buffer_tris(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        Limiting_store **store)
{
        TRI       *tri, *crsp_tri, *nbtri[3];
        SURFACE   **surf;
        int       dim = 2, i, j, side;
        double     *cent;
        size_t    sizest = fr->sizest;
        Locstate  st2;
        float     max_dt;
        TRI       **limit_tris, **row_limit_tris[500];
        int       N_alloc = 800, N_row, N_use =0, N;

        TRI       **limit_tris2, **row_limit_tris2[500];
        int       N_alloc2 = 800, N_row2, N_use2 =0, N2;
        int       loop_num = 0, detect_extr = YES, comput_time = NO;

        vector(&limit_tris, N_alloc, sizeof(TRI*));
        row_limit_tris[0] = limit_tris;
        N_row = 1;

        if(! debugging("HRtwice"))
        {
            detect_extr = YES;
            comput_time = YES;
        }

        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(debugging("twod_riemann"))
                        attach_buffer_states(fr,midsoln,rk_step,tri,store);
                    if(debugging("db_Mach"))
                        db_Mach_attach_buffer_states(fr,midsoln,rk_step,tri,store);
                    if(debugging("shock_vort"))
                        shock_vort_attach_buffer_states(fr,midsoln,rk_step,tri,store);
                    if(debugging("Sod"))
                        shock_vort_attach_buffer_states(fr,midsoln,rk_step,tri,store);
                }

                if(overshoot_state_Shu_V(tri,midsoln,rk_step) == YES)
                {
                    limit_tris[N_use] = tri;
                    N_use++;  
                    if(N_use == N_alloc)
                    {
                        if(N_row +1 >= 500)
                        {
                            printf("ERROR: local_limiting_soln_with_buffer_tris, exceed alloc. limit\n");
                            clean_up(ERROR);
                        }
                        vector(&limit_tris, N_alloc, sizeof(TRI*));
                        row_limit_tris[N_row] = limit_tris;
                        N_row++;
                        N_use = 0;
                    }
                }
                else
                {
                    if(rk_step == RK_STEP)
                    {
                        max_dt = (*fr->_time_step_on_tri)(fr, tri);
                        if(max_dt < newdt)
                               time_on_tri = tri;
                        newdt = min(newdt, max_dt);
                        // states are saved there for redo highest degree
                        // if(!Boundary_tri(tri) && !tri_on_phy_bdry(tri))
                        assign(midsoln[tri->id].st[0],tri->st,fr->sizest);
                    }
                    else
                    {
                        // states are saved there for redo highest degree
                        // if(!Boundary_tri(tri) && !tri_on_phy_bdry(tri))
                        assign(midsoln[tri->id].st[0],midsoln[tri->id].st[rk_step],fr->sizest);
                    }
                }
            }
        }

        // TMP
        // printf("Limited size: alloc_row %d, N_alloc %d\n", N_row, N_use);
// DO HR twice
redo_HR:
        loop_num++;

        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1)
                N = N_use;
            else
                N = N_alloc;
            for(j = 0; j < N; j++)
            {
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                if(MAX_N_COEF == 6)
                {
                    limiting_2nd_degree(row_limit_tris[i][j],nbtri,3,midsoln,rk_step);
                    limiting_1st_degree(row_limit_tris[i][j],nbtri,3,midsoln,rk_step);
                }
                else if(MAX_N_COEF == 10)
                {
                    limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,3,midsoln,rk_step,detect_extr);
                    limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,3,midsoln,rk_step,YES);
                    limiting_1st_degreeP3(row_limit_tris[i][j],nbtri,3,midsoln,rk_step,NO);
                }
            }
        }

        // redo highest degree term using the reconstructed
        // polynomial to reduce overshoot
        /**
        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1)
                N = N_use;
            else
                N = N_alloc;
            for(j = 0; j < N; j++)
            {
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                if(MAX_N_COEF == 6)
                {
                }
                else if(MAX_N_COEF == 10)
                    redo_limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step);
            }
        }
        **/

        if(debugging("Sod") || debugging("Lax"))
        {
            update_buffer_x_peri(fr,midsoln,0,store);
        }
        else if(debugging("shock_vort"))
        {
            update_buffer_x_ref(fr,midsoln,0,store);
        }
        else if(debugging("v_evo") || debugging("Burgers"))
        {
            update_buffer(fr,midsoln,0,store);
            // update_buffer_x_per_y_ref(fr,midsoln,0);
            // update_buffer_x_ref(fr,midsoln,0);
            // update_buffer_x_peri(fr,midsoln,0);
        }

        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1)
                N = N_use;
            else
                N = N_alloc;
            for(j = 0; j < N; j++)
            {
                if(row_limit_tris[i][j]->redo_limiting == YES)
                    fix_unphysical_st(row_limit_tris[i][j],midsoln,0,fr);

                // update coefficient
                update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,comput_time);
            }
        }

        if(loop_num == 1 && debugging("HRtwice"))
        {
            detect_extr = YES; comput_time = YES;

#if defined(__MPI__)
            if(debugging("db_Mach"))
                update_db_Mach_buffer(fr,midsoln,rk_step,store);
#endif // if defined(__MPI__)

            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(Boundary_tri(row_limit_tris[i][j]) ||
                       tri_on_phy_bdry(row_limit_tris[i][j]))
                    {
                        if(debugging("twod_riemann"))
                            attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],store);
                        if(debugging("db_Mach"))
                            db_Mach_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],store);
                        if(debugging("shock_vort"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],store);
                        if(debugging("Sod"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],store);
                    }
                }
            }
            goto redo_HR;
        }

        for(i = 0; i < N_row; i++)
            free(row_limit_tris[i]);
}

// Limiting the soln in interior
/*
 * local_limiting_soln_with_buffer_tris_multiple_times
 *
 * Two-phase limiter driver.
 *
 * Phase 1 (single sweep over every triangle of every surface in
 * fr->mesh->surfaces, skipping tri->BC_type == SUBDOMAIN):
 *   - triangles for which overshoot_state_Shu_V() returns YES are appended
 *     to a chunked list (row_limit_tris[]; each chunk holds N_alloc
 *     pointers, N_use counts the entries of the last chunk);
 *   - for every other triangle the current state is copied into
 *     midsoln[tri->id].st[0] and, when rk_step == RK_STEP, the file-level
 *     newdt / time_on_tri are updated from fr->_time_step_on_tri.
 *
 * Phase 2 (label redo_HR, executed once and then repeated while
 * loop_num < HR_times): every listed triangle is limited, the buffer
 * states are refreshed according to the active debugging() keys, and
 * update_coef() is called for every listed triangle.
 *
 * Parameters:
 *   fr       - front; supplies the mesh, sizest and _time_step_on_tri
 *   midsoln  - per-triangle storage indexed by tri->id; slot st[0] receives
 *              the reconstructed state
 *   rk_step  - RK stage; RK_STEP selects tri->st, any other value selects
 *              midsoln[tri->id].st[rk_step]
 *   HR_times - requested number of limiting passes
 *   store    - forwarded unchanged to the buffer attach/update helpers
 *
 * Side effects visible here: writes midsoln[].st[0], may update newdt and
 * time_on_tri, sets and finally clears the file-level debug_flag, and keeps
 * function-static scratch (Roe_st, L, R) allocated on the first call.
 *
 * NOTE(review): comput_time starts as NO and is only switched to YES inside
 * the `loop_num < HR_times` branch, so for HR_times <= 1 update_coef() is
 * never asked to evaluate the time step of the limited triangles - confirm
 * that this is intended.
 * NOTE(review): crsp_tri, dim, cent, sizest, st2, s and debug are declared
 * but not used by any live statement in this function.
 */
LOCAL void local_limiting_soln_with_buffer_tris_multiple_times(
        Front    *fr,
        Mid_soln *midsoln,
        int      rk_step,
        int      HR_times,
        Limiting_store **store)
{
        TRI       *tri, *crsp_tri, *nbtri[3];
        SURFACE   **surf;
        int       dim = 2, i, j, side;
        double     *cent;
        size_t    sizest = fr->sizest;
        Locstate  st2;
        float     max_dt;
        TRI       **limit_tris, **row_limit_tris[1500];
        int       N_alloc = 800, N_row, N_use =0, N;

        // TRI       **limit_tris2, **row_limit_tris2[500];
        // int       N_alloc2 = 800, N_row2, N_use2 =0, N2;
        int       loop_num = 0, detect_extr = YES, comput_time = NO;
        static Locstate Roe_st[3] = {NULL, NULL, NULL};  //Roe mean value
	static float  **L[3], **R[3];
        float     conu[3][4][MAX_N_COEF];  // [side][# eqn][coef]
        float     outcome[4][MAX_N_COEF];
        char      s[256];
	int       debug = NO;
	
        // One-time allocation of the per-side scratch: a 4x4 L and R matrix
        // and one state per side. They are static and never released here.
        if(Roe_st[0] == NULL)
	{
             for(i = 0; i < 3; i++)
             {    
                 matrix(&L[i], 4, 4, sizeof(float));
                 matrix(&R[i], 4, 4, sizeof(float));
                 g_alloc_state(&Roe_st[i], fr->sizest);
             }             
	}
	
        // First chunk of the list of triangles that need limiting.
        vector(&limit_tris, N_alloc, sizeof(TRI*));
        row_limit_tris[0] = limit_tris;
        N_row = 1;

        // Phase 1: classify every non-SUBDOMAIN triangle.
        for(surf = fr->mesh->surfaces; surf && *surf; surf++)
        {
            for (tri = first_tri(*surf);
                 !at_end_of_tri_list(tri,*surf); tri = tri->next)
            {
                if(tri->BC_type == SUBDOMAIN)
                    continue;

                // Boundary triangles get their buffer states attached first;
                // which helper runs depends on the active debugging() key.
                if(Boundary_tri(tri) || tri_on_phy_bdry(tri))
                {
                    if(debugging("twod_riemann"))
                        attach_buffer_states(fr,midsoln,rk_step,tri,store);
                    if(debugging("db_Mach"))
                        db_Mach_attach_buffer_states(fr,midsoln,rk_step,tri,store);
                    if(debugging("shock_vort"))
                        shock_vort_attach_buffer_states(fr,midsoln,rk_step,tri,store);
                    if(debugging("Sod"))
                        shock_vort_attach_buffer_states(fr,midsoln,rk_step,tri,store);
                }

                    /**
                    if(tri->id == 4)
                    {
                        printf("*********-Begin------------------------*******************\n");
                        printf("print before test overshoot st of tri[%d], step(%d), tri type %d\n",
                          tri->id, rk_step, tri->BC_type);
                        if(rk_step == RK_STEP)
                            verbose_print_state("original state:",tri->st);
                        else
                            verbose_print_state("original state:",midsoln[tri->id].st[rk_step]);
                        printf("*************-end------------------------***************\n");
                    }
                    **/

                if(overshoot_state_Shu_V(tri,midsoln,rk_step) == YES)
                {
                    // Append to the current chunk; when it fills up, open a
                    // new chunk (abort once the chunk table itself is full).
                    limit_tris[N_use] = tri;
                    N_use++;
                    if(N_use == N_alloc)
                    {
                        if(N_row +1 >= 1500)
                        {
                            printf("ERROR: local_limiting_soln_with_buffer_tris, exceed alloc. limit\n");
                            clean_up(ERROR);
                        }
                        vector(&limit_tris, N_alloc, sizeof(TRI*));
                        row_limit_tris[N_row] = limit_tris;
                        N_row++;
                        N_use = 0;
                    }

                    /**
                    if(tri->id == 4)
                    {
                        printf("tri[%d] identified as overshoot, N_use = %d, N_row = %d\n", tri->id, N_use, N_row);
                        printf("*********-- install into tris-----------------------*******************\n");
                        printf("print before test overshoot st of tri[%d], step(%d), tri type %d\n",
                          tri->id, rk_step, tri->BC_type);
                        if(rk_step == RK_STEP)
                            verbose_print_state("original state:",tri->st);
                        else
                            verbose_print_state("original state:",midsoln[tri->id].st[rk_step]);
                        printf("*********-- END install into tris-----------------------*******************\n");
                    }
                    **/
                }
                else
                {
                    // Triangle is not limited: keep its state as the
                    // "reconstruction" in st[0]; the time step is only
                    // evaluated when rk_step == RK_STEP.
                    if(rk_step == RK_STEP)
                    {
                        max_dt = (*fr->_time_step_on_tri)(fr, tri);
                        if(max_dt < newdt)
                               time_on_tri = tri;
                        newdt = min(newdt, max_dt);
                        // states are saved there for redo highest degree
                        // if(!Boundary_tri(tri) && !tri_on_phy_bdry(tri))
                        assign(midsoln[tri->id].st[0],tri->st,fr->sizest);
                    }
                    else
                    {
                        // states are saved there for redo highest degree
                        // if(!Boundary_tri(tri) && !tri_on_phy_bdry(tri))
                        assign(midsoln[tri->id].st[0],midsoln[tri->id].st[rk_step],fr->sizest);
                    }
                }
            }
        }

        // Phase 2: one limiting pass over the collected triangles; re-entered
        // through the goto at the bottom while loop_num < HR_times.
redo_HR:
        loop_num++;
       
        // TMP
        // printf("local_limiting_soln_with_buffer_tris_multiple_times %d time\n", loop_num);

        for(i = 0; i < N_row; i++)
        {
            // Only the last chunk is partially filled.
            if(i == N_row-1)
                N = N_use;
            else
                N = N_alloc;
            for(j = 0; j < N; j++)
            {
                for(side = 0; side < 3; side++)
                    nbtri[side] = Tri_on_side(row_limit_tris[i][j],side);
                if(MAX_N_COEF == 6)
                {
                    limiting_2nd_degree(row_limit_tris[i][j],nbtri,3,midsoln,rk_step);
                    limiting_1st_degree(row_limit_tris[i][j],nbtri,3,midsoln,rk_step);
                }
                else if(MAX_N_COEF == 10)
                {
                    // limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,detect_extr);
                    // limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,YES);
                    // limiting_1st_degreeP3(row_limit_tris[i][j],nbtri,midsoln,rk_step,NO);
                    /**
                    // if(row_limit_tris[i][j]->id == 1205)
                    if(row_limit_tris[i][j]->id == 4)
		    {
			printf("print before construction st of tri[%d], step(%d)\n", row_limit_tris[i][j]->id, rk_step);
			if(rk_step == RK_STEP)
                            verbose_print_state("original state:",row_limit_tris[i][j]->st);
			else
                            verbose_print_state("original state:",midsoln[row_limit_tris[i][j]->id].st[rk_step]);
                        verbose_print_state("original reconstuction:",midsoln[row_limit_tris[i][j]->id].st[0]);
		    }
                    test_unphysical_st_at_quadrature(row_limit_tris[i][j],midsoln);
                    **/

                    /* each stage use char variable to do limiting, did not succeed
                    limiting_3rd_degreeP3_char(row_limit_tris[i][j],nbtri,midsoln,rk_step,detect_extr,Roe_st,L,R);
                    limiting_2nd_degreeP3_char(row_limit_tris[i][j],nbtri,midsoln,rk_step,Roe_st,L,R);
                    limiting_1st_degreeP3_char(row_limit_tris[i][j],nbtri,midsoln,rk_step,NO,Roe_st,L,R);
                    */
                    // implement: using 3 char polynomials do 3 reconstructions 
                    // each edge normal determines a set of char polynomials. therefore
                    // a set of reconstruction.

                    /// TMP
                    // Temporary tracing: verbose output is hard-wired to the
                    // triangle with id 4530 via the file-level debug_flag.
                    debug_flag = NO;
		    if(row_limit_tris[i][j]->id == 4530)
		    {
                        printf("local_limiting_soln_with_buffer_tris_multiple_times %d time\n", loop_num);
                        printf("do reconstruction for tri[%d]\n", row_limit_tris[i][j]->id);
                        print_tri_crds(row_limit_tris[i][j]);
                        debug_flag = YES;
                        if(rk_step == RK_STEP)
                            verbose_print_state("original state:",row_limit_tris[i][j]->st);
                        else
                            verbose_print_state("original state:",midsoln[row_limit_tris[i][j]->id].st[rk_step]);
		    }	
                    /// END TMP

                    ////// characteristic variable
                    // Fills Roe_st[], L[] and R[] for the three sides
                    // (presumably Roe averages and left/right eigenvector
                    // matrices - confirm against comput_Roe).
                    comput_Roe(row_limit_tris[i][j],nbtri,midsoln,rk_step,Roe_st,L,R);
                    for(side = 0; side < 3; side++)
                    {
                        // convert candidate conserv. variables to char. variables.
                        if(debug_flag == YES)
                        {
                            printf("projection on side[%d] normal\n", side);
                            // if(side == 0)
                            {
                                // NOTE(review): nbtri[side] is dereferenced
                                // without a NULL check - verify Tri_on_side()
                                // cannot return NULL for the traced triangle.
                                printf("side[%d] tri[%d]\n", side, nbtri[side]->id);
                                print_tri_crds(nbtri[side]);
                                if(rk_step == RK_STEP) 
                                    verbose_print_state("neighbr[] state:",nbtri[side]->st);
                                else
                                    verbose_print_state("neighbr[] state:",midsoln[nbtri[side]->id].st[rk_step]);
                                verbose_print_state("Roe mean",Roe_st[side]);
                                print_ldb_matrix("L[]",4,4,L[side],"%g;");
                                print_ldb_matrix("R[]",4,4,R[side],"%g;");
                            }
                        }

                        // Forward transform of the candidate states with L[side];
                        // it is undone with R[side] at the end of this iteration,
                        // so the statement order inside the loop matters.
                        convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,rk_step,L[side]);

                        // Limit from the highest degree down.
                        limiting_3rd_degreeP3(row_limit_tris[i][j],nbtri,3,midsoln,rk_step,detect_extr);
                        limiting_2nd_degreeP3(row_limit_tris[i][j],nbtri,3,midsoln,rk_step,YES);
                        limiting_1st_degreeP3(row_limit_tris[i][j],nbtri,3,midsoln,rk_step,NO);

                        // convert reconstructed char variables to conserv. variables.
                        convert_con_char(midsoln[row_limit_tris[i][j]->id].st[0], R[side], 
                                         midsoln[row_limit_tris[i][j]->id].st[0]);

			if(debug_flag == YES)
		        {
                            verbose_print_state("reconstruct direction[] state:",midsoln[row_limit_tris[i][j]->id].st[0]);
		        }
                        // Keep this side's reconstruction for the combination step.
                        save_reconstruct_st(row_limit_tris[i][j],midsoln,rk_step, conu[side]);

                        // convert candidate char variables to conserv. variables.
                        convert_cand_conu_charu(row_limit_tris[i][j],nbtri,midsoln,rk_step,R[side]);
                        // printf("EXIT in local_limiting_soln_with_buffer_tris_multiple_times\n");
                        // clean_up(0);
                        // sprintf(s, "side%d char reconstruction:", side);
                        // verbose_print_state(s,midsoln[row_limit_tris[i][j]->id].st[0]);
                        // if(row_limit_tris[i][j]->id == 3 || row_limit_tris[i][j]->id == 2)
                        // {
                        //     printf("\n----side%d char reconstruction:-------------\n", side);
                        //     print_conu_state(conu[side]);
                        // }
                    }
                    // Merge the three per-side reconstructions and store the result.
                    weno_combine_P3(row_limit_tris[i][j],conu,outcome);
                    save_weno_st_to_midsoln(row_limit_tris[i][j],midsoln,rk_step,outcome);
                    ////// end characteristic variable
                    if(debug_flag == YES)
                    {
                        printf("IN save_weno_st_to_midsoln, tri[%d] reconstructed state\n",row_limit_tris[i][j]->id);
                        verbose_print_state("final state:",midsoln[row_limit_tris[i][j]->id].st[0]);
                    }

                    /**
                    if(row_limit_tris[i][j]->id == 3 || row_limit_tris[i][j]->id == 2)
                    {
                        printf("******* weno combine soln\n");
                        print_conu_state(outcome);
                        printf("------------------ print neighbr state\n");
                        if(rk_step == RK_STEP)
                        {
                            verbose_print_state("original state:",nbtri[0]->st);
                            verbose_print_state("original state:",nbtri[1]->st);
                            verbose_print_state("original state:",nbtri[2]->st);
                        }
                        else
                        {
                            printf("side[%d] tri[%d]\n", 0, nbtri[0]->id);
                            verbose_print_state("original state:",midsoln[nbtri[0]->id].st[rk_step]);
                            printf("side[%d] tri[%d]\n", 1, nbtri[1]->id);
                            verbose_print_state("original state:",midsoln[nbtri[1]->id].st[rk_step]);
                            printf("side[%d] tri[%d]\n", 2, nbtri[2]->id);
                            verbose_print_state("original state:",midsoln[nbtri[2]->id].st[rk_step]);
                        }
                    }
                    **/ 
                    // printf("EXIT in local_limiting_soln_with_buffer_tris_multiple_times\n");
                    // clean_up(0);
                }
            }
        }

        // printf("exit in  local_limiting_soln_with_buffer_tris_multiple_times\n");
        // clean_up(0);

        // Refresh the buffer copy of slot 0; the helper is chosen by the
        // active debugging() key (at most one branch runs).
        if(debugging("Sod") || debugging("Lax"))
        {
            update_buffer_x_peri(fr,midsoln,0,store);
        }
        else if(debugging("shock_vort"))
        {
            update_buffer_x_ref(fr,midsoln,0,store);
        }
        else if(debugging("v_evo") || debugging("Burgers"))
        {
            update_buffer(fr,midsoln,0,store);
        }
        // Commit the reconstruction of every limited triangle; the time step
        // is only evaluated by update_coef() when comput_time is YES.
        for(i = 0; i < N_row; i++)
        {
            if(i == N_row-1)
                N = N_use;
            else
                N = N_alloc;
            for(j = 0; j < N; j++)
            {
                // if(row_limit_tris[i][j]->redo_limiting == YES)
                //     fix_unphysical_st(row_limit_tris[i][j],midsoln,0,fr);

                // update coefficient
                update_coef(row_limit_tris[i][j],midsoln,rk_step,fr,fr->sizest,comput_time);
            }
        }

        if(loop_num < HR_times)
        {
            // The next pass is the last one: let it evaluate the time step.
            if(loop_num == HR_times -1)
                comput_time = YES;

#if defined(__MPI__)
            if(debugging("db_Mach"))
                update_db_Mach_buffer(fr,midsoln,rk_step,store);
#endif // if defined(__MPI__)

            // Re-attach buffer states of the limited boundary triangles
            // before the next pass.
            for(i = 0; i < N_row; i++)
            {
                if(i == N_row-1) N = N_use;
                else N = N_alloc;
                for(j = 0; j < N; j++)
                {
                    if(Boundary_tri(row_limit_tris[i][j]) ||
                       tri_on_phy_bdry(row_limit_tris[i][j]))
                    {
                        if(debugging("twod_riemann"))
                            attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],store);
                        if(debugging("db_Mach"))
                            db_Mach_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],store);
                        if(debugging("shock_vort"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],store);
                        if(debugging("Sod"))
                            shock_vort_attach_buffer_states(fr,midsoln,rk_step,row_limit_tris[i][j],store);
                    }
                }
            }
            goto redo_HR;
        }

        // Release every chunk of the triangle list.
        for(i = 0; i < N_row; i++)
            free(row_limit_tris[i]);

        ////////////////////// TMP
        debug_flag = NO;
}


/*
 * update_coef
 *
 * Commits the reconstructed state of one triangle: the contents of
 * midsoln[tri->id].st[0] are copied (assign, l_sizest bytes) into the
 * working state, which is tri->st when rk_iter == RK_STEP and
 * midsoln[tri->id].st[rk_iter] otherwise.
 *
 * When N_EQN != 1 the copied state is checked for NaN in density, the two
 * momentum components and energy; on a hit the triangle id and its three
 * vertices are printed and clean_up(ERROR) is called.
 *
 * If comput_time is not NO and rk_iter == RK_STEP, the triangle's time step
 * is obtained from fr->_time_step_on_tri and the file-level newdt /
 * time_on_tri are updated with it.
 *
 * Parameters:
 *   tri         - triangle being updated
 *   midsoln     - per-triangle storage indexed by tri->id
 *   rk_iter     - RK stage selector (see above)
 *   fr          - front providing _time_step_on_tri
 *   l_sizest    - size passed to assign()
 *   comput_time - NO to skip the time-step evaluation
 */
EXPORT void update_coef(
         TRI       *tri,
         Mid_soln  *midsoln,
         int       rk_iter,
         Front     *fr,
         size_t    l_sizest,
         int       comput_time)
{
         Locstate    target_st, limited_st;
         int         dim = 2, vtx;
         float       tri_dt;

         /* Destination: the state the RK stage is working on. */
         if(rk_iter != RK_STEP)
             target_st = midsoln[tri->id].st[rk_iter];
         else
             target_st = tri->st;

         /* Source: slot 0 holds the reconstructed coefficients. */
         limited_st = midsoln[tri->id].st[0];

         assign(target_st, limited_st, l_sizest);

         /* Abort with diagnostics if the committed state contains a NaN. */
         if(N_EQN != 1 &&
            (isnan(Dens(target_st)) || isnan(Mom(target_st)[0]) ||
             isnan(Mom(target_st)[1]) || isnan(Energy(target_st))))
         {
             printf("ERROR: update_coef()\n");
             printf("nan state detected\n");
             printf("Average state of TRI (%d) after limiting:\n", tri->id);
             for(vtx = 0; vtx < 3; vtx++)
                 print_general_vector("Tri_pt", Coords(Point_of_tri(tri)[vtx]), dim, "\n");
             clean_up(ERROR);
         }

         /* Time-step bookkeeping only on request and only for RK_STEP. */
         if(comput_time != NO && rk_iter == RK_STEP)
         {
             tri_dt = (*fr->_time_step_on_tri)(fr, tri);
             if(tri_dt < newdt)
                 time_on_tri = tri;
             newdt = min(newdt, tri_dt);
         }
}
         

// Average of the 2nd degree terms of the polynomial
/*
 * For each conserved variable, the coefficients in slots 3, 4 and 5 of the
 * dg_* arrays of st are weighted by entries [0][3], [0][4], [0][5] of
 * tri->Bmass_matrix and the sum is divided by entry [0][0].
 *
 * Layout of ave:
 *   N_EQN == 8 : [0] density, [1..3] momentum, [4] energy, [5..7] B
 *   otherwise  : [0] density, [1..2] momentum, [3] energy
 */
EXPORT void R_degree2_term_average(
         TRI       *tri,
         Locstate  st,
         float     *ave)
{
         double     *row0 = tri->Bmass_matrix[0];
         float      denom;   /* single precision on purpose, as before */
         int        k;

/* Weighted sum of the three 2nd-degree coefficients of one variable. */
#define DEG2_MEAN(c) (((c)[3]*row0[3] + (c)[4]*row0[4] + (c)[5]*row0[5])/denom)

         denom = row0[0];

         ave[0] = DEG2_MEAN(dg_Dens(st));
         if(N_EQN != 8)
         {
             ave[1] = DEG2_MEAN(dg_Mom(st)[0]);
             ave[2] = DEG2_MEAN(dg_Mom(st)[1]);
             ave[3] = DEG2_MEAN(dg_Energy(st));
         }
         else
         {
             ave[4] = DEG2_MEAN(dg_Energy(st));
             for(k = 0; k < 3; k++)
                 ave[k+1] = DEG2_MEAN(dg_Mom(st)[k]);
             for(k = 0; k < 3; k++)
                 ave[k+5] = DEG2_MEAN(dg_B(st)[k]);
         }

#undef DEG2_MEAN
}

// Average of the 2nd degree terms of the polynomial (caller-supplied row)
/*
 * Same computation as the 2nd-degree average above, but the weights come
 * from tmpmass_1st_row[0][3..5] and the divisor from tmpmass_1st_row[0][0];
 * tri is not read.
 *
 * Layout of ave:
 *   N_EQN == 8 : [0] density, [1..3] momentum, [4] energy, [5..7] B
 *   otherwise  : [0] density, [1..2] momentum, [3] energy
 */
EXPORT void R_degree2_term_average_Liu(
         TRI       *tri,
         Locstate  st,
         double    **tmpmass_1st_row,
         float     *ave)
{
         double     *row0 = tmpmass_1st_row[0];
         double     denom = row0[0];
         int        k;

/* Weighted sum of the three 2nd-degree coefficients of one variable. */
#define DEG2_MEAN_LIU(c) (((c)[3]*row0[3] + (c)[4]*row0[4] + (c)[5]*row0[5])/denom)

         ave[0] = DEG2_MEAN_LIU(dg_Dens(st));
         if(N_EQN != 8)
         {
             ave[1] = DEG2_MEAN_LIU(dg_Mom(st)[0]);
             ave[2] = DEG2_MEAN_LIU(dg_Mom(st)[1]);
             ave[3] = DEG2_MEAN_LIU(dg_Energy(st));
         }
         else
         {
             ave[4] = DEG2_MEAN_LIU(dg_Energy(st));
             for(k = 0; k < 3; k++)
             {
                 ave[k+1] = DEG2_MEAN_LIU(dg_Mom(st)[k]);
                 ave[k+5] = DEG2_MEAN_LIU(dg_B(st)[k]);
             }
         }

#undef DEG2_MEAN_LIU
}

// average of d_x u polynomial
/*
 * Copies coefficient slot 1 of every conserved variable of st into uxave.
 * tri is not read by the live code; a disabled earlier variant also added
 * 2nd-degree contributions weighted by tri->Lmass_matrix.
 *
 * Layout of uxave:
 *   N_EQN == 8 : [0] density, [1..3] momentum, [4] energy, [5..7] B
 *   otherwise  : [0] density, [1..2] momentum, [3] energy
 */
EXPORT void u_x_average(
         TRI       *tri,
         Locstate  st,
         float     *uxave)
{
         const int  slot = 1;   /* coefficient slot copied out */
         int        c;

         uxave[0] = dg_Dens(st)[slot];

         if(N_EQN != 8)
         {
             uxave[1] = dg_Mom(st)[0][slot];
             uxave[2] = dg_Mom(st)[1][slot];
             uxave[3] = dg_Energy(st)[slot];
             return;
         }

         uxave[4] = dg_Energy(st)[slot];
         for(c = 0; c < 3; c++)
         {
             uxave[c+1] = dg_Mom(st)[c][slot];
             uxave[c+5] = dg_B(st)[c][slot];
         }
}

// average of d_y u polynomial
/*
 * Copies coefficient slot 2 of every conserved variable of st into uyave.
 * tri is not read by the live code; a disabled earlier variant also added
 * 2nd-degree contributions weighted by tri->Lmass_matrix.
 *
 * Layout of uyave:
 *   N_EQN == 8 : [0] density, [1..3] momentum, [4] energy, [5..7] B
 *   otherwise  : [0] density, [1..2] momentum, [3] energy
 */
EXPORT void u_y_average(
         TRI       *tri,
         Locstate  st,
         float     *uyave)
{
         const int  slot = 2;   /* coefficient slot copied out */
         int        c;

         uyave[0] = dg_Dens(st)[slot];

         if(N_EQN != 8)
         {
             uyave[1] = dg_Mom(st)[0][slot];
             uyave[2] = dg_Mom(st)[1][slot];
             uyave[3] = dg_Energy(st)[slot];
             return;
         }

         uyave[4] = dg_Energy(st)[slot];
         for(c = 0; c < 3; c++)
         {
             uyave[c+1] = dg_Mom(st)[c][slot];
             uyave[c+5] = dg_B(st)[c][slot];
         }
}


// Reconstructed coefficients are stored in midsoln[tri->id].st[0]
LIB_LOCAL void limiting_3rd_degreeP3(
	 TRI       *tri,
         TRI       *nbtri[], 
         int       tri_n,
         Mid_soln  *midsoln,
         int       rk_iter,
         int       detect_extr)
{
         Locstate  st, nbst[20], st2;
         double    uxxave[10], nbuxxave[20][8];
         double    uxyave[10], nbuxyave[20][8];
         double    uyyave[10], nbuyyave[20][8];
         int       i, dim = 2, k;
         double    *cent, *nbcent[20], rside[3], rside2[3], area, sqrt_area, tmp_area, cube_factor;
         // float    Axx[3][2][2], Axy[3][2][2], Ayy[3][2][2]; 
         double    coefxx[20][2], coefxy[20][2], coefyy[20][2];
         double    u6, u7, u8, u9, u7_0, u7_1, u8_0, u8_1, 
                   avg3, avg4, avg5, arrya[20], arryb[20], w[20];
         // double **Lmass_matrix = tri->Lmass_matrix;
         double    eps = 0.9;
         double    dirx[3], diry[3], theta[3]; // dirx, diry: cos of the angle
         double    ud[3][2]; // soln uxx, uxy, uyy computed using one line
         int       idirx, idiry;
         // float    c_num_xx[6], c_num_xy[6], c_num_yy[6]; // condition number of stencils
         double    *c_num_xx, *c_num_xy, *c_num_yy; // condition number of stencils
         double    ***Axx, ***Axy, ***Ayy; 
         double    diam, sqr_diam, sqr_sqr_diam, sv_coef[6];
         int       is_bad_stenxx[20], is_bad_stenxy[20], is_bad_stenyy[20];
         double    A_edgexx[3][2], A_edgexy[3][2], A_edgeyy[3][2], ls_soln1[3], ls_Axx[3][2],
                   ls_Axy[3][2], ls_Ayy[3][2], mid[3][2];
         double    wxx[20], wxy[20], wyy[20];
         int       debug = NO;

         if(NULL == midsoln)
         {
             st = tri->st;
             for(i = 0; i < tri_n; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             if(rk_iter == RK_STEP)
             {
                 st = tri->st;
                 for(i = 0; i < tri_n; i++)
                     nbst[i] = nbtri[i]->st;
             }
             else
             {
                 st = midsoln[tri->id].st[rk_iter];
                 for(i = 0; i < tri_n; i++)
                     nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
             }
         }

         // if(YES == find_tri(fg_centroid(tri)))
         /***
         if(tri->id == 14)
         {
	     printf("\n\n---------*******************--------------\n");
             printf("TRI[%d] entered limiting_3rd_degreeP3\n",tri->id);

             // printf("print neighbr [%d] (%g %g) states:", nbtri[0]->id,
             //        fg_centroid(nbtri[0])[0], fg_centroid(nbtri[0])[1]);
             // verbose_print_state("state:",nbst[0]); 
             // printf("print neighbr [%d] (%g %g) states:", nbtri[1]->id,
             //        fg_centroid(nbtri[1])[0], fg_centroid(nbtri[1])[1]);
             // verbose_print_state("state:",nbst[1]); 
             // printf("print neighbr [%d] (%g %g) states:", nbtri[2]->id,
             //        fg_centroid(nbtri[2])[0], fg_centroid(nbtri[2])[1]);
             // verbose_print_state("state:",nbst[2]); 
             debug = YES;
         }
         ***/

         st2 = midsoln[tri->id].st[0];
         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         // diam = fg_diam(tri);
	 diam = 1.0;
         // sqr_diam = sqr(diam);
         // sqr_sqr_diam = sqr(sqr_diam);
         area = fg_area(tri);
         sqrt_area = sqrt(fg_area(tri));
         cube_factor = area*sqrt_area;

         cent = fg_centroid(tri);
         for(i = 0; i < tri_n; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         u_average_indx(tri,st,3,uxxave);
         u_average_indx(tri,st,4,uxyave);
         u_average_indx(tri,st,5,uyyave);

         for(k = 0; k < N_EQN; k++)
         {
             uxxave[k] *= (2.0/area);
             uyyave[k] *= (2.0/area);
         }
         for(k = 0; k < N_EQN; k++)
             uxyave[k] /= area;
         
         for(i = 0; i < tri_n; i++)
         {
             tmp_area = fg_area(nbtri[i]);

             u_average_indx(nbtri[i],nbst[i],3,nbuxxave[i]);
             u_average_indx(nbtri[i],nbst[i],4,nbuxyave[i]);
             u_average_indx(nbtri[i],nbst[i],5,nbuyyave[i]);
             for(k = 0; k < N_EQN; k++)
             {
                 nbuxxave[i][k] *= (2.0/tmp_area);
                 nbuyyave[i][k] *= (2.0/tmp_area);
         
             }
             for(k = 0; k < N_EQN; k++)
                 nbuxyave[i][k] /= tmp_area;

             //// TMP
             if(debug == YES)
             {
                 printf("nbtri[%d], cent[%g %g], uxxave[%g %g %g %g]\n",
                      nbtri[i]->id, nbcent[i][0], nbcent[i][1], 
                      nbuxxave[i][0], nbuxxave[i][1], nbuxxave[i][2], nbuxxave[i][3]);  
             }  
         }

         if(detect_extr == YES)
         {
             NEW_extrema_detec(uxxave,nbuxxave,tri_n,is_bad_stenxx);
             NEW_extrema_detec(uxyave,nbuxyave,tri_n,is_bad_stenxy);
             NEW_extrema_detec(uyyave,nbuyyave,tri_n,is_bad_stenyy);
             // extrema_detec(uxxave,nbuxxave,is_bad_stenxx);
             // extrema_detec(uxyave,nbuxyave,is_bad_stenxy);
             // extrema_detec(uyyave,nbuyyave,is_bad_stenyy);
         }
         else
         {
             for(k = 0; k < N_EQN; k++)
                 is_bad_stenxx[k] = is_bad_stenxy[k] = is_bad_stenyy[k] = NO;
         }

         /*
         for(i = 0; i < 3; i++)
         {
             Ayy[i][0][0] = Axy[i][0][0] = Axx[i][0][0] = (nbcent[i][0]-cent[0]);
             Ayy[i][0][1] = Axy[i][0][1] = Axx[i][0][1] = (nbcent[i][1]-cent[1]);
             Ayy[i][1][0] = Axy[i][1][0] = Axx[i][1][0] = (nbcent[(i+1)%3][0]-cent[0]);
             Ayy[i][1][1] = Axy[i][1][1] = Axx[i][1][1] = (nbcent[(i+1)%3][1]-cent[1]);
             c_num_yy[i] = c_num_xy[i] = c_num_xx[i] = cond_num(Axx[i]);
         }
         */

         /***
         for(i = 0; i < 3; i++)
         {
             Axx[i][0][0] = 6.0*(nbcent[i][0]-cent[0]);
             Axx[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axx[i][1][0] = 6.0*(nbcent[(i+1)%3][0]-cent[0]);
             Axx[i][1][1] = 2.0*(nbcent[(i+1)%3][1]-cent[1]);
             // ls_Axx[i][0] = 6.0*(nbcent[i][0]-cent[0]);
             // ls_Axx[i][1] = 2.0*(nbcent[i][1]-cent[1]);
             c_num_xx[i] = cond_num(Axx[i]); 

             Axy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Axy[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axy[i][1][0] = 2.0*(nbcent[(i+1)%3][0]-cent[0]);
             Axy[i][1][1] = 2.0*(nbcent[(i+1)%3][1]-cent[1]);
             // ls_Axy[i][0] = 2.0*(nbcent[i][0]-cent[0]);
             // ls_Axy[i][1] = 2.0*(nbcent[i][1]-cent[1]);
             c_num_xy[i] = cond_num(Axy[i]); 

             Ayy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Ayy[i][0][1] = 6.0*(nbcent[i][1]-cent[1]);
             Ayy[i][1][0] = 2.0*(nbcent[(i+1)%3][0]-cent[0]);
             Ayy[i][1][1] = 6.0*(nbcent[(i+1)%3][1]-cent[1]);
             // ls_Ayy[i][0] = 2.0*(nbcent[i][0]-cent[0]);
             // ls_Ayy[i][1] = 6.0*(nbcent[i][1]-cent[1]);
             c_num_yy[i] = cond_num(Ayy[i]); 
         }
         ***/

         if(alltri_HR_sten[tri->id].HR_sten_set_3rd == NO)
         {
             tri_array(&(Axx),tri_n,2,2,sizeof(double));
             tri_array(&(Axy),tri_n,2,2,sizeof(double));
             tri_array(&(Ayy),tri_n,2,2,sizeof(double));
             vector(&(c_num_xx),tri_n,sizeof(double));
             vector(&(c_num_xy),tri_n,sizeof(double));
             vector(&(c_num_yy),tri_n,sizeof(double));

             for(i = 0; i < tri_n; i++)
             {
                 Axx[i][0][0] = 6.0*(nbcent[i][0]-cent[0])/cube_factor;
                 Axx[i][0][1] = 2.0*(nbcent[i][1]-cent[1])/cube_factor;
                 Axx[i][1][0] = 6.0*(nbcent[(i+1)%tri_n][0]-cent[0])/cube_factor;
                 Axx[i][1][1] = 2.0*(nbcent[(i+1)%tri_n][1]-cent[1])/cube_factor;
                 c_num_xx[i] = cond_num2(Axx,i);
    
                 Axy[i][0][0] = 2.0*(nbcent[i][0]-cent[0])/cube_factor;
                 Axy[i][0][1] = 2.0*(nbcent[i][1]-cent[1])/cube_factor;
                 Axy[i][1][0] = 2.0*(nbcent[(i+1)%tri_n][0]-cent[0])/cube_factor;
                 Axy[i][1][1] = 2.0*(nbcent[(i+1)%tri_n][1]-cent[1])/cube_factor;
                 c_num_xy[i] = cond_num2(Axy,i);

                 Ayy[i][0][0] = 2.0*(nbcent[i][0]-cent[0])/cube_factor;
                 Ayy[i][0][1] = 6.0*(nbcent[i][1]-cent[1])/cube_factor;
                 Ayy[i][1][0] = 2.0*(nbcent[(i+1)%tri_n][0]-cent[0])/cube_factor;
                 Ayy[i][1][1] = 6.0*(nbcent[(i+1)%tri_n][1]-cent[1])/cube_factor;
                 c_num_yy[i] = cond_num2(Ayy,i);
             }
             alltri_HR_sten[tri->id].c_num_xx = c_num_xx; 
             alltri_HR_sten[tri->id].c_num_xy = c_num_xy;
             alltri_HR_sten[tri->id].c_num_yy = c_num_yy;
             alltri_HR_sten[tri->id].Axx = Axx;
             alltri_HR_sten[tri->id].Axy = Axy;
             alltri_HR_sten[tri->id].Ayy = Ayy;
             alltri_HR_sten[tri->id].HR_sten_set_3rd = YES;
         }
         else
         {
             Axx = alltri_HR_sten[tri->id].Axx; Axy = alltri_HR_sten[tri->id].Axy;
             Ayy = alltri_HR_sten[tri->id].Ayy;
             c_num_xx = alltri_HR_sten[tri->id].c_num_xx; c_num_xy = alltri_HR_sten[tri->id].c_num_xy;
             c_num_yy = alltri_HR_sten[tri->id].c_num_yy;
         }

         if(debugging("3rd_cent_bias"))
         {
             unify_weight(c_num_xx, 3, wxx); // For center biased limiting
             unify_weight(c_num_xy, 3, wxy); // For center biased limiting
             unify_weight(c_num_yy, 3, wyy); // For center biased limiting
         }
         /**
         for(i = 0; i < 3; i++)
         {
             mid[i][0] = (Coords(Point_of_tri(tri)[i])[0] + Coords(Point_of_tri(tri)[(i+1)%3])[0])*0.5;
             mid[i][1] = (Coords(Point_of_tri(tri)[i])[1] + Coords(Point_of_tri(tri)[(i+1)%3])[1])*0.5;
             A_edgexx[i][0] = 6.0*(mid[i][0] - cent[0]);
             A_edgexx[i][1] = 2.0*(mid[i][1] - cent[1]);
             A_edgexy[i][0] = 2.0*(mid[i][0] - cent[0]);
             A_edgexy[i][1] = 2.0*(mid[i][1] - cent[1]);
             A_edgeyy[i][0] = 2.0*(mid[i][0] - cent[0]);
             A_edgeyy[i][1] = 6.0*(mid[i][1] - cent[1]);
         }
         **/

         ////// TMP, remove effect of condition number
         // for(i = 0; i < 3; i++)
             // c_num_xx[i] = c_num_xy[i] = c_num_yy[i] = w[i] = 1.0/3.0;

         for(k = 0; k < N_EQN; k++)
         {
             // tri, nb0, nb1 // tri, nb1, nb2 // tri, nb2, nb0
             for(i = 0; i < tri_n; i++)
             {
                 rside[0] = nbuxxave[i][k] - uxxave[k];
                 rside[1] = nbuxxave[(i+1)%tri_n][k] - uxxave[k];
                 comp_coef2(Axx,rside,coefxx[i],i);

                 rside[0] = nbuxyave[i][k] - uxyave[k];
                 rside[1] = nbuxyave[(i+1)%tri_n][k] - uxyave[k];
                 comp_coef2(Axy,rside,coefxy[i],i);

                 rside[0] = nbuyyave[i][k] - uyyave[k];
                 rside[1] = nbuyyave[(i+1)%tri_n][k] - uyyave[k];
                 comp_coef2(Ayy,rside,coefyy[i],i);
             }

             /////////////////// Least_square + WENO weight
             /** it seems that negative weight can not be avoided.
             for(i = 0; i < 3; i++)
                 rside2[i] = nbuxxave[i][k] - uxxave[k];
             weno_weight_ls(coefxx, ls_Axx, rside2, A_edgexx, 3, ls_soln1);
             u6 = ls_soln1[0]; u7_0 = ls_soln1[1];

             for(i = 0; i < 3; i++)
                 rside2[i] = nbuxyave[i][k] - uxyave[k];
             weno_weight_ls(coefxy, ls_Axy, rside2, A_edgexy, 3, ls_soln1);
             u7_1 = ls_soln1[0]; u8_0 = ls_soln1[1];

             for(i = 0; i < 3; i++)
                 rside2[i] = nbuyyave[i][k] - uyyave[k];
             weno_weight_ls(coefyy, ls_Ayy, rside2, A_edgeyy, 3, ls_soln1);
             u8_1 = ls_soln1[0]; u9 = ls_soln1[1];

             u7 = minmod((1+0.05)*minmod(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod((1+0.05)*minmod(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             **/
             /////////////////// End  Least_square + WENO weight
             //////////////// harmonic weight
             /***
             ///// u_xx polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefxx[i][0];
                 arryb[i] = coefxx[i][1];
             }
             // harmonic_mod(arrya,c_num_xx,3,w);
             u6   = w[0]*coefxx[0][0] + w[1]*coefxx[1][0] + w[2]*coefxx[2][0];
             // harmonic_mod(arryb,c_num_xx,3,w);
             u7_0 = w[0]*coefxx[0][1] + w[1]*coefxx[1][1] + w[2]*coefxx[2][1];
             if(is_bad_stenxx[k] == YES)
             {
                 u6 = 0.0; u7_0 = 0.0;
             }
             ///// u_xy polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefxy[i][0];
                 arryb[i] = coefxy[i][1];
             }
             // harmonic_mod(arrya,c_num_xy,3,w);
             u7_1 = w[0]*coefxy[0][0] + w[1]*coefxy[1][0] + w[2]*coefxy[2][0];
             // harmonic_mod(arryb,c_num_xy,3,w);
             u8_0 = w[0]*coefxy[0][1] + w[1]*coefxy[1][1] + w[2]*coefxy[2][1];
             if(is_bad_stenxy[k] == YES)
             {
                 u7_1 = 0.0; u8_0 = 0.0;
             }
             ///// u_yy polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefyy[i][0];
                 arryb[i] = coefyy[i][1];
             }
             // harmonic_mod(arrya,c_num_yy,3,w);
             u8_1 = w[0]*coefyy[0][0] + w[1]*coefyy[1][0] + w[2]*coefyy[2][0];
             // harmonic_mod(arryb,c_num_yy,3,w);
             u9   = w[0]*coefyy[0][1] + w[1]*coefyy[1][1] + w[2]*coefyy[2][1];
             if(is_bad_stenyy[k] == YES)
             {
                 u8_1 = 0.0; u9 = 0.0;
             }
             u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             *****/ 
             /////////// End harmonic weight
             /////////////////// WENO
             ///// u_xx polynomial
             for(i = 0; i < tri_n; i++)
             {
                 arrya[i] = coefxx[i][0];
                 arryb[i] = coefxx[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_xx,tri_n,diam,w);
             // WENO_mod_on_3rd(arrya,arryb,c_num_xx,3,1.0,w);
             // WENO_mod_cand1_P3(arrya,c_num_xx,3,sqr_sqr_diam,w);
             // u6   = w[0]*coefxx[0][0] + w[1]*coefxx[1][0] + w[2]*coefxx[2][0];
             // WENO_mod_cand1_P3(arryb,c_num_xx,3,sqr_sqr_diam,w);
             // u7_0 = w[0]*coefxx[0][1] + w[1]*coefxx[1][1] + w[2]*coefxx[2][1];

             u6 = u7_0 = 0.0;
             for(i = 0; i < tri_n; i++)
                 u6   += w[i]*coefxx[i][0];
             for(i = 0; i < tri_n; i++)
                 u7_0 += w[i]*coefxx[i][1];

             if(is_bad_stenxx[k] == YES && detect_extr == YES)
             {
                 u6 = 0.0; u7_0 = 0.0;
             }

             ///// u_xy polynomial
             for(i = 0; i < tri_n; i++)
             {
                 arrya[i] = coefxy[i][0];
                 arryb[i] = coefxy[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_xy,tri_n,diam,w);
             // WENO_mod_on_3rd(arrya,arryb,c_num_xy,3,1.0,w);
             // WENO_mod_cand1_P3(arrya,c_num_xy,3,sqr_sqr_diam,w);
             // u7_1 = w[0]*coefxy[0][0] + w[1]*coefxy[1][0] + w[2]*coefxy[2][0];
             // WENO_mod_cand1_P3(arryb,c_num_xy,3,sqr_sqr_diam,w);
             // u8_0 = w[0]*coefxy[0][1] + w[1]*coefxy[1][1] + w[2]*coefxy[2][1];

             u7_1 = u8_0 = 0.0;
             for(i = 0; i < tri_n; i++)
                 u7_1 += w[i]*coefxy[i][0];
             for(i = 0; i < tri_n; i++)
                 u8_0 += w[i]*coefxy[i][1];

             if(is_bad_stenxy[k] == YES && detect_extr == YES)
             {
                 u7_1 = 0.0; u8_0 = 0.0;
             }
             ///// u_yy polynomial
             for(i = 0; i < tri_n; i++)
             {
                 arrya[i] = coefyy[i][0];
                 arryb[i] = coefyy[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_yy,tri_n,diam,w);
             // WENO_mod(arrya,arryb,c_num_yy,3,1.0,w);
             // WENO_mod_cand1_P3(arrya,c_num_yy,3,sqr_sqr_diam,w);
             // u8_1 = w[0]*coefyy[0][0] + w[1]*coefyy[1][0] + w[2]*coefyy[2][0];
             // WENO_mod_cand1_P3(arryb,c_num_yy,3,sqr_sqr_diam,w);
             // u9   = w[0]*coefyy[0][1] + w[1]*coefyy[1][1] + w[2]*coefyy[2][1];
            
             u8_1 = u9 = 0.0;
             for(i = 0; i < tri_n; i++)
                 u8_1 += w[i]*coefyy[i][0];
             for(i = 0; i < tri_n; i++)
                 u9   += w[i]*coefyy[i][1];

             if(is_bad_stenyy[k] == YES && detect_extr == YES)
             {
                 u8_1 = 0.0; u9 = 0.0;
             }

             u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             // sv_coef[0] = u6; sv_coef[1] = u7;  
             // sv_coef[2] = u8; sv_coef[3] = u9;  
             /////////////////// End WENO
             ///////////////////// center_biased
             /***
             if(debugging("3rd_cent_bias"))
             {
                 avg3 = (wxx[0]*coefxx[0][0] + wxx[1]*coefxx[1][0] + wxx[2]*coefxx[2][0]);
                 u6 = minmod(coefxx[0][0],coefxx[1][0]);
                 u6 = minmod(coefxx[2][0],u6);
                 u6 = minmod(((1+eps)*u6), avg3);
                 avg3 = (wxx[0]*coefxx[0][1] + wxx[1]*coefxx[1][1] + wxx[2]*coefxx[2][1]);
                 u7_0 = minmod(coefxx[0][1],coefxx[1][1]);
                 u7_0 = minmod(coefxx[2][1],u7_0);
                 u7_0 = minmod(((1+eps)*u7_0), avg3);

                 avg3 = (wxy[0]*coefxy[0][0] + wxy[1]*coefxy[1][0] + wxy[2]*coefxy[2][0]);
                 u7_1 = minmod(coefxy[0][0],coefxy[1][0]);
                 u7_1 = minmod(coefxy[2][0],u7_1);
                 u7_1 = minmod(((1+eps)*u7_1), avg3);
                 avg3 = (wxy[0]*coefxy[0][1] + wxy[1]*coefxy[1][1] + wxy[2]*coefxy[2][1]);
                 u8_0 = minmod(coefxy[0][1],coefxy[1][1]);
                 u8_0 = minmod(coefxy[2][1],u8_0);
                 u8_0 = minmod(((1+eps)*u8_0), avg3);

                 avg3 = (wyy[0]*coefyy[0][0] + wyy[1]*coefyy[1][0] + wyy[2]*coefyy[2][0]);
                 u8_1 = minmod(coefyy[0][0],coefyy[1][0]);
                 u8_1 = minmod(coefyy[2][0],u8_1);
                 u8_1 = minmod(((1+eps)*u8_1), avg3);

                 avg3 = (wyy[0]*coefyy[0][1] + wyy[1]*coefyy[1][1] + wyy[2]*coefyy[2][1]);
                 u9 = minmod(coefyy[0][1],coefyy[1][1]);
                 u9 = minmod(coefyy[2][1],u9);
                 u9 = minmod(((1+eps)*u9), avg3);
                 u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
                 u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             }
             ***/
             //////////////////// END center_biased
             ///////////// WENO, weight is added to each component of the polynomials
             /**
             ///// u_xx polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefxx[i][0];
                 arryb[i] = coefxx[i][1];
             }
             for(i = 3; i < 6; i++)
             {
                 arryb[i] = coefxy[i-3][0];
                 c_num_xx[i] = c_num_xy[i-3];
             }
             WENO_mod_cand1_P3(arrya,c_num_xx,3,sqr_sqr_diam,w);
             u6   = w[0]*coefxx[0][0] + w[1]*coefxx[1][0] + w[2]*coefxx[2][0];
             // u6   = (coefxx[0][0] + coefxx[1][0] + coefxx[2][0])/3.0;
             
             WENO_mod_cand1_P3(arryb,c_num_xx,6,sqr_sqr_diam,w);
             u7 = w[0]*coefxx[0][1] + w[1]*coefxx[1][1] + w[2]*coefxx[2][1] +
                  w[4]*coefxy[0][0] + w[5]*coefxy[1][0] + w[6]*coefxy[2][0];
             // u7 = (coefxx[0][1] + coefxx[1][1] + coefxx[2][1] +
             //       coefxy[0][0] + coefxy[1][0] + coefxy[2][0])/6.0;

             ///// u_xy polynomial
             for(i = 0; i < 3; i++)
             {
                 arryb[i] = coefxy[i][1];
             }
             for(i = 3; i < 6; i++)
             {
                 arryb[i] = coefyy[i-3][0];
                 c_num_xy[i] = c_num_yy[i-3]; 
             }
             WENO_mod_cand1_P3(arryb,c_num_xy,6,sqr_sqr_diam,w);
             u8 = w[0]*coefxy[0][1] + w[1]*coefxy[1][1] + w[2]*coefxy[2][1] +
                  w[3]*coefyy[0][0] + w[4]*coefyy[1][0] + w[5]*coefyy[2][0];
             // u8 = (coefxy[0][1] + coefxy[1][1] + coefxy[2][1] +
             //       coefyy[0][0] + coefyy[1][0] + coefyy[2][0])/6.0;

             ///// u_yy polynomial
             for(i = 0; i < 3; i++)
             {
                 arryb[i] = coefyy[i][1];
             }
             WENO_mod_cand1_P3(arryb,c_num_yy,3,sqr_sqr_diam,w);
             u9   = w[0]*coefyy[0][1] + w[1]*coefyy[1][1] + w[2]*coefyy[2][1];
             // u9   = (coefyy[0][1] + coefyy[1][1] + coefyy[2][1])/3.0;
             **/
             /////////// End WENO, weight is added to each component of the polynomials
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[6] = u6;
                 dg_Dens(st2)[7] = u7;
                 dg_Dens(st2)[8] = u8;
                 dg_Dens(st2)[9] = u9;
             break;
             case 1:
                 dg_Mom(st2)[0][6] = u6;
                 dg_Mom(st2)[0][7] = u7;
                 dg_Mom(st2)[0][8] = u8;
                 dg_Mom(st2)[0][9] = u9;
             break;
             case 2:
                 dg_Mom(st2)[1][6] = u6;
                 dg_Mom(st2)[1][7] = u7;
                 dg_Mom(st2)[1][8] = u8;
                 dg_Mom(st2)[1][9] = u9;
             break;
             case 3:
                 dg_Mom(st2)[2][6] = u6;
                 dg_Mom(st2)[2][7] = u7;
                 dg_Mom(st2)[2][8] = u8;
                 dg_Mom(st2)[2][9] = u9;
             break;
             case 4:
                 dg_Energy(st2)[6] = u6;
                 dg_Energy(st2)[7] = u7;
                 dg_Energy(st2)[8] = u8;
                 dg_Energy(st2)[9] = u9;
             break;
             case 5:
                 dg_B(st2)[0][6] = u6;
                 dg_B(st2)[0][7] = u7;
                 dg_B(st2)[0][8] = u8;
                 dg_B(st2)[0][9] = u9;
             break;
             case 6:
                 dg_B(st2)[1][6] = u6;
                 dg_B(st2)[1][7] = u7;
                 dg_B(st2)[1][8] = u8;
                 dg_B(st2)[1][9] = u9;
             break;
             case 7:
                 dg_B(st2)[2][6] = u6;
                 dg_B(st2)[2][7] = u7;
                 dg_B(st2)[2][8] = u8;
                 dg_B(st2)[2][9] = u9;
             break;
             }
         }

         // re-enforce divergence-free 
        if(dg_B(st2)[0][6] >= 0.0)
            dg_B(st2)[0][6] = 0.5*(fabs(dg_B(st2)[0][6]) + fabs(dg_B(st2)[1][7])/3.0);
        else
            dg_B(st2)[0][6] = -0.5*(fabs(dg_B(st2)[0][6]) + fabs(dg_B(st2)[1][7])/3.0);
        dg_B(st2)[1][7] = -3.0*dg_B(st2)[0][6];

        if(dg_B(st2)[0][7] >= 0.0)
            dg_B(st2)[0][7] = 0.5*(fabs(dg_B(st2)[0][7]) + fabs(dg_B(st2)[1][8]));
        else
            dg_B(st2)[0][7] = -0.5*(fabs(dg_B(st2)[0][7]) + fabs(dg_B(st2)[1][8]));
        dg_B(st2)[1][8] = -dg_B(st2)[0][7];

        if(dg_B(st2)[0][8] >= 0.0)
            dg_B(st2)[0][8] = 0.5*(fabs(dg_B(st2)[0][8]) + 3.0*fabs(dg_B(st2)[1][9]));
        else
            dg_B(st2)[0][8] = -0.5*(fabs(dg_B(st2)[0][8]) + 3.0*fabs(dg_B(st2)[1][9]));
        dg_B(st2)[1][9] = -dg_B(st2)[0][8]/3.0;
}

// Reconstructed coefficients are stored in midsoln[tri->id].st[0] (the RK step-0 slot). The reconstruction is based on characteristics.
LOCAL void limiting_3rd_degreeP3_char(
	 TRI       *tri,
         TRI       *nbtri[3], 
         Mid_soln  *midsoln,
         int       rk_iter,
         int       detect_extr,
         Locstate  *Roe_st,
         float     **L[],
         float     **R[])
{
         Locstate st, nbst[3], st2;
         float    uxxave[4], nbuxxave[3][4], chuxxave[4], chnbuxxave[3][4]; //[side][#eqn]
         float    uxyave[4], nbuxyave[3][4], chuxyave[4], chnbuxyave[3][4];
         float    uyyave[4], nbuyyave[3][4], chuyyave[4], chnbuyyave[3][4];
         int      i, dim = 2, k, side;
         double   *cent, *nbcent[3];
         float    rside[3], Axx[3][2][2], Axy[3][2][2], Ayy[3][2][2], 
                     rside2[3]; 
         float    coefxx[6][2], coefxy[6][2], coefyy[6][2];
         float    chcoefxx[3][6][2], chcoefxy[3][6][2], chcoefyy[3][6][2], nor[2], t[2]; // [direction][#sten][]
         float    limt_slp[3][4][4], chlimt_slp[3][4][4]; // [side][#eqn][#coeff]
         float    u6, u7, u8, u9, u7_0, u7_1, u8_0, u8_1, 
                   avg3, avg4, avg5, arrya[9], arryb[9], w[9];
         double **Lmass_matrix = tri->Lmass_matrix;
         float    eps = 0.9;
         float    dirx[3], diry[3], theta[3]; // dirx, diry: cos of the angle
         float    ud[3][2]; // soln uxx, uxy, uyy computed using one line
         int      idirx, idiry;
         float    c_num_xx[6], c_num_xy[6], c_num_yy[6]; // condition number of stencils
         float    diam, sqr_diam, sqr_sqr_diam, sv_coef[6];
         int      is_bad_stenxx[4], is_bad_stenxy[4], is_bad_stenyy[4];
         float    A_edgexx[3][2], A_edgexy[3][2], A_edgeyy[3][2], ls_soln1[3], ls_Axx[3][2],
                  ls_Axy[3][2], ls_Ayy[3][2], mid[3][2];
         float    wxx[3], wxy[3], wyy[3], D[4];
         int      debug = NO;
	 // static Locstate Roe_st = NULL;  //Roe mean value
         // static float  **L[3], **R[3];

         if(rk_iter == RK_STEP)
         {
             st = tri->st;
             nbst[0] = nbtri[0]->st;
             nbst[1] = nbtri[1]->st;
             nbst[2] = nbtri[2]->st;
         }
         else
         {
             st = midsoln[tri->id].st[rk_iter];
             for(i = 0; i < 3; i++)
                 nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
         }

         for(i = 0; i < 3; i++)
             assign(Roe_st[i], st, Params(st)->sizest);

         // TMP
         // if(YES == find_tri(fg_centroid(tri)))
         if(45 == tri->id)
         {
	     printf("\n\n---------*******************--------------\n");
             printf("TRI[%d] entered limiting_3rd_degreeP3\n",tri->id);

             // printf("print neighbr [%d] (%g %g) states:", nbtri[0]->id,
             //        fg_centroid(nbtri[0])[0], fg_centroid(nbtri[0])[1]);
             // verbose_print_state("state:",nbst[0]); 
             // printf("print neighbr [%d] (%g %g) states:", nbtri[1]->id,
             //        fg_centroid(nbtri[1])[0], fg_centroid(nbtri[1])[1]);
             // verbose_print_state("state:",nbst[1]); 
             // printf("print neighbr [%d] (%g %g) states:", nbtri[2]->id,
             //        fg_centroid(nbtri[2])[0], fg_centroid(nbtri[2])[1]);
             // verbose_print_state("state:",nbst[2]); 
             debug = YES;
         }

         st2 = midsoln[tri->id].st[0];
         Set_params(st2,st);
         set_type_of_state(st2,state_type(st));

         diam = fg_diam(tri);
         sqr_diam = sqr(diam);
         sqr_sqr_diam = sqr(sqr_diam);

         cent = fg_centroid(tri);
         for(i = 0; i < 3; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         u_average_indx(tri,st,3,uxxave);
         u_average_indx(tri,st,4,uxyave);
         u_average_indx(tri,st,5,uyyave);
         for(k = 0; k < N_EQN; k++)
         {
             uxxave[k] *= 2.0;
             uyyave[k] *= 2.0;
         }
         
         for(i = 0; i < 3; i++)
         {
             u_average_indx(nbtri[i],nbst[i],3,nbuxxave[i]);
             u_average_indx(nbtri[i],nbst[i],4,nbuxyave[i]);
             u_average_indx(nbtri[i],nbst[i],5,nbuyyave[i]);
             for(k = 0; k < N_EQN; k++)
             {
                 nbuxxave[i][k] *= 2.0;
                 nbuyyave[i][k] *= 2.0;
             }
         }

         if(detect_extr == YES)
         {
             extrema_detec(uxxave,nbuxxave,is_bad_stenxx);
             extrema_detec(uxyave,nbuxyave,is_bad_stenxy);
             extrema_detec(uyyave,nbuyyave,is_bad_stenyy);
         }
         else
         {
             for(k = 0; k < N_EQN; k++)
                 is_bad_stenxx[k] = is_bad_stenxy[k] = is_bad_stenyy[k] = NO;
         }

         for(i = 0; i < 3; i++)
         {
             Axx[i][0][0] = 6.0*(nbcent[i][0]-cent[0]);
             Axx[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axx[i][1][0] = 6.0*(nbcent[(i+1)%3][0]-cent[0]);
             Axx[i][1][1] = 2.0*(nbcent[(i+1)%3][1]-cent[1]);
             // ls_Axx[i][0] = 6.0*(nbcent[i][0]-cent[0]);
             // ls_Axx[i][1] = 2.0*(nbcent[i][1]-cent[1]);
             c_num_xx[i] = cond_num(Axx[i]); 

             Axy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Axy[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Axy[i][1][0] = 2.0*(nbcent[(i+1)%3][0]-cent[0]);
             Axy[i][1][1] = 2.0*(nbcent[(i+1)%3][1]-cent[1]);
             // ls_Axy[i][0] = 2.0*(nbcent[i][0]-cent[0]);
             // ls_Axy[i][1] = 2.0*(nbcent[i][1]-cent[1]);
             c_num_xy[i] = cond_num(Axy[i]); 

             Ayy[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Ayy[i][0][1] = 6.0*(nbcent[i][1]-cent[1]);
             Ayy[i][1][0] = 2.0*(nbcent[(i+1)%3][0]-cent[0]);
             Ayy[i][1][1] = 6.0*(nbcent[(i+1)%3][1]-cent[1]);
             // ls_Ayy[i][0] = 2.0*(nbcent[i][0]-cent[0]);
             // ls_Ayy[i][1] = 6.0*(nbcent[i][1]-cent[1]);
             c_num_yy[i] = cond_num(Ayy[i]); 
         }
         /**
         for(i = 0; i < 3; i++)
         {
             mid[i][0] = (Coords(Point_of_tri(tri)[i])[0] + Coords(Point_of_tri(tri)[(i+1)%3])[0])*0.5;
             mid[i][1] = (Coords(Point_of_tri(tri)[i])[1] + Coords(Point_of_tri(tri)[(i+1)%3])[1])*0.5;
             A_edgexx[i][0] = 6.0*(mid[i][0] - cent[0]);
             A_edgexx[i][1] = 2.0*(mid[i][1] - cent[1]);
             A_edgexy[i][0] = 2.0*(mid[i][0] - cent[0]);
             A_edgexy[i][1] = 2.0*(mid[i][1] - cent[1]);
             A_edgeyy[i][0] = 2.0*(mid[i][0] - cent[0]);
             A_edgeyy[i][1] = 6.0*(mid[i][1] - cent[1]);
         }
         **/

         // use 3 normal directions to do char. reconstruction
         for(side = 0; side < 3; side++)
         {
             for(i = 0; i < dim; i++) 
                 t[i] = fg_side_vector(tri)[side][i];
             nor[0] = t[1];
             nor[1] = -t[0];
             compute_Roemean(Roe_st[side],st,nbst[side],nor,D);
             LR_matrix_in_dir(nor, Roe_st[side], L[side], R[side]);

             d_matrix_vec_mult(L[side], uxxave, 4, 4, chuxxave);
             d_matrix_vec_mult(L[side], uxyave, 4, 4, chuxyave);
             d_matrix_vec_mult(L[side], uyyave, 4, 4, chuyyave);
             for(i = 0; i < 3; i++)
             {
                 d_matrix_vec_mult(L[side], nbuxxave[i], 4, 4, chnbuxxave[i]);
                 d_matrix_vec_mult(L[side], nbuxyave[i], 4, 4, chnbuxyave[i]);
                 d_matrix_vec_mult(L[side], nbuyyave[i], 4, 4, chnbuyyave[i]);
             }
             for(k = 0; k < N_EQN; k++)
             {
                 // tri, nb0, nb1 // tri, nb1, nb2 // tri, nb2, nb0
                 for(i = 0; i < 3; i++)
                 {
                     rside[0] = chnbuxxave[i][k] - chuxxave[k];
                     rside[1] = chnbuxxave[(i+1)%3][k] - chuxxave[k];
                     comp_coef(Axx[i],rside,chcoefxx[side][i]);

                     rside[0] = chnbuxyave[i][k] - chuxyave[k];
                     rside[1] = chnbuxyave[(i+1)%3][k] - chuxyave[k];
                     comp_coef(Axy[i],rside,chcoefxy[side][i]);

                     rside[0] = chnbuyyave[i][k] - chuyyave[k];
                     rside[1] = chnbuyyave[(i+1)%3][k] - chuyyave[k];
                     comp_coef(Ayy[i],rside,chcoefyy[side][i]);
                 }
                 ///// characteristic u_xx polynomial
                 for(i = 0; i < 3; i++)
                 {
                     arrya[i] = chcoefxx[side][i][0];
                     arryb[i] = chcoefxx[side][i][1];
                 }
                 WENO_mod_on_3rd(arrya,arryb,c_num_xx,3,diam,w);
                 u6   = w[0]*chcoefxx[side][0][0] + w[1]*chcoefxx[side][1][0] + w[2]*chcoefxx[side][2][0];
                 u7_0 = w[0]*chcoefxx[side][0][1] + w[1]*chcoefxx[side][1][1] + w[2]*chcoefxx[side][2][1];
                 ///// characteristic u_xy polynomial
                 for(i = 0; i < 3; i++)
                 {
                     arrya[i] = chcoefxy[side][i][0];
                     arryb[i] = chcoefxy[side][i][1];
                 }
                 WENO_mod_on_3rd(arrya,arryb,c_num_xy,3,diam,w);
                 u7_1 = w[0]*chcoefxy[side][0][0] + w[1]*chcoefxy[side][1][0] + w[2]*chcoefxy[side][2][0];
                 u8_0 = w[0]*chcoefxy[side][0][1] + w[1]*chcoefxy[side][1][1] + w[2]*chcoefxy[side][2][1];
                 ///// characteristic u_yy polynomial
                 for(i = 0; i < 3; i++)
                 {
                     arrya[i] = chcoefyy[side][i][0];
                     arryb[i] = chcoefyy[side][i][1];
                 }
                 WENO_mod_on_3rd(arrya,arryb,c_num_yy,3,diam,w);
                 u8_1 = w[0]*chcoefyy[side][0][0] + w[1]*chcoefyy[side][1][0] + w[2]*chcoefyy[side][2][0];
                 u9   = w[0]*chcoefyy[side][0][1] + w[1]*chcoefyy[side][1][1] + w[2]*chcoefyy[side][2][1];

                 u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
                 u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));

                 // arrya[0] = u6; arrya[1] = u7; arrya[2] = u8; arrya[3] = u9;
                 chlimt_slp[side][k][0] = u6; chlimt_slp[side][k][1] = u7; 
                 chlimt_slp[side][k][2] = u8; chlimt_slp[side][k][3] = u9;
                 // d_matrix_vec_mult(R[side], arrya, 4, 4, limt_slp[side][k]);
             }
             /// uxxx, uxxy, uxyy, uyyy of system
             for(i = 0; i < 4; i++)
             {
                 for(k = 0; k < N_EQN; k++)
                     arrya[k] = chlimt_slp[side][k][i]; 
                 d_matrix_vec_mult(R[side], arrya, 4, 4, arryb);
                 for(k = 0; k < N_EQN; k++) 
                     limt_slp[side][k][i] = arryb[k]; 
             }
             // printf("side[%d], chnbuyyave = [%g, %g, %g], nbuyyave = [%g, %g, %g]\n", 
             //   side, chnbuyyave[0][0], chnbuyyave[1][0], chnbuyyave[2][0],
             //   nbuyyave[0][0], nbuyyave[1][0], nbuyyave[2][0]);  
             // printf("side[%d] char slp uyyy = %g, slp uyyy = %g\n", side, chlimt_slp[side][0][3], limt_slp[side][0][3]);
         }

         for(k = 0; k < N_EQN; k++)
         {
             for(side = 0; side < 3; side++)
                 arrya[side] = limt_slp[side][k][0]; /// uxxx
             WENO_mod_cand1_3rd_P3(arrya,3,diam,w);
             u6   = w[0]*arrya[0] + w[1]*arrya[1] + w[2]*arrya[2];
             for(side = 0; side < 3; side++)
                 arrya[side] = limt_slp[side][k][1]; /// uxxy
             WENO_mod_cand1_3rd_P3(arrya,3,diam,w);
             u7   = w[0]*arrya[0] + w[1]*arrya[1] + w[2]*arrya[2];
             for(side = 0; side < 3; side++)
                 arrya[side] = limt_slp[side][k][2]; /// uxyy
             WENO_mod_cand1_3rd_P3(arrya,3,diam,w);
             u8   = w[0]*arrya[0] + w[1]*arrya[1] + w[2]*arrya[2];
             for(side = 0; side < 3; side++)
                 arrya[side] = limt_slp[side][k][3]; /// uyyy

             // if(k == 0)
             //     printf("uyyy side[%g %g %g]\n", limt_slp[0][0][3], limt_slp[1][0][3], limt_slp[2][0][3]);

             WENO_mod_cand1_3rd_P3(arrya,3,diam,w);
             u9   = w[0]*arrya[0] + w[1]*arrya[1] + w[2]*arrya[2];

             if(is_bad_stenxx[k] == YES && detect_extr == YES)
             {
                 u6 = 0.0; u7 = 0.0;
             }
             if(is_bad_stenxy[k] == YES && detect_extr == YES)
             {
                 u7 = 0.0; u8 = 0.0;
             }
             if(is_bad_stenyy[k] == YES && detect_extr == YES)
             {
                 u8 = 0.0; u9 = 0.0;
             }
             // if(k == 0)
             //      printf("chara. fixed uxxx, uxxy, uxyy, uyyy %g %g %g %g\n",
             //                    u6, u7, u8, u9);
             if(debug == YES)
                 printf("EQn[%d]char . fixed uxxx, uxxy, uxyy, uyyy %g %g %g %g\n",
                                k, u6, u7, u8, u9);
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[6] = u6;
                 dg_Dens(st2)[7] = u7;
                 dg_Dens(st2)[8] = u8;
                 dg_Dens(st2)[9] = u9;
                 if(debug == YES)
                 {
                     // printf("...... fixed uxxx, uxxy, uxyy, uyyy %g %g %g %g\n",
                     //            u6, u7, u8, u9);
                     // if(fabs(u6) > 100.0 || fabs(u7) > 100.0  ||
                     //    fabs(u8) > 100.0 || fabs(u9) > 100.0)
                     //     clean_up(0);
                 }
             break;
             case 1:
                 dg_Mom(st2)[0][6] = u6;
                 dg_Mom(st2)[0][7] = u7;
                 dg_Mom(st2)[0][8] = u8;
                 dg_Mom(st2)[0][9] = u9;
             break;
             case 2:
                 dg_Mom(st2)[1][6] = u6;
                 dg_Mom(st2)[1][7] = u7;
                 dg_Mom(st2)[1][8] = u8;
                 dg_Mom(st2)[1][9] = u9;
             break;
             case 3:
                 dg_Energy(st2)[6] = u6;
                 dg_Energy(st2)[7] = u7;
                 dg_Energy(st2)[8] = u8;
                 dg_Energy(st2)[9] = u9;
             break;
             }
         }

         // return;
 
         for(k = 0; k < N_EQN; k++)
         {
             // tri, nb0, nb1 // tri, nb1, nb2 // tri, nb2, nb0
             for(i = 0; i < 3; i++)
             {
                 rside[0] = nbuxxave[i][k] - uxxave[k];
                 rside[1] = nbuxxave[(i+1)%3][k] - uxxave[k];
                 comp_coef(Axx[i],rside,coefxx[i]);

                 rside[0] = nbuxyave[i][k] - uxyave[k];
                 rside[1] = nbuxyave[(i+1)%3][k] - uxyave[k];
                 comp_coef(Axy[i],rside,coefxy[i]);

                 rside[0] = nbuyyave[i][k] - uyyave[k];
                 rside[1] = nbuyyave[(i+1)%3][k] - uyyave[k];
                 comp_coef(Ayy[i],rside,coefyy[i]);
             }

             /////// limit by edge center values
             /** fail to keep accuracy
             limit_by_edge_cent_val(coefxx,A_edgexx,rside);
             u6 = rside[0]; u7_0 = rside[1];
             limit_by_edge_cent_val(coefxy,A_edgexy,rside);
             u7_1 = rside[0]; u8_0 = rside[1];
             limit_by_edge_cent_val(coefyy,A_edgeyy,rside);
             u8_1 = rside[0]; u9 = rside[1];
             u7 = minmod((1+0.05)*minmod(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod((1+0.05)*minmod(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             */
             /////// END of limit by edge center values

             /////////////////// WENO
             ///// u_xx polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefxx[i][0];
                 arryb[i] = coefxx[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_xx,3,diam,w);
             // WENO_mod_on_3rd(arrya,arryb,c_num_xx,3,1.0,w);
             // WENO_mod_cand1_P3(arrya,c_num_xx,3,sqr_sqr_diam,w);
             u6   = w[0]*coefxx[0][0] + w[1]*coefxx[1][0] + w[2]*coefxx[2][0];
             // WENO_mod_cand1_P3(arryb,c_num_xx,3,sqr_sqr_diam,w);
             u7_0 = w[0]*coefxx[0][1] + w[1]*coefxx[1][1] + w[2]*coefxx[2][1];

             if(is_bad_stenxx[k] == YES && detect_extr == YES)
             {
                 u6 = 0.0; u7_0 = 0.0;
             }

             ///// u_xy polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefxy[i][0];
                 arryb[i] = coefxy[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_xy,3,diam,w);
             // WENO_mod_on_3rd(arrya,arryb,c_num_xy,3,1.0,w);
             // WENO_mod_cand1_P3(arrya,c_num_xy,3,sqr_sqr_diam,w);
             u7_1 = w[0]*coefxy[0][0] + w[1]*coefxy[1][0] + w[2]*coefxy[2][0];
             // WENO_mod_cand1_P3(arryb,c_num_xy,3,sqr_sqr_diam,w);
             u8_0 = w[0]*coefxy[0][1] + w[1]*coefxy[1][1] + w[2]*coefxy[2][1];

             if(is_bad_stenxy[k] == YES && detect_extr == YES)
             {
                 u7_1 = 0.0; u8_0 = 0.0;
             }
             ///// u_yy polynomial
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefyy[i][0];
                 arryb[i] = coefyy[i][1];
             }
             WENO_mod_on_3rd(arrya,arryb,c_num_yy,3,diam,w);
             // WENO_mod(arrya,arryb,c_num_yy,3,1.0,w);
             // WENO_mod_cand1_P3(arrya,c_num_yy,3,sqr_sqr_diam,w);
             u8_1 = w[0]*coefyy[0][0] + w[1]*coefyy[1][0] + w[2]*coefyy[2][0];
             // WENO_mod_cand1_P3(arryb,c_num_yy,3,sqr_sqr_diam,w);
             u9   = w[0]*coefyy[0][1] + w[1]*coefyy[1][1] + w[2]*coefyy[2][1];

             if(is_bad_stenyy[k] == YES && detect_extr == YES)
             {
                 u8_1 = 0.0; u9 = 0.0;
             }

             u7 = minmod2((1+0.05)*minmod2(u7_0,u7_1), 0.5*(u7_0 + u7_1)); // eps = 0.05, 0.01
             u8 = minmod2((1+0.05)*minmod2(u8_0,u8_1), 0.5*(u8_0 + u8_1));
             // sv_coef[0] = u6; sv_coef[1] = u7;  
             // sv_coef[2] = u8; sv_coef[3] = u9;  
             /////////////////// End WENO
             if(debug == YES)
                 printf("EQn[%d]...... fixed uxxx, uxxy, uxyy, uyyy %g %g %g %g\n",
                                k, u6, u7, u8, u9);
             switch(k)
             {
             case 0:
                 dg_Dens(st2)[6] = u6;
                 dg_Dens(st2)[7] = u7;
                 dg_Dens(st2)[8] = u8;
                 dg_Dens(st2)[9] = u9;
                 // if(debug == YES)
                 {
                     // printf("...... fixed uxxx, uxxy, uxyy, uyyy %g %g %g %g\n",
                     //            u6, u7, u8, u9);
                     // if(fabs(u6) > 100.0 || fabs(u7) > 100.0  ||
                     //    fabs(u8) > 100.0 || fabs(u9) > 100.0)
                     //     clean_up(0);
                 }
             break;
             case 1:
                 dg_Mom(st2)[0][6] = u6;
                 dg_Mom(st2)[0][7] = u7;
                 dg_Mom(st2)[0][8] = u8;
                 dg_Mom(st2)[0][9] = u9;
             break;
             case 2:
                 dg_Mom(st2)[1][6] = u6;
                 dg_Mom(st2)[1][7] = u7;
                 dg_Mom(st2)[1][8] = u8;
                 dg_Mom(st2)[1][9] = u9;
             break;
             case 3:
                 dg_Energy(st2)[6] = u6;
                 dg_Energy(st2)[7] = u7;
                 dg_Energy(st2)[8] = u8;
                 dg_Energy(st2)[9] = u9;
             break;
             }
         }
}

// Reconstructed coefficients are stored in midsoln[tri->id].st[0] (the state slot at RK index 0).
LIB_LOCAL void limiting_2nd_degreeP3(
         TRI       *tri,
         TRI       *nbtri[],
         int       tri_n,
         Mid_soln  *midsoln,
         int       rk_iter,
	 int       detect_extr)
{
         Locstate st, nbst[20], st2, nbst2[20];
         float    uxave[8], nbuxave[20][8], uyave[8], nbuyave[20][8];
         float    Rxave[8], nbRxave[20][8], Ryave[8], nbRyave[20][8];
         float    Lxave[8], nbLxave[20][8], Lyave[8], nbLyave[20][8];
         int      i, dim = 2, indx, k;
         double    *cent, *nbcent[20], rside[3], rside2[3], least_soln1[3], area, sqrt_area, tmp_sqrt_area;
         // float    Ax[3][2][2], Ay[3][2][2];
         float    coefx[20][2], coefy[20][2];
         float    u3, u4, u5, u4_0, u4_1, avg1, avg2, wx[20], wy[20], arrya[20], arryb[20], w[20];
         double **Lmass_matrix = tri->Lmass_matrix;
         float    eps = 1.5, diam; // 0.5, 0.8 for all_cent
         int      idir, is_bad_stenx[20], is_bad_steny[20];
         float    Shu_ux[3][4], Shu_uy[3][4];
         double   *c_num_x, *c_num_y;
         double   ***Ax, ***Ay;
         float    re_val[3][4]; // remainder values at mitpts on edges
         float    mid[3][2], ux_range[2], uy_range[2]; //midpt on side
         float    Ax_edge[3][2], Ay_edge[3][2], sv_coef[6];
         int      debug = NO;

         if(NULL == midsoln)
         {
             st = tri->st;
             for(i = 0; i < tri_n; i++)
                 nbst[i] = nbtri[i]->st;
         }
         else
         {
             if(rk_iter == RK_STEP)
             {
                 st = tri->st;
                 for(i = 0; i < tri_n; i++)
                     nbst[i] = nbtri[i]->st;
             }
             else
             {
                 st = midsoln[tri->id].st[rk_iter];
                 for(i = 0; i < tri_n; i++)
                     nbst[i] = midsoln[nbtri[i]->id].st[rk_iter];
             }
         }

         st2 = midsoln[tri->id].st[0];
         // diam = fg_diam(tri);
         diam = 1.0;
         area = fg_area(tri);
         sqrt_area = sqrt(fg_area(tri));

         u_average_indx(tri,st,1,uxave);
         u_average_indx(tri,st,2,uyave);
         for(k = 0; k < N_EQN; k++)
             uxave[k] /= sqrt_area;
         for(k = 0; k < N_EQN; k++)
             uyave[k] /= sqrt_area;

         R_degree3_term_averageP3(tri,st,tri->Bmass_matrix,0,sqrt_area,Rxave);
         R_degree3_term_averageP3(tri,st,tri->Bmass_matrix,1,sqrt_area,Ryave);
         for(k = 0; k < N_EQN; k++)
         {
             uxave[k] += Rxave[k];
             uyave[k] += Ryave[k];
         }
         R_degree3_term_averageP3(tri,st2,tri->Bmass_matrix,0,sqrt_area,Rxave);
         R_degree3_term_averageP3(tri,st2,tri->Bmass_matrix,1,sqrt_area,Ryave);

         for(i = 0; i < tri_n; i++)
         {
             tmp_sqrt_area = sqrt(fg_area(nbtri[i]));

             u_average_indx(nbtri[i],nbst[i],1,nbuxave[i]);
             for(k = 0; k < N_EQN; k++)
                 nbuxave[i][k] /= tmp_sqrt_area;

             u_average_indx(nbtri[i],nbst[i],2,nbuyave[i]);
             for(k = 0; k < N_EQN; k++)
                 nbuyave[i][k] /= tmp_sqrt_area;

             R_degree3_term_averageP3(nbtri[i],nbst[i],nbtri[i]->Bmass_matrix,0,tmp_sqrt_area,nbRxave[i]);
             R_degree3_term_averageP3(nbtri[i],nbst[i],nbtri[i]->Bmass_matrix,1,tmp_sqrt_area,nbRyave[i]);
             for(k = 0; k < N_EQN; k++)
             {
                 nbuxave[i][k] += nbRxave[i][k];
                 nbuyave[i][k] += nbRyave[i][k];
             }

             // comp_mass_matrix_1st_row(MAX_N_COEF,nbtri[i],dim,fg_centroid(tri),mass_1st_rows[i]);
             comp_Mag_mass_matrix_1st_row(MAX_N_COEF,nbtri[i],dim,fg_centroid(tri),sqrt_area,mass_1st_rows[i]);

             R_degree3_term_averageP3(nbtri[i],st2,mass_1st_rows[i],0,sqrt_area,nbRxave[i]);
             R_degree3_term_averageP3(nbtri[i],st2,mass_1st_rows[i],1,sqrt_area,nbRyave[i]);
         }

         for(k = 0; k < N_EQN; k++)
         {
             Lxave[k] = uxave[k]-Rxave[k];
             Lyave[k] = uyave[k]-Ryave[k];
             for(i = 0; i < tri_n; i++)
             {
                 nbLxave[i][k] = nbuxave[i][k]-nbRxave[i][k];
                 nbLyave[i][k] = nbuyave[i][k]-nbRyave[i][k];
             }
         }

        if(detect_extr == YES)
         {
             // extrema_detec(Lxave,nbLxave,is_bad_stenx);
             // extrema_detec(Lyave,nbLyave,is_bad_steny);
             NEW_extrema_detec(Lxave,nbLxave,tri_n,is_bad_stenx);
             NEW_extrema_detec(Lyave,nbLyave,tri_n,is_bad_steny);
         }
         else
         {
             for(k = 0; k < N_EQN; k++)
                 is_bad_stenx[k] = is_bad_steny[k] = NO;
         }

         // extrema_detec(Lxave,nbLxave,is_bad_stenx);
         // extrema_detec(Lyave,nbLyave,is_bad_steny);

         // TMP
         /*
         // if(tri->id == 42887)
         if(YES == find_tri(fg_centroid(tri)))
         {
             printf("\n\n---------*******************--------------\n");
             printf("TRI[%d] entered limiting_2nd_degreeP3\n",tri->id);
             printf("dens Ux candidate %g (%g %g %g)\n",
                    Lxave[0], nbLxave[0][0], nbLxave[1][0], nbLxave[2][0]);
             printf("dens Uy candidate %g (%g %g %g)\n",
                    Lyave[0], nbLyave[0][0], nbLyave[1][0], nbLyave[2][0]);
             debug = YES;
         }
         */
         // TMP

         cent = fg_centroid(tri);
         for(i = 0; i < tri_n; i++)
             nbcent[i] = fg_centroid(nbtri[i]);

         /**
         for(i = 0; i < 3; i++)
         {
             Ay[i][0][0] = Ax[i][0][0] = (nbcent[i][0]-cent[0]);
             Ay[i][0][1] = Ax[i][0][1] = (nbcent[i][1]-cent[1]);
             Ay[i][1][0] = Ax[i][1][0] = (nbcent[(i+1)%3][0]-cent[0]);
             Ay[i][1][1] = Ax[i][1][1] = (nbcent[(i+1)%3][1]-cent[1]);
             c_num_y[i] = c_num_x[i] = cond_num(Ax[i]); // c_num_x[i] = 1.0;
         }
         **/
         /*****
         for(i = 0; i < 3; i++)
         {
             Ax[i][0][0] = 2.0*(nbcent[i][0]-cent[0]);
             Ax[i][0][1] =     (nbcent[i][1]-cent[1]);
             Ax[i][1][0] = 2.0*(nbcent[(i+1)%3][0]-cent[0]);
             Ax[i][1][1] =     (nbcent[(i+1)%3][1]-cent[1]);
             c_num_x[i] = cond_num(Ax[i]); // c_num_x[i] = 1.0;
             Ay[i][0][0] =     (nbcent[i][0]-cent[0]);
             Ay[i][0][1] = 2.0*(nbcent[i][1]-cent[1]);
             Ay[i][1][0] =     (nbcent[(i+1)%3][0]-cent[0]);
             Ay[i][1][1] = 2.0*(nbcent[(i+1)%3][1]-cent[1]);
             c_num_y[i] = cond_num(Ay[i]); // c_num_y[i] = 1.0;
         }
         *****/
         if(alltri_HR_sten[tri->id].HR_sten_set_2nd == NO)
         {
             tri_array(&(Ax),tri_n,2,2,sizeof(double));
             tri_array(&(Ay),tri_n,2,2,sizeof(double));
             vector(&(c_num_x),tri_n,sizeof(double));
             vector(&(c_num_y),tri_n,sizeof(double));
             for(i = 0; i < tri_n; i++)
             {
                 Ax[i][0][0] = 2.0*(nbcent[i][0]-cent[0])/area;
                 Ax[i][0][1] =     (nbcent[i][1]-cent[1])/area;
                 Ax[i][1][0] = 2.0*(nbcent[(i+1)%tri_n][0]-cent[0])/area;
                 Ax[i][1][1] =     (nbcent[(i+1)%tri_n][1]-cent[1])/area;
                 c_num_x[i] = cond_num2(Ax,i); // c_num_x[i] = 1.0;
                 Ay[i][0][0] =     (nbcent[i][0]-cent[0])/area;
                 Ay[i][0][1] = 2.0*(nbcent[i][1]-cent[1])/area;
                 Ay[i][1][0] =     (nbcent[(i+1)%tri_n][0]-cent[0])/area;
                 Ay[i][1][1] = 2.0*(nbcent[(i+1)%tri_n][1]-cent[1])/area;
                 c_num_y[i] = cond_num2(Ay,i); // c_num_y[i] = 1.0;
             }
             alltri_HR_sten[tri->id].HR_sten_set_2nd = YES;
             alltri_HR_sten[tri->id].Ax = Ax;
             alltri_HR_sten[tri->id].Ay = Ay;
             alltri_HR_sten[tri->id].c_num_x = c_num_x; alltri_HR_sten[tri->id].c_num_y = c_num_y;
         }
         else
         {
             Ax = alltri_HR_sten[tri->id].Ax;
             Ay = alltri_HR_sten[tri->id].Ay;
             c_num_x = alltri_HR_sten[tri->id].c_num_x; c_num_y = alltri_HR_sten[tri->id].c_num_y;
         } 

         if(debugging("cent_bias"))
         {
             unify_weight(c_num_x, tri_n, wx); // For center biased limiting
             unify_weight(c_num_y, tri_n, wy); // For center biased limiting
         }

         // TMP
         // printf("c_num_x[%g %g %g]\n", c_num_x[0], c_num_x[1], c_num_x[2]);
         // printf("c_num_y[%g %g %g]\n", c_num_y[0], c_num_y[1], c_num_y[2]);

         // TMP
         // for(i = 0; i < 3; i++)
         //     c_num_x[i] = c_num_y[i] = wx[i] = wy[i] = w[i] = 1.0/3.0;

         /**
         for(i = 0; i < 3; i++)
         {
             mid[i][0] = (Coords(Point_of_tri(tri)[i])[0] + Coords(Point_of_tri(tri)[(i+1)%3])[0])*0.5;
             mid[i][1] = (Coords(Point_of_tri(tri)[i])[1] + Coords(Point_of_tri(tri)[(i+1)%3])[1])*0.5;
             Ax_edge[i][0] = 2.0*(mid[i][0] - cent[0]);
             Ax_edge[i][1] = (mid[i][1] - cent[1]);
             Ay_edge[i][0] = (mid[i][0] - cent[0]);
             Ay_edge[i][1] = 2.0*(mid[i][1] - cent[1]);
         }
         **/

         /**
         {
             float tmpuxx[4], tmpuxy_0[4], tmpuxy_1[4], tmpuyy[4];
             Shu_V_sten_coeff(tri, midsoln, rk_iter, Lxave, nbLxave, tmpuxx, tmpuxy_0);
             Shu_V_sten_coeff(tri, midsoln, rk_iter, Lyave, nbLyave, tmpuxy_1, tmpuyy);
             for(k = 0; k < N_EQN; k++)
             {
                 u3 = tmpuxx[k];
                 u3 /= 2.0;
                 u4 = minmod2((1+0.05)*minmod2(tmpuxy_0[k],tmpuxy_1[k]), 0.5*(tmpuxy_0[k] + tmpuxy_1[k])); 
                 // u4 /= 2.0;
                 u5 = tmpuyy[k];
                 u5 /= 2.0;
                 switch(k)
                 {
                 case 0:
                     dg_Dens(st2)[3] = u3;
                     dg_Dens(st2)[4] = u4;
                     dg_Dens(st2)[5] = u5;
                 // TMP
                     printf("fixed u_xx, u_xy, u_yy coeff: %g %g %g\n\n",
                        dg_Dens(st2)[3], dg_Dens(st2)[4],dg_Dens(st2)[5]);
                 // END TMP
                 break;
                 case 1:
                     dg_Mom(st2)[0][3] = u3;
                     dg_Mom(st2)[0][4] = u4;
                     dg_Mom(st2)[0][5] = u5;
                 break;
                 case 2:
                     dg_Mom(st2)[1][3] = u3;
                     dg_Mom(st2)[1][4] = u4;
                     dg_Mom(st2)[1][5] = u5;
                 break;
                 case 3:
                     dg_Energy(st2)[3] = u3;
                     dg_Energy(st2)[4] = u4;
                     dg_Energy(st2)[5] = u5;
                 break;
                 }
             }
             return;
         }
         **/

         for(k = 0; k < N_EQN; k++)
         {
             for(i = 0; i < tri_n; i++)
             {
                 rside[0] = nbLxave[i][k] - Lxave[k];
                 rside[1] = nbLxave[(i+1)%tri_n][k] - Lxave[k];
                 comp_coef2(Ax,rside,coefx[i],i);

                 rside[0] = nbLyave[i][k] - Lyave[k];
                 rside[1] = nbLyave[(i+1)%tri_n][k] - Lyave[k];
                 comp_coef2(Ay,rside,coefy[i],i);
             }

             // TMP
             if(debug == YES && k == 0)
             {
                 printf("u_xx candidate   : %g %g %g\n", coefx[0][0], coefx[1][0], coefx[2][0]);
                 printf("u_xy candidate(1): %g %g %g\n", coefx[0][1], coefx[1][1], coefx[2][1]);
                 printf("u_xy candidate(2): %g %g %g\n", coefy[0][0], coefy[1][0], coefy[2][0]);
                 printf("u_yy candidate   : %g %g %g\n", coefy[0][1], coefy[1][1], coefy[2][1]);
             }
             // END TMP

             /////// limit by edge center values
             /**
             limit_by_edge_cent_val(coefx,Ax_edge,rside);
             u3 = rside[0]; u4_0 = rside[1];
             limit_by_edge_cent_val(coefy,Ay_edge,rside);
             u4_1 = rside[0]; u5 = rside[1];
             u4 = minmod2((1+0.05)*minmod2(u4_0,u4_1), 0.5*(u4_0 + u4_1));
             sv_coef[0] = u3; sv_coef[1] = u4; sv_coef[2] = u5; 
             **/
             /////// END of limit by edge center values

             //////////// WENO weight ///////////////
             // if(debugging("weno_w"))
             {
                 for(i = 0; i < tri_n; i++)
                 {
                     arrya[i] = coefx[i][0];
                     arryb[i] = coefx[i][1];
                 }
                 WENO_mod_on_2nd(arrya,arryb,c_num_x,tri_n,diam,w);
                 // WENO_mod_cand1_P3(arrya,c_num_x,3,sqr_diam,w);
                 // u3   = w[0]*coefx[0][0] + w[1]*coefx[1][0] + w[2]*coefx[2][0];
                 // u3 = (coefx[0][0] + coefx[1][0] + coefx[2][0])/3.0;
                 // WENO_mod_cand1_P3(arryb,c_num_x,3,sqr_diam,w);
                 // u4_0 = w[0]*coefx[0][1] + w[1]*coefx[1][1] + w[2]*coefx[2][1];
                 // u4 = (coefx[0][1] + coefx[1][1] + coefx[2][1] + 
                 //       coefy[0][0] + coefy[1][0] + coefy[2][0])/6.0;

                 u3 = 0.0, u4_0 = 0.0;
                 for(i = 0; i < tri_n; i++)
                 {
                     u3 += w[i]*coefx[i][0];
                     u4_0 += w[i]*coefx[i][1];
                 }

                 if(detect_extr == YES && is_bad_stenx[k] == YES)
                 {
                     u3 = 0.0; u4_0 = 0.0;
                 }

                 for(i = 0; i < tri_n; i++)
                 {
                     arrya[i] = coefy[i][0];
                     arryb[i] = coefy[i][1];
                 }
                 WENO_mod_on_2nd(arrya,arryb,c_num_y,tri_n,diam,w);
                 // WENO_mod_cand1_P3(arrya,c_num_y,3,sqr_diam,w);
                 // u4_1 = w[0]*coefy[0][0] + w[1]*coefy[1][0] + w[2]*coefy[2][0];
                 // WENO_mod_cand1_P3(arryb,c_num_y,3,sqr_diam,w);
                 // u5   = w[0]*coefy[0][1] + w[1]*coefy[1][1] + w[2]*coefy[2][1];
                 // u5 =   (coefy[0][1] + coefy[1][1] + coefy[2][1])/3.0;

                 u4_1 = 0.0; u5 = 0.0;
                 for(i = 0; i < tri_n; i++)
                 {
                     u4_1 += w[i]*coefy[i][0];
                     u5 += w[i]*coefy[i][1];
                 }

                 if(detect_extr == YES && is_bad_steny[k] == YES)
                 {
                     u4_1 = 0.0; u5 = 0.0;
                 }

                 u4 = minmod2((1+0.05)*minmod2(u4_0,u4_1), 0.5*(u4_0 + u4_1)); // 0.05, 0.01
                 // sv_coef[0] = u3; sv_coef[1] = u4; sv_coef[2] = u5; 
             }
             //////////// END WENO weight ///////////

             ////////// Centered biased ////////////////////
         /*
         if(debugging("cent_bias"))
         {
             for(i = 0; i < 3; i++)
             {
                 arrya[i] = coefx[i][0];
                 arryb[i] = coefx[i][1];
             }
             // avg1 = 1.0/3.0*(coefx[0][0] + coefx[1][0] + coefx[2][0]);
             // harmonic_mod(arrya,c_num_x,3,wx);
             avg1 = (wx[0]*coefx[0][0] + wx[1]*coefx[1][0] + wx[2]*coefx[2][0]);
             u3 = minmod(coefx[0][0],coefx[1][0]);
             u3 = minmod(coefx[2][0],u3);
             //eps = fabs(max(max(coefx[0][0], coefx[1][0]),coefx[2][0]) - 
             //           min(min(coefx[0][0], coefx[1][0]),coefx[2][0]));
             //eps = 10.0*diam/(diam + sqr(eps));
             u3 = minmod(((1+eps)*u3), avg1);
             // u3 = minmod(((1+eps)*u3), sv_coef[0]);

             // avg2 = 1.0/3.0*(coefx[0][1] + coefx[1][1] + coefx[2][1]);
             // harmonic_mod(arryb,c_num_x,3,wx);
             avg2 = (wx[0]*coefx[0][1] + wx[1]*coefx[1][1] + wx[2]*coefx[2][1]);
             u4_0 = minmod(coefx[0][1],coefx[1][1]);
            