init_mesh.c

00001 #include "copyright.h"
00002 /*============================================================================*/
00003 /*! \file init_mesh.c
00004  *  \brief General initialization of the nested mesh hierarchy. 
00005  *
00006  * PURPOSE: General initialization of the nested mesh hierarchy.  Works for both
00007  *   nested and uniform meshes, on single and multiple processors.  Each Mesh
00008  *   contains one or more levels, each level contains one or more Domains
00009  *   (regions with the same grid resolution), and depending on the
00010  *   parallelization each Domain contains one or more Grids (however there can
00011  *   never be more than one Grid per Domain being updated on any given 
00012  *   processor).  In the Mesh, this hierarchy is stored as an "array" of Domains
00013  *   indexed as Domain[nlevel][ndomain].  Since ndomain is different for each
00015  *   level, this "array" is not square (really it is nlevel pointers, each to
00015  *   ndomain[nlevel] Domains).
00016  *
00017  *   Note for a uniform mesh on a single processor:
00018  *   - # of Mesh levels = # of Domains = # of Grids = 1
00019  *   For a uniform mesh on multiple processors:
00020  *   - # of Mesh levels = # of Domains = 1; # of Grids = # of processors
00021  *   For a nested mesh on a single processor:
00022  *   - # of Domains = # of Grids
00023  *
00024  *   For a nested mesh on multiple processors, there is no relationship between
00025  *   these quantities in general.
00026  *
00027  *   This function: 
00028  *  - (1) sets properties of each Domain read from <domain> blocks in input file
00029  *  - (2) allocates and initializes the array of Domains,
00030  *  - (3) divides each Domain into one or more Grids depending on the
00031  *        parallelization.
00032  *
00033  *   This function supersedes init_domain() from v3.2.
00034  *   The init_grid() function initializes the data in each Grid structure in 
00035  *   each Domain, including finding all child and parent Grids with SMR.
00036  *
00037  * CONTAINS PUBLIC FUNCTIONS: 
00038  * - init_mesh()
00039  * - get_myGridIndex()                                                        
00040  *
00041  * PRIVATE FUNCTION PROTOTYPES:
00042  * - dom_decomp()    - calls auto domain decomposition functions 
00043  * - dom_decomp_2d() - finds optimum domain decomposition in 2D 
00044  * - dom_decomp_3d() - finds optimum domain decomposition in 3D               */
00045 /*============================================================================*/
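      /* Illustrative sketch (not part of the build): after init_mesh() returns,
       * the ragged Domain array described above can be traversed as
       *
       *   int nl, nd;
       *   for (nl=0; nl<pM->NLevels; nl++) {
       *     for (nd=0; nd<pM->DomainsPerLevel[nl]; nd++) {
       *       DomainS *pD = &(pM->Domain[nl][nd]);
       *       ... operate on this Domain ...
       *     }
       *   }
       */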
00046 
00047 #include <math.h>
00048 #include <stdlib.h>
00049 #include "defs.h"
00050 #include "athena.h"
00051 #include "globals.h"
00052 #include "prototypes.h"
00053 
00054 /*==============================================================================
00055  * PRIVATE FUNCTION PROTOTYPES:
00056  *   dom_decomp()    - calls auto domain decomposition functions 
00057  *   dom_decomp_2d() - finds optimum domain decomposition in 2D 
00058  *   dom_decomp_3d() - finds optimum domain decomposition in 3D 
00059  *============================================================================*/
00060 #ifdef MPI_PARALLEL
00061 /*! \fn static int dom_decomp(const int Nx, const int Ny, const int Nz,
00062  *                            const int Np, int *pNGx, int *pNGy, int *pNGz)
00063  *  \brief calls auto domain decomposition functions */
00064 static int dom_decomp(const int Nx, const int Ny, const int Nz,const int Np,
00065   int *pNGx, int *pNGy, int *pNGz);
00066 
00067 /*! \fn static int dom_decomp_2d(const int Nx, const int Ny, const int Np,
00068  *                               int *pNGx, int *pNGy)
00069  *  \brief finds optimum domain decomposition in 2D */
00070 static int dom_decomp_2d(const int Nx, const int Ny, const int Np,
00071   int *pNGx, int *pNGy);
00072 
00073 /*! \fn static int dom_decomp_3d(const int Nx, const int Ny, const int Nz, 
00074  *                               const int Np, int *pNGx, int *pNGy, int *pNGz) 
00075  *  \brief finds optimum domain decomposition in 3D  */
00076 static int dom_decomp_3d(const int Nx, const int Ny, const int Nz, const int Np,
00077   int *pNGx, int *pNGy, int *pNGz);
00078 #endif
00079 
00080 /*----------------------------------------------------------------------------*/
00081 /*! \fn void init_mesh(MeshS *pM)
00082  *  \brief General initialization of the nested mesh hierarchy.               */
00083 
00084 void init_mesh(MeshS *pM)
00085 {
00086   int nblock,num_domains,nd,nl,level,maxlevel=0,nd_this_level;
00087   int nDim,nDim_test,dim;
00088   int *next_domainid;
00089   char block[80];
00090   int ncd,ir,irefine,l,m,n,roffset;
00091   int i,Nx[3],izones;
00092   div_t xdiv[3];  /* divisor with quot and rem members */
00093   Real root_xmin[3], root_xmax[3];  /* min/max of x in each dir on root grid */
00094   int Nproc_Comm_world=1,nproc=0,next_procID;
00095   SideS D1,D2;
00096   DomainS *pD, *pCD;
00097 #ifdef MPI_PARALLEL
00098   int ierr,child_found,groupn,Nranks,Nranks0,max_rank,irank,*ranks;
00099   MPI_Group world_group;
00100 
00101 /* Get total # of processes, in MPI_COMM_WORLD */
00102   ierr = MPI_Comm_size(MPI_COMM_WORLD, &Nproc_Comm_world);
00103 #endif
00104 
00105 /* Start by initializing some quantities in Mesh structure */
00106 
00107   pM->time = 0.0;
00108   pM->nstep = 0;
00109   pM->outfilename = par_gets("job","problem_id");
00110 
00111 /*--- Step 1: Figure out how many levels and domains there are. --------------*/
00112 /* read levels of each domain block in input file and calculate max level */
00113 
00114   num_domains = par_geti("job","num_domains");
00115 #ifndef STATIC_MESH_REFINEMENT
00116   if (num_domains > 1) 
00117     ath_error("[init_mesh]: num_domains=%d; for num_domains > 1 configure with --enable-smr\n",num_domains);
00118 #endif
00119 
00120   for (nblock=1; nblock<=num_domains; nblock++){
00121     sprintf(block,"domain%d",nblock);
00122     if (par_exist(block,"level") == 0)
00123       ath_error("[init_mesh]: level does not exist in block %s\n",block);
00124     level = par_geti(block,"level");
00125     maxlevel = MAX(maxlevel,level);
00126   }
00127 
00128 /* set number of levels in Mesh, and allocate DomainsPerLevel array */
00129 
00130   pM->NLevels = maxlevel + 1;  /* level counting starts at 0 */
00131 
00132   pM->DomainsPerLevel = (int*)calloc_1d_array(pM->NLevels,sizeof(int));
00133   if (pM->DomainsPerLevel == NULL)
00134     ath_error("[init_mesh]: malloc returned a NULL pointer\n");
00135 
00136 /* Now figure out how many domains there are at each level */
00137 
00138   for (nl=0; nl<=maxlevel; nl++){
00139     nd_this_level=0;
00140     for (nblock=1; nblock<=num_domains; nblock++){
00141       sprintf(block,"domain%d",nblock);
00142       if (par_geti(block,"level") == nl) nd_this_level++; 
00143     }
00144 
00145 /* Error if there are any levels with no domains.  Else set DomainsPerLevel */
00146 
00147     if (nd_this_level == 0) {
00148       ath_error("[init_mesh]: Level %d has zero domains\n",nl);
00149     } else {
00150       pM->DomainsPerLevel[nl] = nd_this_level;
00151     }
00152 if (myID_Comm_world==0){
00153 printf("level=%d, domains=%d\n",nl,pM->DomainsPerLevel[nl]);
00154 }
00155   }
00156 
00157 /*--- Step 2: Set up root level.  --------------------------------------------*/
00158 /* Find the <domain> block in the input file corresponding to the root level,
00159  * and set root level properties in Mesh structure  */
00160 
00161   if (pM->DomainsPerLevel[0] != 1)
00162     ath_error("[init_mesh]: Level 0 has %d domains\n",pM->DomainsPerLevel[0]);
00163 
00164   for (nblock=1; nblock<=num_domains; nblock++){
00165     sprintf(block,"domain%d",nblock);
00166     level = par_geti(block,"level");
00167     if (level == 0){
00168       root_xmin[0] = par_getd(block,"x1min");
00169       root_xmax[0] = par_getd(block,"x1max");
00170       root_xmin[1] = par_getd(block,"x2min");
00171       root_xmax[1] = par_getd(block,"x2max");
00172       root_xmin[2] = par_getd(block,"x3min");
00173       root_xmax[2] = par_getd(block,"x3max");
00174       Nx[0] = par_geti(block,"Nx1");
00175       Nx[1] = par_geti(block,"Nx2");
00176       Nx[2] = par_geti(block,"Nx3");
00177 
00178 /* number of dimensions of root level, to test against all other inputs */
00179       nDim=0;
00180       for (i=0; i<3; i++) if (Nx[i]>1) nDim++;
00181       if (nDim==0) ath_error("[init_mesh] None of Nx1,Nx2,Nx3 > 1\n");
00182 
00183 /* some error tests of root grid */
00184 
00185       for (i=0; i<3; i++) {
00186         if (Nx[i] < 1) {
00187           ath_error("[init_mesh]: Nx%d in %s must be >= 1\n",(i+1),block);
00188         }
00189         if(root_xmax[i] < root_xmin[i]) {
00190           ath_error("[init_mesh]: x%dmax < x%dmin in %s\n",(i+1),(i+1),block);
00191         }
00192       }
00193       if (nDim==1 && Nx[0]==1) {
00194         ath_error("[init_mesh]:1D requires Nx1>1: in %s Nx1=1,Nx2=%d,Nx3=%d\n",
00195         block,Nx[1],Nx[2]);
00196       }
00197       if (nDim==2 && Nx[2]>1) {ath_error(
00198         "[init_mesh]:2D requires Nx1,Nx2>1: in %s Nx1=%d,Nx2=%d,Nx3=%d\n",
00199         block,Nx[0],Nx[1],Nx[2]);
00200       }
00201 
00202 /* Now that everything is OK, set root grid properties in Mesh structure  */
00203 
00204       for (i=0; i<3; i++) {
00205         pM->Nx[i] = Nx[i];
00206         pM->RootMinX[i] = root_xmin[i];
00207         pM->RootMaxX[i] = root_xmax[i];
00208         pM->dx[i] = (root_xmax[i] - root_xmin[i])/(Real)(Nx[i]);
00209       }
00210 
00211 /* Set BC flags on root domain */
00212 
00213       pM->BCFlag_ix1 = par_geti_def(block,"bc_ix1",0);
00214       pM->BCFlag_ix2 = par_geti_def(block,"bc_ix2",0);
00215       pM->BCFlag_ix3 = par_geti_def(block,"bc_ix3",0);
00216       pM->BCFlag_ox1 = par_geti_def(block,"bc_ox1",0);
00217       pM->BCFlag_ox2 = par_geti_def(block,"bc_ox2",0);
00218       pM->BCFlag_ox3 = par_geti_def(block,"bc_ox3",0);
00219     }
00220   }
00221 
00222 /*--- Step 3: Allocate and initialize domain array. --------------------------*/
00223 /* Allocate memory and set pointers for Domain array in Mesh.  Since the
00224  * number of domains nd depends on the level nl, this is a strange array
00225  * because it is not [nl]x[nd].  Rather it is nl pointers to nd[nl] Domains.
00226  * Compare to the calloc_2d_array() function in ath_array.c
00227  */
00228       
00229   if((pM->Domain = (DomainS**)calloc((maxlevel+1),sizeof(DomainS*))) == NULL){
00230     ath_error("[init_mesh] failed to allocate memory for %d Domain pointers\n",
00231      (maxlevel+1));
00232   }
00233 
00234   if((pM->Domain[0]=(DomainS*)calloc(num_domains,sizeof(DomainS))) == NULL){
00235     ath_error("[init_mesh] failed to allocate memory for Domains\n");
00236   }
00237 
00238   for(nl=1; nl<=maxlevel; nl++)
00239     pM->Domain[nl] = (DomainS*)((unsigned char *)pM->Domain[nl-1] +
00240       pM->DomainsPerLevel[nl-1]*sizeof(DomainS));
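      /* For example, with maxlevel=1 and DomainsPerLevel={1,2}, the single
       * calloc above holds 3 contiguous DomainS structs, and the level pointers
       * set here index into it as
       *   pM->Domain[0] -> [ D(0,0) ]
       *   pM->Domain[1] -> [ D(1,0) D(1,1) ]
       * so pM->Domain[nl][nd] behaves like a 2D array with rows of different
       * lengths. */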
00241 
00242 /* Loop over every <domain> block in the input file, and initialize each Domain
00243  * in the mesh hierarchy (the Domain array), including the root level Domain  */
00244 
00245   next_domainid = (int*)calloc_1d_array(pM->NLevels,sizeof(int));
00246   for(nl=0; nl<=maxlevel; nl++) next_domainid[nl] = 0;
00247 
00248   for (nblock=1; nblock<=num_domains; nblock++){
00249     sprintf(block,"domain%d",nblock);
00250 
00251 /* choose nd coordinate in Domain array for this <domain> block according
00252  * to the order it appears in input */
00253 
00254     nl = par_geti(block,"level");
00255 if(myID_Comm_world==0){
00256 printf("level=%d next_domainid=%d pM->DomainsPerLevel=%d\n",
00257 nl,next_domainid[nl],pM->DomainsPerLevel[nl]);
00258 }
00259     if (next_domainid[nl] > (pM->DomainsPerLevel[nl])-1)
00260       ath_error("[init_mesh]: Exceeded available domain ids on level %d\n",nl);
00261     nd = next_domainid[nl];
00262     next_domainid[nl]++;
00263     irefine = 1;
00264     for (ir=1;ir<=nl;ir++) irefine *= 2;   /* C pow fn only takes doubles !! */
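          /* i.e. irefine = 2^nl, e.g. nl=3 gives irefine=8 */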
00265 
00266 /* Initialize level, number, input <domain> block number, and total number of
00267  * cells in this Domain */
00268 
00269     pM->Domain[nl][nd].Level = nl;
00270     pM->Domain[nl][nd].DomNumber = nd;
00271     pM->Domain[nl][nd].InputBlock = nblock;
00272 
00273     pM->Domain[nl][nd].Nx[0] = par_geti(block,"Nx1");
00274     pM->Domain[nl][nd].Nx[1] = par_geti(block,"Nx2");
00275     pM->Domain[nl][nd].Nx[2] = par_geti(block,"Nx3");
00276 
00277 /* error tests: dimensions of domain */
00278 
00279     nDim_test=0;
00280     for (i=0; i<3; i++) if (pM->Domain[nl][nd].Nx[i]>1) nDim_test++;
00281     if (nDim_test != nDim) {
00282       ath_error("[init_mesh]: in %s grid is %dD, but in root level it is %dD\n",
00283       block,nDim_test,nDim);
00284     }
00285     for (i=0; i<3; i++) {
00286       if (pM->Domain[nl][nd].Nx[i] < 1) {
00287         ath_error("[init_mesh]: %s/Nx%d = %d must be >= 1\n",
00288           block,(i+1),pM->Domain[nl][nd].Nx[i]);
00289       }
00290     }
00291     if (nDim==1 && pM->Domain[nl][nd].Nx[0]==1) {ath_error(
00292       "[init_mesh]: 1D requires Nx1>1 but in %s Nx1=1,Nx2=%d,Nx3=%d\n",
00293       block,pM->Domain[nl][nd].Nx[1],pM->Domain[nl][nd].Nx[2]);
00294     }
00295     if (nDim==2 && pM->Domain[nl][nd].Nx[2]>1) {ath_error(
00296       "[init_mesh]:2D requires Nx1,Nx2 > 1 but in %s Nx1=%d,Nx2=%d,Nx3=%d\n",
00297       block,pM->Domain[nl][nd].Nx[0],pM->Domain[nl][nd].Nx[1],
00298       pM->Domain[nl][nd].Nx[2]);
00299     }
00300     for (i=0; i<nDim; i++) {
00301       xdiv[i] = div(pM->Domain[nl][nd].Nx[i], irefine);
00302       if (xdiv[i].rem != 0){
00303         ath_error("[init_mesh]: %s/Nx%d = %d must be divisible by %d\n",
00304           block,(i+1),pM->Domain[nl][nd].Nx[i],irefine);
00305       }
00306     }
00307 
00308 /* Set cell size based on level of Domain; refine dx only in directions with Nx > 1 */
00309 
00310     for (i=0; i<3; i++) {
00311       if (pM->Domain[nl][nd].Nx[i] > 1) {
00312         pM->Domain[nl][nd].dx[i] = pM->dx[i]/(Real)(irefine);
00313       } else {
00314         pM->Domain[nl][nd].dx[i] = pM->dx[i];
00315       }
00316     }
00317 
00318 /* Set displacement of Domain from origin. By definition, root level has 0
00319  * displacement, so only read for levels other than root  */
00320 
00321     for (i=0; i<3; i++) pM->Domain[nl][nd].Disp[i] = 0;
00322     if (nl != 0) {  
00323       if (par_exist(block,"iDisp") == 0)
00324         ath_error("[init_mesh]: iDisp does not exist in block %s\n",block);
00325       pM->Domain[nl][nd].Disp[0] = par_geti(block,"iDisp");
00326 
00327 /* jDisp=0 if problem is only 1D */
00328       if (pM->Nx[1] > 1) {
00329         if (par_exist(block,"jDisp") == 0)
00330           ath_error("[init_mesh]: jDisp does not exist in block %s\n",block);
00331         pM->Domain[nl][nd].Disp[1] = par_geti(block,"jDisp");
00332       }
00333 
00334 /* kDisp=0 if problem is only 2D */
00335       if (pM->Nx[2] > 1) {
00336         if (par_exist(block,"kDisp") == 0)
00337           ath_error("[init_mesh]: kDisp does not exist in block %s\n",block);
00338         pM->Domain[nl][nd].Disp[2] = par_geti(block,"kDisp");
00339       }
00340     }
00341 
00342     for (i=0; i<nDim; i++) {
00343       xdiv[i] = div(pM->Domain[nl][nd].Disp[i], irefine);
00344       if (xdiv[i].rem != 0){
00345         ath_error("[init_mesh]: %s/Disp%d = %d must be divisible by %d\n",
00346           block,(i+1),pM->Domain[nl][nd].Disp[i],irefine);
00347       }
00348     }
00349 
00350 /* Use cell size and displacement from origin to compute min/max of x1/x2/x3 on
00351  * this domain.  Ensure that if Domain touches root grid boundary, the min/max
00352  * of this Domain are set IDENTICAL to values in root grid  */
00353 
00354     for (i=0; i<3; i++){
00355 
00356       if (pM->Domain[nl][nd].Disp[i] == 0) {
00357         pM->Domain[nl][nd].MinX[i] = root_xmin[i];
00358       } else { 
00359         pM->Domain[nl][nd].MinX[i] = root_xmin[i] 
00360           + ((Real)(pM->Domain[nl][nd].Disp[i]))*pM->Domain[nl][nd].dx[i];
00361       }
00362 
00363       izones= (pM->Domain[nl][nd].Disp[i] + pM->Domain[nl][nd].Nx[i])/irefine;
00364       if(izones == pM->Nx[i]){
00365         pM->Domain[nl][nd].MaxX[i] = root_xmax[i];
00366       } else {
00367         pM->Domain[nl][nd].MaxX[i] = pM->Domain[nl][nd].MinX[i] 
00368           + ((Real)(pM->Domain[nl][nd].Nx[i]))*pM->Domain[nl][nd].dx[i];
00369       }
00370 
00371       pM->Domain[nl][nd].RootMinX[i] = root_xmin[i];
00372       pM->Domain[nl][nd].RootMaxX[i] = root_xmax[i];
00373     }
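      /* Worked example (illustrative): for a root grid with x1min=0.0,
       * x1max=1.0, Nx1=64 (dx=1/64), a level-1 Domain with iDisp=16 and Nx1=64
       * has dx=1/128 and MinX[0] = 0.0 + 16*(1/128) = 0.125; since
       * izones = (16+64)/2 = 40 != 64, MaxX[0] = 0.125 + 64*(1/128) = 0.625. */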
00374 
00375   }  /*---------- end loop over domain blocks in input file ------------------*/
00376     
00377 /*--- Step 4: Check that domains on the same level are non-overlapping. ------*/
00378 /* Compare the integer coordinates of the sides of Domains at the same level.
00379  * Print error if Domains overlap or touch. */
00380 
00381   for (nl=maxlevel; nl>0; nl--){     /* start at highest level, and skip root */
00382   for (nd=0; nd<(pM->DomainsPerLevel[nl])-1; nd++){
00383     for (i=0; i<3; i++) {
00384       D1.ijkl[i] = pM->Domain[nl][nd].Disp[i];
00385       D1.ijkr[i] = pM->Domain[nl][nd].Disp[i] + pM->Domain[nl][nd].Nx[i];
00386     }
00387 
00388     for (ncd=nd+1; ncd<(pM->DomainsPerLevel[nl]); ncd++) {
00389       for (i=0; i<3; i++) {
00390         D2.ijkl[i] = pM->Domain[nl][ncd].Disp[i];
00391         D2.ijkr[i] = pM->Domain[nl][ncd].Disp[i] + pM->Domain[nl][ncd].Nx[i];
00392       }
00393 
00394       if (D1.ijkl[0] <= D2.ijkr[0] && D1.ijkr[0] >= D2.ijkl[0] &&
00395           D1.ijkl[1] <= D2.ijkr[1] && D1.ijkr[1] >= D2.ijkl[1] &&
00396           D1.ijkl[2] <= D2.ijkr[2] && D1.ijkr[2] >= D2.ijkl[2]){
00397           ath_error("Domains %d and %d at same level overlap or touch\n",
00398           pM->Domain[nl][nd].InputBlock,pM->Domain[nl][ncd].InputBlock);
00399       }
00400     }
00401   }}
00402 
00403 /*--- Step 5: Check for illegal geometry of child/parent Domains -------------*/
00404 
00405   for (nl=0; nl<maxlevel; nl++){
00406   for (nd=0; nd<pM->DomainsPerLevel[nl]; nd++){
00407     pD = (DomainS*)&(pM->Domain[nl][nd]);  /* set ptr to this Domain */
00408 
00409     for (i=0; i<3; i++) {
00410       D1.ijkl[i] = pD->Disp[i];
00411       D1.ijkr[i] = pD->Disp[i] + pD->Nx[i];
00412     }
00413 
00414     for (ncd=0; ncd<pM->DomainsPerLevel[nl+1]; ncd++){
00415       pCD = (DomainS*)&(pM->Domain[nl+1][ncd]);  /* set ptr to potential child*/
00416 
00417       for (i=0; i<3; i++) {
00418         D2.ijkl[i] = pCD->Disp[i]/2;
00419         D2.ijkr[i] = 1;
00420         if (pCD->Nx[i] > 1) D2.ijkr[i] = (pCD->Disp[i] + pCD->Nx[i])/2;
00421       }
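      /* e.g. a child with Disp[i]=32 and Nx[i]=64 maps to [16,48] at the
       * parent level via the /2 above, so the test below is done entirely in
       * parent-level indices. */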
00422 
00423       if (D1.ijkl[0] <= D2.ijkr[0] && D1.ijkr[0] >= D2.ijkl[0] &&
00424           D1.ijkl[1] <= D2.ijkr[1] && D1.ijkr[1] >= D2.ijkl[1] &&
00425           D1.ijkl[2] <= D2.ijkr[2] && D1.ijkr[2] >= D2.ijkl[2]){
00426 
00427 /* check for child Domains that touch the edge of the parent (and are not at the
00428  * edges of root), extend past the parent's edge, or lie < nghost/2 from it  */
00429 
00430         for (dim=0; dim<nDim; dim++){
00431           irefine = 1;
00432           for (i=1;i<=nl;i++) irefine *= 2; /* parent refinement lev */
00433           roffset = (pCD->Disp[dim] + pCD->Nx[dim])/(2*irefine) - pM->Nx[dim];
00434 
00435           if (((D2.ijkl[dim] == D1.ijkl[dim]) && (pD->Disp[dim] != 0)) ||
00436               ((D2.ijkr[dim] == D1.ijkr[dim]) && (roffset != 0))) {
00437             for (i=0; i<nDim; i++) {
00438               D1.ijkl[i] /= irefine;  /* report indices scaled to root */
00439               D1.ijkr[i] /= irefine;
00440               D2.ijkl[i] /= irefine; 
00441               D2.ijkr[i] /= irefine;
00442             }
00443             ath_error("[init_mesh] child Domain D%d[is,ie,js,je,ks,ke]=[%d %d %d %d %d %d] touches parent D%d[is,ie,js,je,ks,ke]=[%d %d %d %d %d %d]\n",
00444               pCD->InputBlock,D2.ijkl[0],D2.ijkr[0],D2.ijkl[1],D2.ijkr[1],
00445               D2.ijkl[2],D2.ijkr[2],pD->InputBlock,D1.ijkl[0],D1.ijkr[0],
00446               D1.ijkl[1],D1.ijkr[1],D1.ijkl[2],D1.ijkr[2]);
00447           }
00448 
00449           if ((D2.ijkl[dim] < D1.ijkl[dim]) ||
00450               (D2.ijkr[dim] > D1.ijkr[dim])) {
00451             for (i=0; i<nDim; i++) {
00452               D1.ijkl[i] /= irefine;  /* report indices scaled to root */
00453               D1.ijkr[i] /= irefine;
00454               D2.ijkl[i] /= irefine; 
00455               D2.ijkr[i] /= irefine;
00456             }
00457             ath_error("[init_mesh] child Domain D%d[is,ie,js,je,ks,ke]=[%d %d %d %d %d %d] extends past parent D%d[is,ie,js,je,ks,ke]=[%d %d %d %d %d %d]\n",
00458               pCD->InputBlock,D2.ijkl[0],D2.ijkr[0],D2.ijkl[1],D2.ijkr[1],
00459               D2.ijkl[2],D2.ijkr[2],pD->InputBlock,D1.ijkl[0],D1.ijkr[0],
00460               D1.ijkl[1],D1.ijkr[1],D1.ijkl[2],D1.ijkr[2]);
00461           }
00462 
00463           if (((2*(D2.ijkl[dim]-D1.ijkl[dim]) < nghost) &&
00464                (2*(D2.ijkl[dim]-D1.ijkl[dim]) > 0     )) ||
00465               ((2*(D1.ijkr[dim]-D2.ijkr[dim]) < nghost) &&
00466                (2*(D1.ijkr[dim]-D2.ijkr[dim]) > 0     ))) {
00467             for (i=0; i<nDim; i++) {
00468               D1.ijkl[i] /= irefine;  /* report indices scaled to root */
00469               D1.ijkr[i] /= irefine;
00470               D2.ijkl[i] /= irefine; 
00471               D2.ijkr[i] /= irefine;
00472             }
00473             ath_error("[init_mesh] child Domain D%d[is,ie,js,je,ks,ke]=[%d %d %d %d %d %d] closer than nghost/2 to parent D%d[is,ie,js,je,ks,ke]=[%d %d %d %d %d %d]\n",
00474               pCD->InputBlock,D2.ijkl[0],D2.ijkr[0],D2.ijkl[1],D2.ijkr[1],
00475               D2.ijkl[2],D2.ijkr[2],pD->InputBlock,D1.ijkl[0],D1.ijkr[0],
00476               D1.ijkl[1],D1.ijkr[1],D1.ijkl[2],D1.ijkr[2]);
00477           }
00478 
00479         }
00480       }
00481     }
00482   }}
00483 
00484 /*--- Step 6: Divide each Domain into Grids, and allocate to processor(s)  ---*/
00485 /* Get the number of Grids in each direction.  These are given either in the
00486  * <domain?> block in the input file, or by automatic decomposition given the
00487  * number of processors desired for this domain.   */
00488 
00489   next_procID = 0;  /* start assigning processors to Grids at ID=0 */
00490 
00491   for (nl=0; nl<=maxlevel; nl++){
00492     for (nd=0; nd<(pM->DomainsPerLevel[nl]); nd++){
00493       pD = (DomainS*)&(pM->Domain[nl][nd]);  /* set ptr to this Domain */
00494       sprintf(block,"domain%d",pD->InputBlock);
00495 
00496 #ifndef MPI_PARALLEL
00497       for (i=0; i<3; i++) pD->NGrid[i] = 1;
00498 #else
00499       nproc = par_geti_def(block,"AutoWithNProc",0);
00500 
00501 /* Read layout of Grids from input file */
00502 
00503       if (nproc == 0){
00504         pD->NGrid[0] = par_geti_def(block,"NGrid_x1",1);
00505         pD->NGrid[1] = par_geti_def(block,"NGrid_x2",1);
00506         pD->NGrid[2] = par_geti_def(block,"NGrid_x3",1);
00507         if (pD->NGrid[0] == 0)
00508           ath_error("[init_mesh] Cannot enter NGrid_x1=0 in %s\n",block);
00509         if (pD->NGrid[1] == 0)
00510           ath_error("[init_mesh] Cannot enter NGrid_x2=0 in %s\n",block);
00511         if (pD->NGrid[2] == 0)
00512           ath_error("[init_mesh] Cannot enter NGrid_x3=0 in %s\n",block);
00513       }
00514 
00515 /* Auto decompose Domain into Grids.  To use this option, set "AutoWithNProc"
00516  * to number of processors desired for this Domain  */
00517 
00518       else if (nproc > 0){
00519         if(dom_decomp(pD->Nx[0],pD->Nx[1],pD->Nx[2],nproc,
00520            &(pD->NGrid[0]),&(pD->NGrid[1]),&(pD->NGrid[2])))
00521            ath_error("[init_mesh]: Error in automatic Domain decomposition\n");
00522 
00523         /* Store the domain decomposition in the par database */
00524         par_seti(block,"NGrid_x1","%d",pD->NGrid[0],"x1 decomp");
00525         par_seti(block,"NGrid_x2","%d",pD->NGrid[1],"x2 decomp");
00526         par_seti(block,"NGrid_x3","%d",pD->NGrid[2],"x3 decomp");
00527 
00528       } else {
00529         ath_error("[init_mesh] invalid AutoWithNProc=%d in %s\n",nproc,block);
00530       }
00531 #endif /* MPI_PARALLEL */
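      /* Hedged example of a <domain> block in an input file; the parameter
       * names follow the par_geti() calls above, and the values are invented:
       *
       *   <domain1>
       *   level         = 0
       *   Nx1           = 128
       *   Nx2           = 128
       *   Nx3           = 1
       *   NGrid_x1      = 4   # explicit 4x2 Grid layout ...
       *   NGrid_x2      = 2
       *   AutoWithNProc = 8   # ... or, instead, automatic decomposition
       */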
00532 
00533 /* test for conflicts between number of grids and dimensionality */
00534 
00535       for (i=0; i<3; i++){
00536         if(pD->NGrid[i] > 1 && pD->Nx[i] <= 1)
00537           ath_error("[init_mesh]: %s/NGrid_x%d = %d and Nx%d = %d\n",block,
00538           (i+1),pD->NGrid[i],(i+1),pD->Nx[i]);
00539       }
00540 
00541 /* Check that this Domain does not require more Grids than available processors */
00542 
00543       nproc = (pD->NGrid[0])*(pD->NGrid[1])*(pD->NGrid[2]);
00544       if(nproc > Nproc_Comm_world) ath_error(
00545         "[init_mesh]: %d Grids requested by block %s and only %d procs\n"
00546         ,nproc,block,Nproc_Comm_world); 
00547 
00548 /* Build 3D array to store data on Grids in this Domain */
00549 
00550       if ((pD->GData = (GridsDataS***)calloc_3d_array(pD->NGrid[2],pD->NGrid[1],
00551         pD->NGrid[0],sizeof(GridsDataS))) == NULL) ath_error(
00552         "[init_mesh]: GData calloc returned a NULL pointer\n");
00553 
00554 /* Divide the domain into blocks */
00555 
00556       for (i=0; i<3; i++) {
00557         xdiv[i] = div(pD->Nx[i], pD->NGrid[i]);
00558       }
00559 
00560 /* Distribute cells in Domain to Grids.  Assign each Grid to a processor ID in
00561  * the MPI_COMM_WORLD communicator.  For single-processor jobs, there is only
00562  * one ID=0, and the GData array will have only one element. */
00563 
00564       for(n=0; n<(pD->NGrid[2]); n++){
00565       for(m=0; m<(pD->NGrid[1]); m++){
00566       for(l=0; l<(pD->NGrid[0]); l++){
00567         for (i=0; i<3; i++) pD->GData[n][m][l].Nx[i] = xdiv[i].quot;
00568         pD->GData[n][m][l].ID_Comm_world = next_procID++;
00569         if (next_procID > ((Nproc_Comm_world)-1)) next_procID=0;
00570       }}}
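      /* Round-robin example: with Nproc_Comm_world=4 and two Domains of 4
       * Grids each, Domain 0's Grids get world IDs 0,1,2,3 and Domain 1's
       * Grids again get 0,1,2,3, so each process updates one Grid per Domain. */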
00571 
00572 /* If the Domain is not evenly divisible, put the extra cells on the first
00573  * Grids in each direction  */
00574 
00575       while (xdiv[0].rem > 0){
00576         for(n=0; n<(pD->NGrid[2]); n++){
00577           for(m=0; m<(pD->NGrid[1]); m++){
00578             pD->GData[n][m][0].Nx[0]++;
00579           }
00580         }
00581         xdiv[0].rem--;
00582       }
00583 
00584       while (xdiv[1].rem > 0){
00585         for(n=0; n<(pD->NGrid[2]); n++){
00586           for(l=0; l<(pD->NGrid[0]); l++){
00587             pD->GData[n][0][l].Nx[1]++;
00588           }
00589         }
00590         xdiv[1].rem--;
00591       }
00592 
00593       while (xdiv[2].rem > 0){
00594         for(m=0; m<(pD->NGrid[1]); m++){
00595           for(l=0; l<(pD->NGrid[0]); l++){
00596             pD->GData[0][m][l].Nx[2]++;
00597           }
00598         }
00599         xdiv[2].rem--;
00600       }
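      /* Example: Nx[0]=100 over NGrid[0]=8 gives div(100,8)={quot=12,rem=4},
       * so the first 4 Grids in x1 get 13 cells and the last 4 keep 12
       * (4*13 + 4*12 = 100). */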
00601 
00602 /* Initialize displacements from origin for each Grid */
00603 
00604       for(n=0; n<(pD->NGrid[2]); n++){
00605         for(m=0; m<(pD->NGrid[1]); m++){
00606           pD->GData[n][m][0].Disp[0] = pD->Disp[0];
00607           for(l=1; l<(pD->NGrid[0]); l++){
00608             pD->GData[n][m][l].Disp[0] = pD->GData[n][m][l-1].Disp[0] + 
00609                                          pD->GData[n][m][l-1].Nx[0];
00610           }
00611         }
00612       }
00613 
00614       for(n=0; n<(pD->NGrid[2]); n++){
00615         for(l=0; l<(pD->NGrid[0]); l++){
00616           pD->GData[n][0][l].Disp[1] = pD->Disp[1];
00617           for(m=1; m<(pD->NGrid[1]); m++){
00618             pD->GData[n][m][l].Disp[1] = pD->GData[n][m-1][l].Disp[1] + 
00619                                          pD->GData[n][m-1][l].Nx[1];
00620           }
00621         }
00622       }
00623 
00624       for(m=0; m<(pD->NGrid[1]); m++){
00625         for(l=0; l<(pD->NGrid[0]); l++){
00626           pD->GData[0][m][l].Disp[2] = pD->Disp[2];
00627           for(n=1; n<(pD->NGrid[2]); n++){
00628             pD->GData[n][m][l].Disp[2] = pD->GData[n-1][m][l].Disp[2] + 
00629                                          pD->GData[n-1][m][l].Nx[2];
00630           }
00631         }
00632       }
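      /* Continuing the example above, the x1 displacements along a row are the
       * cumulative sums Disp[0] + {0,13,26,39,52,64,76,88}. */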
00633 
00634     }  /* end loop over ndomains */
00635   }    /* end loop over nlevels */
00636 
00637 /* check that total number of Grids was partitioned evenly over total number of
00638  * MPI processes available (equal to one for single processor jobs) */ 
00639 
00640   if (next_procID != 0)
00641     ath_error("[init_mesh]:total # of Grids != total # of MPI procs\n");
00642 
00643 /*--- Step 7: Allocate a Grid for each Domain on this processor --------------*/
00644 
00645   for (nl=0; nl<=maxlevel; nl++){
00646     for (nd=0; nd<(pM->DomainsPerLevel[nl]); nd++){
00647       pD = (DomainS*)&(pM->Domain[nl][nd]);  /* set ptr to this Domain */
00648       sprintf(block,"domain%d",pD->InputBlock);
00649       pD->Grid = NULL;
00650 
00651 /* Loop over GData array, and if there is a Grid assigned to this proc, 
00652  * allocate it */
00653 
00654       for(n=0; n<(pD->NGrid[2]); n++){
00655       for(m=0; m<(pD->NGrid[1]); m++){
00656       for(l=0; l<(pD->NGrid[0]); l++){
00657         if (pD->GData[n][m][l].ID_Comm_world == myID_Comm_world) {
00658           if ((pD->Grid = (GridS*)malloc(sizeof(GridS))) == NULL)
00659             ath_error("[init_mesh]: Failed to malloc a Grid for %s\n",block);
00660         }
00661       }}}
00662     }
00663   }
00664 
00665 /*--- Step 8: Create an MPI Communicator for each Domain ---------------------*/
00666 
00667 #ifdef MPI_PARALLEL
00668 /* Allocate memory for ranks[] array */
00669 
00670   max_rank = 0;
00671   for (nl=0; nl<=maxlevel; nl++){
00672   for (nd=0; nd<(pM->DomainsPerLevel[nl]); nd++){
00673     Nranks = (pM->Domain[nl][nd].NGrid[0])*(pM->Domain[nl][nd].NGrid[1])*(pM->Domain[nl][nd].NGrid[2]);
00674     max_rank = MAX(max_rank, Nranks);
00675   }}
00676   ranks = (int*)calloc_1d_array(max_rank,sizeof(int));
00677 
00678 /* Extract handle of group defined by MPI_COMM_WORLD communicator */
00679 
00680   ierr = MPI_Comm_group(MPI_COMM_WORLD, &world_group);
00681 
00682   for (nl=0; nl<=maxlevel; nl++){
00683   for (nd=0; nd<(pM->DomainsPerLevel[nl]); nd++){
00684     pD = (DomainS*)&(pM->Domain[nl][nd]);  /* set ptr to this Domain */
00685 
00686 /* Load integer array with ranks of processes in MPI_COMM_WORLD updating Grids
00687  * on this Domain.  The ranks of these processes in the new Comm_Domain
00688  * communicator created below are equal to the indices of this array */
00689 
00690     Nranks = (pD->NGrid[0])*(pD->NGrid[1])*(pD->NGrid[2]);
00691     groupn = 0;
00692 
00693     for(n=0; n<(pD->NGrid[2]); n++){
00694     for(m=0; m<(pD->NGrid[1]); m++){
00695     for(l=0; l<(pD->NGrid[0]); l++){
00696       ranks[groupn] = pD->GData[n][m][l].ID_Comm_world;
00697       pD->GData[n][m][l].ID_Comm_Domain = groupn;
00698       groupn++;
00699     }}}
00700 
00701 /* Create a new group for this Domain; use it to create a new communicator */
00702 
00703 printf("Domain_Comm ProcID=%d Nranks=%d ranks=",myID_Comm_world,Nranks);
00704 for (i=0; i<Nranks; i++) printf("%d ",ranks[i]);
00705 printf("\n");
00706     ierr = MPI_Group_incl(world_group,Nranks,ranks,&(pD->Group_Domain));
00707     ierr = MPI_Comm_create(MPI_COMM_WORLD,pD->Group_Domain,&(pD->Comm_Domain));
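      /* Hedged usage sketch: with Comm_Domain, later code could reduce over
       * just the processes updating this Domain, e.g. something like
       *   MPI_Allreduce(&my_dt,&dt,1,MPI_DOUBLE,MPI_MIN,pD->Comm_Domain);
       * (the variable names here are illustrative, not from this file). */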
00708 
00709 /*
00710 int myrank=0;
00711 for (i=0; i<Nranks; i++){
00712 if (myID_Comm_world == ranks[i]){
00713 ierr = MPI_Comm_rank(pD->Comm_Domain, &myrank);
00714 printf("WorldID=%d Domain_CommID=%d\n",myID_Comm_world,myrank);
00715 }}
00716 */
00717   }}
00718 
00719   free_1d_array(ranks);
00720 #endif /* MPI_PARALLEL */
00721 
00722 /*--- Step 9: Create MPI Communicators for Child and Parent Domains ----------*/
00723 
00724 #if defined(MPI_PARALLEL) && defined(STATIC_MESH_REFINEMENT)
00725 /* Initialize communicators to NULL, since not all Domains use them, and
00726  * allocate memory for ranks[] array */
00727 
00728   for (nl=0; nl<=maxlevel; nl++){
00729     for (nd=0; nd<(pM->DomainsPerLevel[nl]); nd++){
00730       pM->Domain[nl][nd].Comm_Parent = MPI_COMM_NULL;
00731       pM->Domain[nl][nd].Comm_Children = MPI_COMM_NULL;
00732     }
00733   }
00734 
00735   if (maxlevel > 0) {
00736     ranks = (int*)calloc_1d_array(Nproc_Comm_world,sizeof(int));
00737   }
00738 
00739 /* For each Domain up to (maxlevel-1), initialize communicator with children */
00740 
00741   for (nl=0; nl<maxlevel; nl++){
00742   for (nd=0; nd<pM->DomainsPerLevel[nl]; nd++){
00743     pD = (DomainS*)&(pM->Domain[nl][nd]);  /* set ptr to this Domain */
00744     child_found = 0;
00745 
00746 /* Load integer array with ranks of processes in MPI_COMM_WORLD updating Grids
00747  * on this Domain, and set IDs in the Comm_Children communicator based on the
00748  * index in the rank array, in case a child Domain is found.  If no child is
00749  * found these ranks will never be used. */
00750 
00751     Nranks = (pD->NGrid[0])*(pD->NGrid[1])*(pD->NGrid[2]);
00752     groupn = 0;
00753 
00754     for(n=0; n<(pD->NGrid[2]); n++){
00755     for(m=0; m<(pD->NGrid[1]); m++){
00756     for(l=0; l<(pD->NGrid[0]); l++){
00757       ranks[groupn] = pD->GData[n][m][l].ID_Comm_world;
00758       pD->GData[n][m][l].ID_Comm_Children = groupn;
00759       groupn++;
00760     }}}
00761 
00762 /* edges of this Domain */
00763     for (i=0; i<3; i++) {
00764       D1.ijkl[i] = pD->Disp[i];
00765       D1.ijkr[i] = pD->Disp[i] + pD->Nx[i];
00766     }
00767 
00768 /* Loop over all Domains at next level, looking for children of this Domain */
00769 
00770     for (ncd=0; ncd<pM->DomainsPerLevel[nl+1]; ncd++){
00771       pCD = (DomainS*)&(pM->Domain[nl+1][ncd]);  /* set ptr to potential child*/
00772 
00773 /* edges of potential child Domain */
00774       for (i=0; i<3; i++) {
00775         D2.ijkl[i] = pCD->Disp[i]/2;
00776         D2.ijkr[i] = 1;
00777         if (pCD->Nx[i] > 1) D2.ijkr[i] = (pCD->Disp[i] + pCD->Nx[i])/2;
00778       }
00779 
00780       if (D1.ijkl[0] < D2.ijkr[0] && D1.ijkr[0] > D2.ijkl[0] &&
00781           D1.ijkl[1] < D2.ijkr[1] && D1.ijkr[1] > D2.ijkl[1] &&
00782           D1.ijkl[2] < D2.ijkr[2] && D1.ijkr[2] > D2.ijkl[2]){
00783         child_found = 1;
00784 
00785 /* Child found.  Add child processors to ranks array, but only if they are
00786  * different from processes currently there (including parent and any previously
00787  * found children).  Set IDs associated with Comm_Parent communicator, since on 
00788  * the child Domain this is the same as the Comm_Children communicator on the
00789  * parent Domain  */
00790 
00791         for(n=0; n<(pCD->NGrid[2]); n++){
00792         for(m=0; m<(pCD->NGrid[1]); m++){
00793         for(l=0; l<(pCD->NGrid[0]); l++){
00794           irank = -1;
00795           for (i=0; i<Nranks; i++) {
00796             if(pCD->GData[n][m][l].ID_Comm_world == ranks[i]) irank = i;
00797           }
00798           if (irank == -1) {
00799             ranks[groupn] = pCD->GData[n][m][l].ID_Comm_world;
00800             pCD->GData[n][m][l].ID_Comm_Parent = groupn;
00801             groupn++;
00802             Nranks++;
00803           } else {
00804             pCD->GData[n][m][l].ID_Comm_Parent = irank;
00805           }
00806         }}}
00807       }
00808     }
00809 
00810 /* After looping over all potential child Domains, create a new communicator if
00811  * a child was found */
00812 
00813     if (child_found == 1) {
00814 printf("Children_Comm ProcID=%d Nranks=%d ranks=",myID_Comm_world,Nranks);
00815 for (i=0; i<Nranks; i++) printf("%d ",ranks[i]);
00816 printf("\n");
00817       ierr = MPI_Group_incl(world_group, Nranks, ranks, &(pD->Group_Children));
00818       ierr = MPI_Comm_create(MPI_COMM_WORLD,pD->Group_Children,
00819         &pD->Comm_Children);
00820 
00821 /*
00822 int myrank=0;
00823 for (i=0; i<Nranks; i++){
00824 if (myID_Comm_world == ranks[i]){
00825 ierr = MPI_Comm_rank(pD->Comm_Children, &myrank);
00826 printf("WorldID=%d Children_CommID=%d\n",myID_Comm_world,myrank);
00827 }}
00828 */
00829 /* Loop over children to set Comm_Parent communicators */
00830 
00831       for (ncd=0; ncd<pM->DomainsPerLevel[nl+1]; ncd++){
00832         pCD = (DomainS*)&(pM->Domain[nl+1][ncd]);  
00833 
00834         for (i=0; i<3; i++) {
00835           D2.ijkl[i] = pCD->Disp[i]/2;
00836           D2.ijkr[i] = 1;
00837           if (pCD->Nx[i] > 1) D2.ijkr[i] = (pCD->Disp[i] + pCD->Nx[i])/2;
00838         }
00839 
00840         if (D1.ijkl[0] < D2.ijkr[0] && D1.ijkr[0] > D2.ijkl[0] &&
00841             D1.ijkl[1] < D2.ijkr[1] && D1.ijkr[1] > D2.ijkl[1] &&
00842             D1.ijkl[2] < D2.ijkr[2] && D1.ijkr[2] > D2.ijkl[2]){
00843           pCD->Comm_Parent = pD->Comm_Children;
00844         }
00845       }
00846     }
00847   }}
00848 
00849 #endif /* MPI_PARALLEL & STATIC_MESH_REFINEMENT  */
00850 
00851   free(next_domainid);
00852   return;
00853 }
00854 
00855 /*----------------------------------------------------------------------------*/
00856 /*! \fn void get_myGridIndex(DomainS *pD, const int myID, int *pi, 
00857  *                           int *pj, int *pk)
00858  *  \brief Searches GData[][][] array to find i,j,k components
00859  *   of block being updated on this processor.  */
00860 
00861 void get_myGridIndex(DomainS *pD, const int myID,
00862                      int *pi, int *pj, int *pk)
00863 {
00864   int i, j, k;
00865   for (k=0; k<(pD->NGrid[2]); k++){
00866     for (j=0; j<(pD->NGrid[1]); j++){
00867       for (i=0; i<(pD->NGrid[0]); i++){
00868         if (pD->GData[k][j][i].ID_Comm_world == myID) {
00869           *pi = i;  *pj = j;  *pk = k;
00870           return;
00871         }
00872       }
00873     }
00874   }
00875 
00876   ath_error("[get_myGridIndex]: Can't find ID=%i in GData\n", myID);
00877 }
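      /* Usage sketch (illustrative): each process can locate its own Grid with
       *
       *   int ip, jp, kp;
       *   get_myGridIndex(pD, myID_Comm_world, &ip, &jp, &kp);
       *
       * after which pD->GData[kp][jp][ip] describes the local Grid. */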
00878 
00879 #ifdef MPI_PARALLEL
00880 /*=========================== PRIVATE FUNCTIONS ==============================*/
00881 /*! \fn static int dom_decomp(const int Nx, const int Ny, const int Nz,
00882  *                    const int Np, int *pNGx, int *pNGy, int *pNGz)
00883  *  \brief Calls appropriate 2D or 3D auto decomposition routines.
00884  *   Functions written by T.A.G., added May 2007 */
00885 
00886 static int dom_decomp(const int Nx, const int Ny, const int Nz,
00887                       const int Np, int *pNGx, int *pNGy, int *pNGz)
00888 {
00889   if(Nx > 1 && Ny == 1 && Nz == 1){ /* 1-D */
00890     if(Np > Nx) return 1; /* Too many procs. */
00891     *pNGx = Np;
00892     *pNGy = 1;
00893     *pNGz = 1;
00894     return 0;
00895   }
00896   else if(Nx > 1 && Ny > 1 && Nz == 1){ /* 2-D */
00897     *pNGz = 1;
00898     return dom_decomp_2d(Nx, Ny, Np, pNGx, pNGy);
00899   }
00900   else if(Nx > 1 && Ny > 1 && Nz > 1){ /* 3-D */
00901     return dom_decomp_3d(Nx, Ny, Nz, Np, pNGx, pNGy, pNGz);
00902   }
00903 
00904   return 1; /* Error - particular case not expected */
00905 }
00906 
00907 /*----------------------------------------------------------------------------*/
00908 /*! \fn static int dom_decomp_2d(const int Nx, const int Ny,
00909  *                               const int Np, int *pNGx, int *pNGy)
00910  *  \brief Optimizes domain decomposition in 2D.
00911  *
00912  *   The TOTAL amount of data communicated (summed over all processes and
00913  *   all INTERNAL boundaries), divided by 2*nghost (where the 2 is for two
00914  *   messages per internal interface), is computed and stored in the
00915  *   variable I; its minimum is I0.  This assumes that in the x-direction we
00916  *   communicate only computational cells, while in the y-direction we
00917  *   communicate the computational cells plus x-direction ghost cells.
00918  *   Then
00919  *     Total x-communication is (rx - 1)*Ny*(2*nghost) 
00920  *     Total y-communication is (ry - 1)*(Nx + rx*(2*nghost))*(2*nghost)
00921  */
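      /* Worked example (illustrative, assuming nghost=4): Nx=Ny=256, Np=16
       * gives rxs = (int)sqrt(256*16/248) = 4; 4 divides 16, so rx0=4, ry0=4,
       * I0 = 3*256 + 3*(256 + 2*4*4) = 1632.  The upward search finds rx=8
       * (I = 7*256 + 1*(256+64) = 2112), which does not improve on 4x4. */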
00922 
00923 static int dom_decomp_2d(const int Nx, const int Ny,
00924                          const int Np, int *pNGx, int *pNGy){
00925 
00926   int rx, ry, I, rxs, rx_min, rx_max;
00927   int rx0=1, ry0=1, I0=0, init=1;
00928   div_t dv;
00929 
00930   /* Compute the ideal decomposition, truncated to an integer, which
00931      minimizes the amount of communication. */
00932   rxs = (int)sqrt((double)(Nx*Np)/(double)(Ny > 2*nghost ? Ny - 2*nghost : 1));
00933 
00934   /* Constrain the decomposition */
00935   rx_max = Nx < Np ? Nx : Np; /* Require ry >= 1 and rx <= Nx */
00936   if(Ny < Np){ /* Require rx >= 1 and ry <= Ny */
00937     dv = div(Np, Ny);
00938     /* rx_min = the smallest integer >= Np/Ny */
00939     rx_min = dv.quot + (dv.rem > 0 ? 1 : 0);
00940   }
00941   else rx_min = 1;
00942 
00943   /* printf("rx_min = %d, rx_max = %d\n",rx_min, rx_max); */
00944 
00945   /* Constrain rxs to fall in this domain */
00946   rxs = rxs > rx_min ? rxs : rx_min;
00947   rxs = rxs < rx_max ? rxs : rx_max;
00948 
00949   /* Search down for a factor of Np */
00950   for(rx=rxs; rx>=rx_min; rx--){
00951     dv = div(Np,rx);
00952     if(dv.rem == 0){
00953       rx0 = rx;
00954       ry0 = dv.quot;
00955       I0 = (rx0 - 1)*Ny + (ry0 - 1)*(Nx + 2*nghost*rx0);
00956       init = 0;
00957       break;
00958     }
00959   }
00960 
00961   /* Search up for a factor of Np */
00962   for(rx=rxs+1; rx<=rx_max; rx++){
00963     dv = div(Np,rx);
00964     if(dv.rem == 0){
00965       ry = dv.quot;
00966       I = (rx - 1)*Ny + (ry - 1)*(Nx + 2*nghost*rx);
00967 
00968       if(init || I < I0){
00969         rx0 = rx;
00970         ry0 = ry;
00971         I0  = I;
00972         init = 0;
00973       }
00974       break;
00975     }
00976   }
00977 
00978   if(init) return 1; /* Error locating a solution */
00979 
00980   /* printf("Minimum messaging decomposition has: rx = %d, ry = %d, I = %d\n",
00981      rx0, ry0, I0); */
00982 
00983   *pNGx = rx0;
00984   *pNGy = ry0;
00985 
00986   return 0;
00987 }
00988 
00989 /*----------------------------------------------------------------------------*/
00990 /*! \fn static int dom_decomp_3d(const int Nx, const int Ny, const int Nz,
00991  *                               const int Np, int *pNGx, int *pNGy, int *pNGz)
00992  *  \brief Optimizes domain decomposition in 3D.
00993  *
00994  *   See the comments for dom_decomp_2d() for more about the algorithm
00995  */
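      /* Worked example (illustrative, assuming nghost=4): Nx=Ny=Nz=128 with
       * Np=64 settles on rx=ry=rz=4; for each factor rx of 64, dom_decomp_2d()
       * decomposes the remaining 64/rx processes over (y,z), and the symmetric
       * 4x4x4 split yields the smallest message count I. */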
00996 
00997 static int dom_decomp_3d(const int Nx, const int Ny, const int Nz,
00998                          const int Np, int *pNGx, int *pNGy, int *pNGz){
00999 
01000   div_t dv;
01001   int rx_min, rx_max, rx, ry, rz, I;
01002   int rx0=1, ry0=1, rz0=1, I0=0, init=1;
01003   int ierr, t, Npt;
01004 
01005   /* Constrain the decomposition */
01006   rx_max = Nx < Np ? Nx : Np; /* Require ry >= 1, rz >= 1 and rx <= Nx */
01007   /* Compute a global minimum constraint on rx. */
01008   t = (Ny < Np ? Ny : Np)*(Nz < Np ? Nz : Np); /* t = Max(ry)*Max(rz) */
01009   if(t < Np){ /* Require rx >= 1, ry <= Ny and rz <= Nz */
01010     dv = div(Np, t);
01011     /* rx_min = the smallest integer >= Np/t */
01012     rx_min = dv.quot + (dv.rem > 0 ? 1 : 0);
01013   }
01014   else rx_min = 1;
01015 
01016   /* printf("rx_min = %d, rx_max = %d\n",rx_min, rx_max); */
01017 
01018   for(rx = rx_min; rx <= rx_max; rx++){
01019     dv = div(Np, rx);
01020     if(dv.rem == 0){
01021       Npt = dv.quot; /* Np for transverse (y,z) decomposition */
01022 
01023       ierr = dom_decomp_2d(Ny, Nz, Npt, &ry, &rz);
01024       if(ierr == 0){
01025         /* Now compute the amount of messaging */
01026         I = (rx - 1)*Ny*Nz + (ry - 1)*(Nx + 2*nghost*rx)*Nz
01027           + (rz - 1)*(Nx + 2*nghost*rx)*(Ny + 2*nghost*ry);
01028 
01029         if(I < 0){ /* Integer Overflow */
01030           /* printf("[3d new] I = %d\n",I); */
01031           continue;
01032         }
01033 
01034         if(init || I < I0){
01035           rx0 = rx;
01036           ry0 = ry;
01037           rz0 = rz;
01038           I0  = I;
01039           init = 0;
01040           /* printf("I(rx = %d, ry = %d, rz = %d) = %d\n",rx,ry,rz,I); */
01041         }
01042       }
01043     }
01044   }
01045 
01046   if(init) return 1; /* Error locating a solution */
01047 
01048   *pNGx = rx0;
01049   *pNGy = ry0;
01050   *pNGz = rz0;
01051 
01052   return 0;
01053 }
01054 
01055 #endif /* MPI_PARALLEL */
