/****************************************************************************** * This is a benchmark written to emulate the behavior of a typical * computational fluid dynamics benchmark. The physical space being simulated * is broken into boxes (cubes). All boxes have the same number of cells. * * build: * gcc -DOPEN_MP -fopenmp cfd-mini-c.c -o cfd-mini-c * OR * gcc cfd-mini-c.c -o cfd-mini-c * * usage: * ./cfd-mini-c -b 2 -c 8 -v 1 // 2 blocks, 8^3 cells each, verify on * ./cfd-mini-c -b 32 -c 128 // 32 blocks, 128^3 cells each * * There are several versions of this benchmark - the original written with * the use of CHOMBO. This versions here are written in pure c. * cfd_explain: is meant to be easy to read and understand in * order to explain the algorithm. * cfd_*_pa: uses a 1D data allocation with pointer arithmetic * in order to achieve the same performance as the * baseline CHOMBO version. * * STATUS: The openMP pragmas are not finalized and there is currently a memory * bug associated with using them. * * authors: Catherine Olschanowsky * * Copyright (c) 2014, Colorado State University * All rights reserved. *******************************************************************************/ #include #include #include #include #include #include //#define OPEN_MP #ifdef OPEN_MP #include #endif typedef double Real; const int nGhost=2; const size_t numComp = 5; // Dim(3) + 2 const Real dx = 0.5; const Real factor1 = 1./12.; const Real factor2 = 2.; const Real compMultiplier = 1.; // these are for testing only (test=1 prints data verify=1 does an internal // comparison) int tests = 0; int verify=0; int cfd_mini(int numCell, int numBox, int kernel); // kernel 0 Real** cfd_explain(int numCell, int numBox,int nGhost); // kernel 1 Real** cfd_baseline(int numCell, int numBox); /****************************************************************************** * 1. process command line arguments * 2. call mini-app function * 3. exit *******************************************************************************/ int main(int argc, char **argv) { int index; int c; int numCell = 128; int numBox = 32; // by default the explain kernel is used int kernel = 0; // Step 1: process command line arguments opterr = 0; while ((c = getopt (argc, argv, "tvb:c:k:")) != -1){ switch (c) { case 't': tests = 1; break; case 'v': verify = 1; break; case 'k': kernel = atoi(optarg); break; case 'c': numCell = atoi(optarg); break; case 'b': numBox = atoi(optarg); break; case '?': if (optopt == 'c' || optopt == 'b') fprintf (stderr, "Option -%c requires an argument.\n", optopt); else if (isprint (optopt)) fprintf (stderr, "Unknown option `-%c'.\n", optopt); else fprintf (stderr, "Unknown option character `\\x%x'.\n", optopt); return 1; default: abort (); } } // print out the parameters printf("numComp : %d\nnumCell : %d\nnumBox : %d\n", (int)numComp,(int)numCell,(int)numBox); // Step 2: Call the function to do the work cfd_mini(numCell, numBox, kernel); return 1; } #undef GET_VAL_PTR #define GET_VAL_PTR(b,c,z,y,x) (b)+\ (c)*full_numCell3+\ ((z)+nGhost)*full_numCell2+\ ((y)+nGhost)*full_numCell+\ ((x)+nGhost) #define GET_FACE_VAL_PTR(d,b,c,z,y,x) (b)+\ (c)*(numCell+((d)==2))*(numCell+((d)==1))*(numCell+((d)==0)) +\ (z)*(numCell+((d)==1))*(numCell+((d)==0))+\ (y)*(numCell+((d)==0))+\ (x) /****************************************************************************** * cfd_mini * Input * ----- * numCell - the number of cells in a single dimension of a single box * numBoxes - the number of independent boxes to process *******************************************************************************/ int cfd_mini(int numCell, int numBox, int kernel){ Real ** verify_data_base; Real ** verify_me; if(verify || !kernel){ verify_data_base = cfd_explain(numCell,numBox,nGhost); } switch (kernel) { case 1: verify_me = cfd_baseline(numCell, numBox); break; } if(verify){ int full_numCell = numCell+2*nGhost; int full_numCell2 = full_numCell*full_numCell; int full_numCell3 = full_numCell*full_numCell+full_numCell; int totalCells = full_numCell*full_numCell*full_numCell; int flux_totalSize = numCell*numCell*(numCell+1); int idx,iz,iy,ix; Real e = 0.001; for(idx=0;idx < numBox;idx++){ Real* truth = verify_data_base[idx]; Real* hope = verify_me[idx]; for(iz=0;iz*GET_VAL_PTR(truth,0,iz,iy,ix)+e|| *GET_VAL_PTR(hope,0,iz,iy,ix)<*GET_VAL_PTR(truth,0,iz,iy,ix)-e){ fprintf(stderr,"VERIFICATION FAILURE!!!\n"); return 1; } } } } } fprintf(stderr,"Code is verified\n"); } return 0; } /****************************************************************************** * defines for cfd_basic code -- this makes for easier reading and * debugging ******************************************************************************/ #define p_DATA_old(z,y,x) *(GET_VAL_PTR(old_box,0,z,y,x)) #define e_DATA_old(z,y,x) *(GET_VAL_PTR(old_box,1,z,y,x)) #define u_DATA_old(z,y,x) *(GET_VAL_PTR(old_box,2,z,y,x)) #define v_DATA_old(z,y,x) *(GET_VAL_PTR(old_box,3,z,y,x)) #define w_DATA_old(z,y,x) *(GET_VAL_PTR(old_box,4,z,y,x)) #define p_DATA_new(z,y,x) *(GET_VAL_PTR(new_box,0,z,y,x)) #define e_DATA_new(z,y,x) *(GET_VAL_PTR(new_box,1,z,y,x)) #define u_DATA_new(z,y,x) *(GET_VAL_PTR(new_box,2,z,y,x)) #define v_DATA_new(z,y,x) *(GET_VAL_PTR(new_box,3,z,y,x)) #define w_DATA_new(z,y,x) *(GET_VAL_PTR(new_box,4,z,y,x)) #define p_CACHE_x(z,y,x) *(GET_FACE_VAL_PTR(0,g_cache,0,z,y,x)) #define e_CACHE_x(z,y,x) *(GET_FACE_VAL_PTR(0,g_cache,1,z,y,x)) #define u_CACHE_x(z,y,x) *(GET_FACE_VAL_PTR(0,g_cache,2,z,y,x)) #define v_CACHE_x(z,y,x) *(GET_FACE_VAL_PTR(0,g_cache,3,z,y,x)) #define w_CACHE_x(z,y,x) *(GET_FACE_VAL_PTR(0,g_cache,4,z,y,x)) #define p_CACHE_y(z,y,x) *(GET_FACE_VAL_PTR(1,g_cache,0,z,y,x)) #define e_CACHE_y(z,y,x) *(GET_FACE_VAL_PTR(1,g_cache,1,z,y,x)) #define u_CACHE_y(z,y,x) *(GET_FACE_VAL_PTR(1,g_cache,2,z,y,x)) #define v_CACHE_y(z,y,x) *(GET_FACE_VAL_PTR(1,g_cache,3,z,y,x)) #define w_CACHE_y(z,y,x) *(GET_FACE_VAL_PTR(1,g_cache,4,z,y,x)) #define p_CACHE_z(z,y,x) *(GET_FACE_VAL_PTR(2,g_cache,0,z,y,x)) #define e_CACHE_z(z,y,x) *(GET_FACE_VAL_PTR(2,g_cache,1,z,y,x)) #define u_CACHE_z(z,y,x) *(GET_FACE_VAL_PTR(2,g_cache,2,z,y,x)) #define v_CACHE_z(z,y,x) *(GET_FACE_VAL_PTR(2,g_cache,3,z,y,x)) #define w_CACHE_z(z,y,x) *(GET_FACE_VAL_PTR(2,g_cache,4,z,y,x)) /******************************************************************************* * The following function is meant to be explainatory code. This implementation * is meant purely to explain the control flow and algorithm. Other * implementations include optimizations that sometimes obfuscate the * algorithm. * * Processing the boxes means that we will be reading the data from * old_boxes and writing them to new_boxes * The following are the equations for this calculation * * There are 5 components: p, e, u, v, w (density, energy, velocity (3D)) * Each of these components is represented as a 3D array (initialized * above). * p_{t+1}(z,y,x) = h(p_t,z,y,x) + h'(p_t,z,y,x) + h"(p_t,z,y,x) * e_{t+1}(z,y,x) = h(e_t,z,y,x) + h'(e_t,z,y,x) + h"(e_t,z,y,x) * u_{t+1}(z,y,x) = h(u_t,z,y,x) + h'(u_t,z,y,x) + h"(u_t,z,y,x) * v_{t+1}(z,y,x) = h(v_t,z,y,x) + h'(v_t,z,y,x) + h"(v_t,z,y,x) * w_{t+1}(z,y,x) = h(w_t,z,y,x) + h'(w_t,z,y,x) + h"(w_t,z,y,x) * * Computing face-centered, flux values based on cell-centered values * g(component,z,y,x) is a stencil operation that looks like the following: * g(c,z,y,x) = factor1* * (c[z][y][x-2]+7*(c[z][y][x-1]+c[z][y][x])+c[z][y][x+1]) * similarly for g' and g" * g'(c,z,y,x) = factor1* * (c[z][y-2][x]+7*(c[z][y-1][x]+c[z][y][x])+c[z][y+1][x]) * g"(c,z,y,x) = factor1* * (c[z-2][y][x]+7*(c[z-1][y][x]+c[z][y][x])+c[z+1][y][x]) * * Computing cell-centered values based on face-centered flux values * h(component,z,y,x) is a stencil operation that looks like the following: * h(c,z,y,x) = factor2* * (g(c,z,y,x+1)*g(u_t,z,y,x+1)-g(c,z,y,x)*g(u_t,z,y,x)) * h'(c,z,y,x) = factor2* * (g'(c,z,y+1,x)*g'(v_t,z,y+1,x)-g'(c,z,y,x)*g'(v_t,z,y,x)) * h"(c,z,y,x) = factor2* * (g"(c,z+1,y,x)*g"(w_t,z+1,y,x)-g"(c,z,y,x)*g"(w_t,z,y,x)) * * in this example code we omit some space and time saving optimizations * in order to make the code easy to learn. FIXME: are these correct? * Step 1 is to calculate all of the g() values * Step 2 multiplies the values together for the first column in the * equations above * Step 3 Return to Step 1 for g' and then for g" * * The following is a table describing how the notation above * maps to the storage in code that we are using below * value name | variable name * ---------------------------------- * p_{t+1} | new_data[0] * e_{t+1} | new_data[1] * u_{t+1} | new_data[2] * v_{t+1} | new_data[3] * w_{t+1} | new_data[4] * g(p_t) | g_cache[0] * g(e_t) | g_cache[1] * ... same pattern for u,v,w * g'(p_t) | g_cache[0] * ... continue pattern * g"(p_t) | g_cache[0] * * g_cache can be reused because we accumulate into new_data between * iterations ****************************************************************************/ Real** cfd_explain(int numCell, int numBox,int nGhost){ double time_spent; struct timeval tv1, tv2; // The size of the 3D data is (numCell+2*nGhost)^3 int full_numCell = numCell+2*nGhost; int full_numCell2 = full_numCell*full_numCell; int full_numCell3 = full_numCell*full_numCell+full_numCell; int totalCells = full_numCell*full_numCell*full_numCell; int flux_totalSize = numCell*numCell*(numCell+1); // in order to access each box we need an array of pointers Real **old_boxes = malloc(sizeof(Real*)*numBox); Real **new_boxes = malloc(sizeof(Real*)*numBox); // Allocate the 1D array of space (which is indexed as a 3D space) // numComp is the number of components (think of 5 cubes of data) int idx; for(idx=0;idx