Привет,
Я пытаюсь запустить этот код в параллельных разделах с OpenMP
Я хотел ускорить его выполнение. Эти блоки могут быть рассчитаны независимо на каждом временном шаге. T.
К сожалению, вместо этого код замедляется openmp. Я искал параллелизм, но не видел никаких проблем.
Правильно ли я использовал аргумент section?
Есть ли лучшая структура параллализации, которую я должен использовать?
Всего наилучшего
#define Nx 300
#define Ny 100
int main()
{
int i, j, t=0, it=0;double vort[Nx][Ny], psi[Nx][Ny], ux[Nx][Ny], uy[Nx][Ny];
double fpsi[Nx][Ny];
double a[Nx][Ny], b[Nx][Ny],c[Nx][Ny], d[Nx][Ny], e[Nx][Ny], ee[Nx][Ny];//tridiagonal coefficient
//coefficient for the internal grid for vort. Indexed from 0 to Nx-3 or Ny-3 to calculate the interior grid point only. The idix are shiffted of (+1,+1) compare to vort or psi or ux or uy.
//double DDx[Nx-2], AAx[Nx-2], BBx[Nx-2], CCx[Nx-2], DDy[Ny-2], AAy[Ny-2],BBy[Ny-2], CCy[Ny-2];
double DDx[Nx], AAx[Nx], BBx[Nx], CCx[Nx], DDy[Ny], AAy[Ny],BBy[Ny], CCy[Ny];
double epsx[Nx][Ny], epsy[Nx][Ny]; //same dimension than ux and uy
//double vortx[Nx-2], vorty[Ny-2];
double vortx[Nx], vorty[Ny];
double cx[Nx][Ny], cy[Nx][Ny]; //same dimension than ux and uy//coefficient for the ADI
double n[Nx][Ny];
double DDnx[Nx], AAnx[Nx], BBnx[Nx], CCnx[Nx], DDny[Ny], AAny[Ny],BBny[Ny], CCny[Ny];
double nx[Nx], ny[Ny];
//coefficient for the ADI
double cO2[Nx][Ny];
double DDcx[Nx], AAcx[Nx], BBcx[Nx], CCcx[Nx], DDcy[Ny], AAcy[Ny],BBcy[Ny], CCcy[Ny];
double cO2x[Nx], cO2y[Ny];
//coefficient for the ADI
double cls[Nx][Ny];
double DDclsx[Nx], AAclsx[Nx], BBclsx[Nx], CCclsx[Nx], DDclsy[Ny], AAclsy[Ny],BBclsy[Ny], CCclsy[Ny];
double clsx[Nx], clsy[Ny];
//coefficient for the ADI
double pvd[Nx][Ny];
double DDpvdx[Nx], AApvdx[Nx], BBpvdx[Nx], CCpvdx[Nx], DDpvdy[Ny], AApvdy[Ny],BBpvdy[Ny], CCpvdy[Ny];
double pvdx[Nx], pvdy[Ny];
//// пробел в отображении кода … ///
for (t=tIni; t<tmax; t++)
// for (t=1; t<4; t++)
{
///////////////////////////////////////////////////////////////////////////////////////
//calcul coef upwind differentiation
///////////////////////////////////////////////////////////////////////////////////////
for (j=0; j<Ny; j++)
{
for (i=0; i<Nx; i++)
{
cx[i][j]=ux[i][j]*DtDx;
cy[i][j]=uy[i][j]*DtDx;
if(ux[i][j]>=0)
{
epsx[i][j]=1;
}
else
{
epsx[i][j]=-1;
}
if(uy[i][j]>=0)
{
epsy[i][j]=1;
}
else
{
epsy[i][j]=-1;
}
}
}
#pragma omp parallel sections
{
///////////////////////////////////////////////////////////////////////////////////////
//calcul de la concentration de bacterie
///////////////////////////////////////////////////////////////////////////////////////
#pragma omp section
{
ADI_N(n, cO2, AAnx, BBnx, CCnx, DDnx, AAny, BBny, CCny, DDny, cx, cy, epsx, epsy, nx, ny, DtDbDxs, Dxs2);
}
///////////////////////////////////////////////////////////////////////////////////////
//calcul de la concentration d'oxygene
///////////////////////////////////////////////////////////////////////////////////////
#pragma omp section
{
ADI_C(cO2, n, AAcx, BBcx, CCcx, DDcx, AAcy, BBcy, CCcy, DDcy, cx, cy, epsx, epsy, cO2x, cO2y, DtDO2Dxs, Dxs2, gamma);
}
/*for (i=0; i<Nx; i++)
{
cO2[i][Ny-1]=1.;
}*/
///////////////////////////////////////////////////////////////////////////////////////
//calcul de la concentration de cellulose
///////////////////////////////////////////////////////////////////////////////////////
#pragma omp section
{
ADI_Cls(cls, n, cO2tmp, AAclsx, BBclsx, CCclsx, DDclsx, AAclsy, BBclsy, CCclsy, DDclsy, cx, cy, epsx, epsy, clsx, clsy, DtDclsDxs, Dxs2, Dt*kpvd, seuilCls, seuilStopCls);
}
///////////////////////////////////////////////////////////////////////////////////////
//calcul de la concentration de poyverdine
///////////////////////////////////////////////////////////////////////////////////////
#pragma omp section
{
ADI_Pvd(pvd, n, cO2, AApvdx, BBpvdx, CCpvdx, DDpvdx, AApvdy, BBpvdy, CCpvdy, DDpvdy, cx, cy, epsx, epsy, pvdx, pvdy, DtDpvdDxs, Dxs2, Dt*kpvd, seuilCls, seuilStopCls);
}
///////////////////////////////////////////////////////////////////////////////////////
//calcul de la vorticité
///////////////////////////////////////////////////////////////////////////////////////
#pragma omp section
{
ADI(vort, psi, n, cls, AAx, BBx, CCx, DDx, AAy, BBy, CCy, DDy, cx, cy, epsx, epsy, vortx, vorty, DtDxsRe, Dxs, coefMass, coefMassCls);
}
///////////////////////////////////////////////////////////////////////////////////////
//calcul de la stream function
///////////////////////////////////////////////////////////////////////////////////////
#pragma omp section
{
// Preparation de la force qui agit sur la stream function (potentiel vecteur)
for (i=0; i<Nx; i++)
{
for (j=0; j<Ny; j++)
{fpsi[i][j]= - vort[i][j]*Dxs; //multiplie par Dxs car les coefficient son multiplie par beta=Dxs/Dys. Donc Dxs est l'unite spatiale.
if(isnan(fpsi[i][j]))
{
cout<<"error psi nan "<<__FILE__<<" line "<<__LINE__<<endl;
exit(2);
}
}
}//SOR
//sorPoissonChebyshev(psi, fpsi, a, b, c, d, e, ee, Nx-1, Ny-1, rjac, it, Nitmax, relax, tol);
sorPoissonChebyshev(psi, fpsi, a, b, c, d, e, ee, Nx, Ny, rjac, it, relax, Nitmax, tol);
//condition aux bords. Psi constant sur la frontiere. Mise a zero pour le SOR
for (i=0; i<Nx; i++)
{
psi[i][0]=0;
psi[i][Ny-1]=0;
}
for (j=0; j<Ny; j++)
{
psi[0][j]=0;
psi[Nx-1][j]=0;
}
}
///////////////////////////////////////////////////////////////////////////////////////
//calcul de la vitesse
///////////////////////////////////////////////////////////////////////////////////////
#pragma omp section
{
for (i=1; i<Nx-1; i++)
{
for (j=1; j<Ny-1; j++)
{
ux[i][j]=(psi[i][j+1]-psi[i][j-1])/Dx2;
uy[i][j]=-(psi[i+1][j]-psi[i-1][j])/Dx2;
}
}
//Boudary conditions
for (i=0; i<Nx; i++)
{
ux[i][Ny-1]=ux[i][Ny-2]; //free slip
//ux[i][Ny-1]=0; //no slip
uy[i][Ny-1]=0;
ux[i][0]=0;
uy[i][0]=0;
}
for (j=0; j<Ny; j++)
{
ux[Nx-1][j]=0;
uy[Nx-1][j]=0;
ux[0][j]=0;
uy[0][j]=0;
}
}}
}
}
—РЕДАКТИРОВАТЬ: — Удалить TMP — функция ADI
void ADI_N(double n[][Ny], double cO2[][Ny],
double AAx[], double BBx[], double CCx[], double DDx[],
double AAy[], double BBy[], double CCy[], double DDy[],
double cx[][Ny], double cy[][Ny], double epsx[][Ny], double epsy[][Ny],
double nx[], double ny[], double DbDtDxs, double Dxs2)
{
int i=0, j=0;
for (i=0; i<Nx; i++) //interior points
{
for (j=0; j<Ny; j++) //interior points
{
if (cO2[i][j]>seuilO2) // Seuil en dessous du quel les bacteries ne pousse pas.
{
n[i][j]= n[i][j] * ( 1 + Dt / tempsDeDivBact * cO2[i][j] ); // n et cO2 sont normalizée
}
}
}////////////calcul sur y////////////
//calcul coef ADIfor (i=0; i<Nx; i++) //interior points
{
for (j=1; j<Ny-1; j++) //interior points
{
//non conservative forme
if (i==0)
{
AAy[j] = -.5 * DbDtDxs;
BBy[j] = 1 + DbDtDxs;
CCy[j] = - .5 * DbDtDxs;
DDy[j] = .5 * DbDtDxs * n[1][j]
+ ( 1 - DbDtDxs ) * n[i][j]
+ .5 * DbDtDxs * n[i+1][j];
}
else if (i==Nx-1)
{
AAy[j] = -.5 * DbDtDxs;
BBy[j] = 1 + DbDtDxs;
CCy[j] = - .5 * DbDtDxs;
DDy[j] = .5 * DbDtDxs * n[i-1][j]
+ ( 1 - DbDtDxs ) * n[i][j]
+ .5 * DbDtDxs * n[Nx-2][j];
}
else //conservative forme
{
AAy[j] = -.5 * ( .5 * (1 + epsy[i][j]) * cy[i][j-1] + DbDtDxs);
BBy[j] = 1 + DbDtDxs + .5 * epsy[i][j] * cy[i][j];
CCy[j] = .5 * ( .5 * ( 1 - epsy[i][j] ) * cy[i][j+1] - DbDtDxs);
DDy[j] = .5 * (.5 * ( 1 + epsx[i][j] ) * cx[i-1][j] + DbDtDxs ) * n[i-1][j]
+ ( 1 - DbDtDxs - .5 * epsx[i][j] * cx[i][j] ) * n[i][j]
+ .5 * (- .5 * ( 1 - epsx[i][j] ) * cx[i+1][j] + DbDtDxs ) * n[i+1][j];
/*AAy[j] = -.5 * ( .5 * cy[i][j-1] + DbDtDxs);
BBy[j] = 1 + DbDtDxs ;
CCy[j] = .5 * ( .5 * cy[i][j+1] - DbDtDxs);
DDy[j] = .5 * (.5 * cx[i-1][j] + DbDtDxs ) * n[i-1][j]
+ ( 1 - DbDtDxs ) * n[i][j]
+ .5 * (- .5 * cx[i+1][j] + DbDtDxs ) * n[i+1][j];
*/
}
ny[j] = n[i][j];
}//boundary condition values no flux n[i][j-1]=n[i][j+1] and substraction of a[0]*n[i][j] for tridaig calculation.
//non conservative forme
if (i==0) {
j=0; //DDy[j] = DbDtDxs/2 * n[i-1][j] + (1-DbDtDxs) * n[i][j] + DbDtDxs/2 * n[i+1][j] - -( .5 * cy[i][j] + DbDtDxs/2 ) * n[i][j-1];
DDy[j] = DbDtDxs/2 * n[1][j] + (1-DbDtDxs) * n[i][j] + DbDtDxs/2 * n[i+1][j]
+ DbDtDxs/2 * n[i][1];//AAy
AAy[j] = 0; // pas utilise soustrait à DDy
BBy[j] = 1 + DbDtDxs ;
CCy[j] = .5 * ( .5 * cy[i][j+1] - DbDtDxs);
j=Ny-1;
DDy[j] = .5 * ( DbDtDxs + cx[i][j]/2) * n[1][j] + (1-DbDtDxs) * n[i][j] + .5 * ( DbDtDxs - cx[i][j]/2) * n[i+1][j]
+ DbDtDxs/2 * n[i][Ny-2]; //CCy
AAy[j] = -.5 * ( .5 * cy[i][j] + DbDtDxs);
BBy[j] = 1 + DbDtDxs;
CCy[j] = 0; // pas utilise soustrait à DDy
}
else if(i==Nx-1)
{ j=0; //DDy[j] = DbDtDxs/2 * n[i-1][j] + (1-DbDtDxs) * n[i][j] + DbDtDxs/2 * n[i+1][j] - -( .5 * cy[i][j] + DbDtDxs/2 ) * n[i][j-1];
DDy[j] = DbDtDxs/2 * n[i-1][j] + (1-DbDtDxs) * n[i][j] + DbDtDxs/2 * n[Nx-2][j]
+ DbDtDxs/2 * n[i][1];
AAy[j] = 0; // pas utilise soustrait à DDy
BBy[j] = 1 + DbDtDxs ;
CCy[j] = .5 * ( .5 * cy[i][j+1] - DbDtDxs);
j=Ny-1;
DDy[j] = .5 * ( DbDtDxs + cx[i][j]/2) * n[i-1][j] + (1-DbDtDxs) * n[i][j] + .5 * ( DbDtDxs - cx[i][j]/2) * n[Nx-2][j]
+ DbDtDxs/2 * n[i][Ny-2];
AAy[j] = -.5 * ( .5 * cy[i][j] + DbDtDxs);
BBy[j] = 1 + DbDtDxs;
CCy[j] = 0; // pas utilise soustrait à DDy
}
else
{
j=0; //DDy[j] = DbDtDxs/2 * n[i-1][j] + (1-DbDtDxs) * n[i][j] + DbDtDxs/2 * n[i+1][j] - -( .5 * cy[i][j] + DbDtDxs/2 ) * n[i][j-1];
DDy[j] = DbDtDxs/2 * n[i-1][j] + (1-DbDtDxs) * n[i][j] + DbDtDxs/2 * n[i+1][j]
+ DbDtDxs/2 * n[i][1];
AAy[j] = 0; // pas utilise soustrait à DDy
BBy[j] = 1 + DbDtDxs ;
CCy[j] = .5 * ( .5 * cy[i][j+1] - DbDtDxs);
j=Ny-1;
DDy[j] = .5 * ( DbDtDxs + cx[i][j]/2) * n[i-1][j] + (1-DbDtDxs) * n[i][j] + .5 * ( DbDtDxs - cx[i][j]/2) * n[i+1][j]
+ DbDtDxs/2 * n[i][Ny-2];
AAy[j] = -.5 * ( .5 * cy[i][j] + DbDtDxs);
BBy[j] = 1 + DbDtDxs;
CCy[j] = 0; // pas utilise soustrait à DDy
}tridiag_full(AAy, BBy, CCy, DDy, ny, Ny-1); //calcul les point en 0 et en Ny-1 inclusfor (j=0; j<Ny; j++)
{
n[i][j]=ny[j];
}
}
////////////calcul sur x //////////
//calcul coef ADI
for (j=0; j<Ny; j++)
{for (i=1; i<Nx-1; i++)
{//non conservative forme
if(j==0)
{
AAx[i] = -.5 * DbDtDxs;
BBx[i] = 1 + DbDtDxs;
CCx[i] = - .5 * DbDtDxs ;
DDx[i]= .5 * DbDtDxs * n[i][1]
+ ( 1 - DbDtDxs ) * n[i][j]
+ .5 * DbDtDxs * n[i][j+1];
}
else if(j==Ny-1)
{
AAx[i] = -.5* ( .5 * cx[i][j] + DbDtDxs );
BBx[i] = 1 + DbDtDxs;
CCx[i] = .5 * ( .5 * cx[i][j] - DbDtDxs) ;
DDx[i]= .5 * ( .5 * cy[i][j] + DbDtDxs ) * n[i][j-1]
+ ( 1 - DbDtDxs ) * n[i][j]
+ .5 * ( - .5 * cy[i][j] + DbDtDxs ) * n[i][Ny-2];
}
else //conservative forme
{
AAx[i] = -.5* ( .5 * ( 1 + epsx[i][j] ) * cx[i-1][j] + DbDtDxs );
BBx[i] = 1 + DbDtDxs + .5 * epsx[i][j] * cx[i][j];
CCx[i] = .5 * ( .5 * ( 1 - epsx[i][j] ) * cx[i+1][j] - DbDtDxs) ;
DDx[i]= .5 * ( .5 * ( 1 + epsy[i][j] ) * cy[i][j-1] + DbDtDxs ) * n[i][j-1]
+ ( 1 - DbDtDxs - .5 * epsy[i][j] * cy[i][j] ) * n[i][j]
+ .5 * (-.5 * ( 1 - epsy[i][j] ) * cy[i][j+1] + DbDtDxs ) * n[i][j+1];}nx[i]=n[i][j];
}
if(j==0)
{
i=0;
AAx[i] = -.5 * DbDtDxs;
BBx[i] = 1 + DbDtDxs;
CCx[i] = - .5 * DbDtDxs ;
DDx[i]= .5 * DbDtDxs * n[i][1] + ( 1 - DbDtDxs ) * n[i][j] + .5 * DbDtDxs * n[i][j+1]
-AAx[i];
i=Nx-1;
AAx[i] = -.5 * DbDtDxs;
BBx[i] = 1 + DbDtDxs;
CCx[i] = - .5 * DbDtDxs ;
DDx[i]= .5 * DbDtDxs * n[i][1] + ( 1 - DbDtDxs ) * n[i][j] + .5 * DbDtDxs * n[i][j+1]
-CCx[i];
}
else if(j==Ny-1)
{
i=0;
AAx[i] = -.5 * ( .5 * cx[i][j] + DbDtDxs);
BBx[i] = 1 + DbDtDxs;
CCx[i] = .5 * (.5 * cx[i][j] - DbDtDxs );
DDx[i]= .5 * DbDtDxs * n[i][j-1] + ( 1 - DbDtDxs ) * n[i][j] + .5 * DbDtDxs * n[i][Ny-2]
-AAx[i];
i=Nx-1;
AAx[i] = -.5 * ( .5 * cx[i][j] + DbDtDxs);
BBx[i] = 1 + DbDtDxs;
CCx[i] = .5 * (.5 * cx[i][j] - DbDtDxs );
DDx[i]= .5 * DbDtDxs * n[i][j-1] + ( 1 - DbDtDxs ) * n[i][j] + .5 * DbDtDxs * n[i][Ny-2]
-CCx[i];
}
else
{
i=0;
AAx[i] = -.5 * DbDtDxs;
BBx[i] = 1 + DbDtDxs;
CCx[i] = - .5 * DbDtDxs ;
DDx[i]= .5 * DbDtDxs * n[i][j-1] + ( 1 - DbDtDxs ) * n[i][j] + .5 * DbDtDxs * n[i][j+1]
-AAx[i];
i=Nx-1;
AAx[i] = -.5 * DbDtDxs;
BBx[i] = 1 + DbDtDxs;
CCx[i] = - .5 * DbDtDxs ;
DDx[i]= .5 * DbDtDxs * n[i][j-1] + ( 1 - DbDtDxs ) * n[i][j] + .5 * DbDtDxs * n[i][j+1]
-CCx[i];
}//boundary condition values no flux n[i-1][j]=n[i+1][j] and substraction of a[0]*n[i][j] for tridaig calculation.
i=0;
DDx[i] = (1-DbDtDxs) * n[i][j] + 3*DbDtDxs/2 * n[i+1][j];
AAx[i] = 0 ; //pas utlise soustrait à DDx
BBx[i] = 1 + DbDtDxs ;
CCx[i] = - .5 * DbDtDxs ;
i=Nx-1;
DDx[i] = 3*DbDtDxs/2 * n[i-1][j] + (1-DbDtDxs) * n[i][j];
AAx[i] = - .5* DbDtDxs;
BBx[i] = 1 + DbDtDxs ;
CCx[i] = 0 ; //pas utlise soustrait à DDxtridiag_full(AAx, BBx, CCx, DDx, nx, Nx-1); //calcul les point en 0 et en Nx-1 inclus
for (i=0; i<Nx; i++)
{
n[i][j]=nx[i];
}}
}
Индексные переменные i
& j
распространены среди некоторых ваших разделов, что приведет к проблемам синхронизации между разделами и в большинстве случаев к некорректному выводу. Измените его на либо сделав переменные приватными для каждого раздела #pragma omp section private(i,j)
или инициализировать их в каждом цикле for (int i=1; i<Nx-1; i++)
, Я всегда предпочитаю инициализировать переменные в своих циклах, поэтому я не случайно повторно использую устаревшее значение из предыдущей итерации.
Других решений пока нет …