updatephi_kernel.cu
上传用户:liuping58
上传日期:2022-06-05
资源大小:105k
文件大小:4k
源码类别:

3D图形编程

开发平台:

Visual C++

  1. #define ALPHA  0.02
  2. #define DT  0.25
  3. #define max(x,y)    ((x>y) ? x : y )
  4. #define min(x,y)    ((x<y) ? x : y )
  5. #define INDEX(i,j,j_off)  (i +__mul24(j,j_off))
  6. #define BLOCKDIM_X  32
  7. #define BLOCKDIM_Y  8
  8. __global__ void updatephi( float *d_phi, float *d_phi1, float *d_D, int imageW, int imageH, int pitch)
  9. {
  10. int c= blockIdx.x * blockDim.x + threadIdx.x;
  11. int r= blockIdx.y * blockDim.y + threadIdx.y;
  12. int ind= r*pitch+c;
  13. int   indg, indg_h, indg0;
  14. int   i, j, k, ind2, ind_h, halo, active;
  15. __shared__ float s_data[(BLOCKDIM_X+2)*(BLOCKDIM_Y+2)];
  16. k    =  threadIdx.y*BLOCKDIM_X + threadIdx.x;
  17. halo = k < 2*(BLOCKDIM_X+BLOCKDIM_Y+2);
  18. if (halo) {
  19. if (threadIdx.y<2) {               // y-halos (coalesced)
  20. i = threadIdx.x;
  21. j = threadIdx.y*(BLOCKDIM_Y+1) - 1;
  22. }
  23. else {                             // x-halos (not coalesced)
  24. i = (k%2)*(BLOCKDIM_X+1) - 1;
  25. j =  k/2 - BLOCKDIM_X - 1;
  26. }
  27. ind_h  = INDEX(i+1,j+1,BLOCKDIM_X+2);
  28. i      = INDEX(i,blockIdx.x,BLOCKDIM_X);   // global indices
  29. j      = INDEX(j,blockIdx.y,BLOCKDIM_Y);
  30. indg_h = INDEX(i,j,pitch);
  31. halo   =  (i>=0) && (i<imageW) && (j>=0) && (j<imageH);
  32. }
  33. //
  34. // then set up indices for main block
  35. //
  36. i    = threadIdx.x;
  37. j    = threadIdx.y;
  38. ind2  = INDEX(i+1,j+1,BLOCKDIM_X+2) ;
  39. i    = INDEX(i,blockIdx.x,BLOCKDIM_X);     // global indices
  40. j    = INDEX(j,blockIdx.y,BLOCKDIM_Y);
  41. indg = INDEX(i,j,pitch);
  42. active = (i<imageW) && (j<imageH);
  43. //
  44. // read initial plane of u1 array
  45. //
  46. if (active) s_data[ind2] = d_phi1[indg];
  47. if (halo) s_data[ind_h] = d_phi1[indg_h];
  48. __syncthreads();
  49. if(active){
  50. if(r<imageW&&c<imageH){
  51. float dx,dxplus,dxminus,dxplusy,dxminusy;
  52. float dy, dyplus,dyminus,dyplusx,dyminusx;
  53. float gradphimax, gradphimin, nplusx, nplusy, nminusx, nminusy, curvature;
  54. float F, gradphi;
  55. if(c==0||c==imageW-1){dx=0;} else {dx=(s_data[ind2+1]-s_data[ind2-1])/2;}
  56. if(c==imageW-1){dxplus=0;} else {dxplus=(s_data[ind2+1]-s_data[ind2]);}
  57. if(c==0){dxminus=0;} else {dxminus=(s_data[ind2]-s_data[ind2-1]);}
  58. if(r==0||c==0||c==imageW-1){dxplusy=0;} else {dxplusy=(s_data[ind2-(BLOCKDIM_X+2)+1]-s_data[ind2-(BLOCKDIM_X+2)-1])/2;}
  59. if(r==imageH-1||c==0||c==imageW-1){dxminusy=0;} else {dxminusy=(s_data[ind2+(BLOCKDIM_X+2)+1]-s_data[ind2+(BLOCKDIM_X+2)-1])/2;}
  60. if(r==0||r==imageH-1){dy=0;} else {dy=(s_data[ind2-(BLOCKDIM_X+2)]-s_data[ind2+(BLOCKDIM_X+2)])/2;}
  61. if(r==0){dyplus=0;} else {dyplus=(s_data[ind2-(BLOCKDIM_X+2)]-s_data[ind2]);}
  62. if(r==imageH-1){dyminus=0;} else {dyminus=(s_data[ind2]-s_data[ind2+(BLOCKDIM_X+2)]);}
  63. if(r==0||c==imageW-1||r==imageH-1){dyplusx=0;} else {dyplusx=(s_data[ind2-(BLOCKDIM_X+2)+1]-s_data[ind2+(BLOCKDIM_X+2)+1])/2;}
  64. if(r==0||c==0||r==imageH-1){dyminusx=0;} else {dyminusx=(s_data[ind2-(BLOCKDIM_X+2)-1]-s_data[ind2+(BLOCKDIM_X+2)-1])/2;}
  65. gradphimax=sqrt((sqrt(max(dxplus,0)*max(dxplus,0)+max(-dxminus,0)*max(-dxminus,0)))*(sqrt(max(dxplus,0)*max(dxplus,0)+max(-dxminus,0)*max(-dxminus,0)))
  66. +(sqrt(max(dyplus,0)*max(dyplus,0)+max(-dyminus,0)*max(-dyminus,0)))*(sqrt(max(dyplus,0)*max(dyplus,0)+max(-dyminus,0)*max(-dyminus,0))));
  67. gradphimin=sqrt((sqrt(min(dxplus,0)*min(dxplus,0)+min(-dxminus,0)*min(-dxminus,0)))*(sqrt(min(dxplus,0)*min(dxplus,0)+min(-dxminus,0)*min(-dxminus,0)))
  68. +(sqrt(min(dyplus,0)*min(dyplus,0)+min(-dyminus,0)*min(-dyminus,0)))*(sqrt(min(dyplus,0)*min(dyplus,0)+min(-dyminus,0)*min(-dyminus,0))));
  69. nplusx= dxplus / sqrt(1.192092896e-07F + (dxplus*dxplus) + ((dyplusx + dy)*(dyplusx + dy)*0.25) );
  70. nplusy= dyplus / sqrt(1.192092896e-07F + (dyplus*dyplus) + ((dxplusy + dx)*(dxplusy + dx)*0.25) );
  71. nminusx= dxminus / sqrt(1.192092896e-07F + (dxminus*dxminus) + ((dyminusx + dy)*(dyminusx + dy)*0.25) );
  72. nminusy= dyminus / sqrt(1.192092896e-07F + (dyminus*dyminus) + ((dxminusy + dx)*(dxminusy + dx)*0.25) );
  73. curvature= ((nplusx-nminusx)+(nplusy-nminusy))/2;
  74. F = (-ALPHA * d_D[indg]) + ((1-ALPHA) * curvature);
  75. if(F>0) {gradphi=gradphimax;} else {gradphi=gradphimin;}
  76. d_phi[indg]=s_data[ind2] + (DT * F * gradphi);
  77. }
  78. __syncthreads();
  79. }
  80. }