!======================================================================= ! ! 3D FFT module package ! ! Written 4/24/02 D. A. Steck ! Updated 7/18/03 ! ! Subroutines provided: ! ! 1. fft3d -> perform 3D FFT, output overwrites input ! call as: "call fft3d(integer fftsign, complex(wp) data)" ! ! 2. fft3dt -> perform 3D FFT, with option to accept output in ! transposed form to save communication (a second inverse ! call undoes the transpose) ! call as: ! "call fft3d(integer fftsign, complex(wp) datain, ! complex(wp) dataout)" ! where the first and second dimensions of dataout are ! swapped with respect to datain. Note that the ! datain array is disturbed by the computation. ! ! The 3D array is distributed in blocks only along the last ! dimension, so it is most efficient if the calling program ! also uses this blocking. ! ! Uses internal 1-D fft routine. ! Also requires the integer parameter 'wp' from the module ! 'globals' to define the KIND for the real and complex ! variables. ! !======================================================================= module fft3dlib use globals, only : wp private public :: fft3d, fft3dt contains subroutine fft3d(fftsign, x) implicit none integer, intent(in) :: fftsign complex(kind=wp), dimension(:,:,:), intent(inout) :: x !hpf$ distribute x(*,*,block) complex(kind=wp), dimension(size(x,3),size(x,2),size(x,1)) :: xtemp integer :: i, j, k, n1, n2, n3 !hpf$ align xtemp(:,:,:) with x(:,:,:) n1 = size(x, 1) n2 = size(x, 2) n3 = size(x, 3) ! use fft3dt and then undo transpose call fft3dt(fftsign, x, xtemp) forall ( i = 1:n1, j = 1:n2, k = 1:n3 ) x(i,j,k) = xtemp(k,j,i) end subroutine fft3d subroutine fft3dt(fftsign, xin, xout) implicit none integer, intent(in) :: fftsign complex(kind=wp), dimension(:,:,:), intent(inout) :: xin complex(kind=wp), dimension(:,:,:), intent(out) :: xout !hpf$ distribute xin(*,*,block) !hpf$ distribute xout(*,*,block) ! coefficient array for calls to ffts complex(kind=wp), dimension(:,:), allocatable, save :: coeff !hpf$ distribute coeff(*,*) integer :: n1, n2, n3 integer :: n, log2n, log2n1, log2n2, log2n3 integer :: m, j, k, l real(kind=wp), parameter :: pi = 3.141592653589793238_wp complex(kind=wp), parameter :: i = (0.0_wp, 1.0_wp) integer, save :: nold = 0 n1 = size(xin, 1) n2 = size(xin, 2) n3 = size(xin, 3) if ( n3 .ne. size(xout,1) .or. & n2 .ne. size(xout,2) .or. n1 .ne. size(xout,3) ) then write(0,*) 'Error (FFT3DT): input and output array sizes do not match' stop end if ! initialize coefficient array on first call, or if fft is larger than ! on all previous fft calls ! This array is the U array in Bailey's paper, which stores ! the roots of unity for stride-1 access. ! This part moved out of the usual ffts module for purity of the ! ffts1d subroutine. n = max(n1, n2, n3) log2n1 = int( log(real(n1,kind=wp))/log(2.0_wp) + 0.49 ) log2n2 = int( log(real(n2,kind=wp))/log(2.0_wp) + 0.49 ) log2n3 = int( log(real(n3,kind=wp))/log(2.0_wp) + 0.49 ) if ( iand(n1, n1-1) .ne. 0 ) then write(0,*) "Error (FFT3DT): first array dimension not power of 2" stop end if if ( iand(n2, n2-1) .ne. 0 ) then write(0,*) "Error (FFT3DT): second array dimension not power of 2" stop end if if ( iand(n3, n3-1) .ne. 0 ) then write(0,*) "Error (FFT3DT): third array dimension not power of 2" stop end if log2n = max(log2n1, log2n2, log2n3) if ( nold .lt. n ) then nold = n if ( allocated(coeff) ) deallocate(coeff) allocate(coeff(2*n, 2)) do m = 0, log2n do j = 0, 2**m-1 coeff(2**m+j,1) = exp(i*j*pi/(2**m)) end do end do coeff(:,2) = conjg(coeff(:,1)) end if ! transform one local dimension !hpf$ independent do k = 1, n3 !hpf$ independent do j = 1, n2 call ffts1d( fftsign, n1, log2n1, xin(:,j,k), coeff ) end do end do ! copy to transposed output array so that the remaining 2 dimensions are local forall ( j = 1:n1, k = 1:n2, l = 1:n3 ) xout(l,k,j) = xin(j,k,l) ! transform the unit-stride local dimension !hpf$ independent do k = 1, n1 !hpf$ independent do j = 1, n2 call ffts1d( fftsign, n3, log2n3, xout(:,j,k), coeff ) end do end do ! transform the remaining local dimension !hpf$ independent do k = 1, n1 !hpf$ independent do j = 1, n3 call ffts1d( fftsign, n2, log2n2, xout(j,:,k), coeff ) end do end do contains pure subroutine ffts1d(fftsign, n, log2n, data, coeff) implicit none integer, intent(in) :: fftsign, n, log2n complex(kind=wp), dimension(:), intent(inout) :: data complex(kind=wp), dimension(:,:), intent(in) :: coeff complex(kind=wp), dimension(1:size(data)) :: work integer ls, ns, l, j, m ! divide cases according to whether log2n is even or odd, and "ping-pong" ! the computation between the data and work arrays if ( iand(log2n, 1) .eq. 1 ) then do l = 1, log2n-1, 2 ls = 2**(l-1) ns = n/(ls+ls) call stockv1(fftsign, ls, ns, data, work, coeff) call stockv1(fftsign, ls*2, ns/2, work, data, coeff) end do ls = 2**(log2n-1) ns = n/(ls+ls) call stockv1(fftsign, ls, ns, data, work, coeff) data = work else do l = 1, log2n, 2 ls = 2**(l-1) ns = n/(ls+ls) call stockv1(fftsign, ls, ns, data, work, coeff) call stockv1(fftsign, ls*2, ns/2, work, data, coeff) end do end if end subroutine ffts1d pure subroutine stockv1(fftsign, ls, ns, c, ch, coeff) implicit none integer, intent(in) :: fftsign, ls, ns complex(kind=wp), intent(in), dimension(ls, ns, *) :: c complex(kind=wp), intent(out), dimension(ls, 2, *) :: ch complex(kind=wp), intent(in), dimension(:,:) :: coeff complex(kind=wp) :: omegk, wyk integer :: j, k, spt if ( fftsign .eq. 1 ) then spt = 1 else spt = 2 end if do j = 1, ns ch(1,1,j) = c(1,j,1) + c(1,j,2) ch(1,2,j) = c(1,j,1) - c(1,j,2) do k = 2, ls omegk = coeff(ls+k-1,spt) wyk = omegk * c(k,j,2) ch(k,1,j) = c(k,j,1) + wyk ch(k,2,j) = c(k,j,1) - wyk end do end do end subroutine stockv1 end subroutine fft3dt end module fft3dlib