!============================================================================
!
! Module:
!
! (1) vcoul_generator()      Originally by JRD      Last Modified: 2/09/2009 (JRD)
!
!     Generates the (Truncated) Coulomb Interaction for all G at a particular
!     q.  Outputs what would be 8Pi/q^2 if not for truncation.
!
!============================================================================

#include "f_defs.h"

module vcoul_generator_m

  use global_m
  use random_m
  implicit none

  public ::          &
    vcoul_generator, &
    destroy_qran

  real(DP), allocatable, private :: qran(:,:)
  integer, private :: ifirst = 1

contains

subroutine vcoul_generator(itruncflag,truncval,gvec, &
  bdot,nfk,ncoul,isrtq,iscreen,qvec,q0vec,vcoul, &
  iwritecoul,iparallel,avgcut,oneoverq,qgrid,epshead, &
  work_scell,averagew,wcoul0)

  integer, intent(in) :: itruncflag
  real(DP), intent(in) ::  truncval(3) 
  type (gspace), intent(in) :: gvec
  real(DP), intent(in) :: bdot(3,3)
  integer, intent(in) :: nfk
  integer, intent(in) :: ncoul
  integer, intent(in) :: isrtq(:) ! (ncoul)
  integer, intent(in) :: iscreen
  real(DP), intent(in) :: qvec(3)
  real(DP), intent(in) ::  q0vec(3)
  real(DP), intent(out) :: vcoul(:) ! (ncoul)
  integer, intent(in) :: iwritecoul
  integer, intent(in) :: iparallel
  real(DP), intent(in) :: avgcut
  real(DP), intent(out) :: oneoverq
  integer, intent(in) :: qgrid(3)
  SCALAR, intent(in) :: epshead 
  type (twork_scell), intent(inout) :: work_scell
  logical, intent(in) :: averagew
  SCALAR, intent(inout) :: wcoul0

  integer :: i1, i2, i3, ii, jj, nn, ig, nint, iCommFlag, iCommFlagGlobal
  integer :: gk(3), seed, values(8)
  real(DP), allocatable :: vcoul2(:)
  real(DP) :: trunc, dvalue
  real(DP) :: qk(3), qkxy(3), qlen, q0len, ekinx, kxy
  real(DP) :: kz, zc, qkz(3), dummy, qvec_mod(3)
  real(DP) :: dd(3),fq(3),vlength, lmin, qpp(3) 
  real(DP), allocatable :: ran(:,:)
  real(DP), allocatable :: qran2(:,:)

  real(DP), save :: oneoverq0
  real(DP), save :: vcoul0
  integer, save :: iMiniBZComplete = 1
  integer, save :: ifirsto = 1

  integer, parameter :: ncell = 3 ! Used in the construction of the Wigner-Seitz cell.

  PUSH_SUB(vcoul_generator)

  if(ifirst == 2) call die("you destroyed qran already!")
  if(nfk == 0) call die("Illegal value nfk = 0 in vcoul_generator")

  iCommFlag = 0
  iCommFlagGlobal = 0

  vcoul = 0d0

  nint=100

  qlen  = sqrt(DOT_PRODUCT(qvec,MATMUL(bdot,qvec)))
  q0len = sqrt(DOT_PRODUCT(q0vec,MATMUL(bdot,q0vec)))
  
! JRD: If we have a metal then the divergent components of W cancel
!      and you get a constant.  Epsilon was calculated at q ~ q0vec
!      so we need to use q0vec here.
! MJ:  Not quite. We while W goes to a constant, V does not. See the
!      comment below about metals. Here we set the q to be qvec (i.e.
!      0 0 0) but later make sure we change W to be evaluated at this
!      q. We change epsiloninv such that epsinv*V(q0vec) = epsnewinv*V(q)
!      where V(q) is the averaged quantity.

  if (abs(qlen) .lt. TOL_Zero .and. iscreen .eq. 2 .and. peinf%jobtypeeval .eq. 1 .and. itruncflag .ne. 0) then
    qvec_mod = q0vec
  else
    qvec_mod = qvec
  endif
  
!------------------------------------------------------------------
! Generate random numbers for minibz averaging

  nn = nmc

  if (peinf%jobtypeeval .eq. 1 .and. ifirst .eq. 1 .and. &
   (itruncflag .eq. 0 .or. itruncflag .eq. 6)) then

    if (any(qgrid(1:3) .eq. 0)) then
      if(peinf%inode == 0) then
        write(0,*) 'Error qgrid', qgrid
        write(0,*) 'You must specify qgrid in .inp file'
      endif
      call die ('Zero qgrid. Cannot determine minibz', only_root_writes = .true.)
    endif

    SAFE_ALLOCATE(ran, (3,nn))

    if (peinf%inode .eq. 0) then
      call date_and_time(VALUES=values)
!          seed=((values(3)*24+values(5))*60+values(6))*60
!     >     +values(7)
      seed = 5000
      call genrand_init(put=seed)
      do jj = 1, 3*nn
        call genrand_real4(dummy)
      enddo
      do jj = 1, nn
        do ii = 1, 3
          call genrand_real4(ran(ii, jj))
        enddo
      enddo
    endif
    
#ifdef MPI
    call MPI_Bcast(ran, 3*nn, MPI_REAL_DP, 0, &
      MPI_COMM_WORLD,mpierr)
#endif
    
    dd(1:3) = 1D0 / dble(qgrid(1:3))

    SAFE_ALLOCATE(qran, (3,nn))
    
    qran = 0D0
    
    do jj = 1, nn
      
      if (iparallel .eq. 1) then
        if (mod(jj,peinf%npes).ne.peinf%inode) cycle
!            write(12000+peinf%inode,*) jj, ran(:,jj)
      endif
      
      lmin = INF
      qpp(:) = ran(:,jj)

      do i1 = -ncell+1, ncell
        fq(1) = qpp(1) - dble(i1)
        do i2 = -ncell+1, ncell
          fq(2) = qpp(2) - dble(i2)
          do i3 = -ncell+1, ncell
            fq(3) = qpp(3) - dble(i3)
            vlength = DOT_PRODUCT(fq,MATMUL(bdot,fq))
            if (vlength .lt. lmin) then
              lmin = vlength
              qran(:,jj) = fq(:)
            endif
          enddo
        enddo
      enddo

      qran(1:3,jj)=dd(1:3)*qran(1:3,jj)

    enddo ! jj

    SAFE_DEALLOCATE(ran)

#ifdef MPI
    if (iparallel .eq. 1) then
      SAFE_ALLOCATE(qran2, (3,nn))
      qran2(:,:)=0D0
      call MPI_ALLREDUCE(qran(1,1),qran2(1,1),3*nn,MPI_REAL_DP, MPI_SUM, MPI_COMM_WORLD,mpierr)
      qran(:,:)=qran2(:,:)
      SAFE_DEALLOCATE(qran2)
    endif
#endif

  endif

!-------------------------------------------------------------------
! No Truncation

  if (itruncflag .eq. 0) then

! Calculate Wing Correction Factor - this is not done for Epsilon and Kernel
! since avgcut is zero and qvec_mod is not

    if (peinf%inode .eq. 0 .and. qlen**2 .lt. avgcut .and. peinf%jobtypeeval .eq. 1) then
      if (qlen**2 .gt. TOL_Zero .or. ifirsto .eq. 1) then
        call minibzaverage_3d_oneoverq(nn, &
          bdot,dvalue,qran,qvec_mod)
        oneoverq=dvalue
        if (qlen**2 .le. TOL_Zero) then
          oneoverq0=dvalue
        endif
      else
        oneoverq=oneoverq0
        ifirsto=0
      endif
    endif
    ! otherwise we set oneoverq a little later

    do ig=1,ncoul
      if (iparallel .eq. 1) then
        if(mod(ig-1,peinf%npes).ne.peinf%inode) cycle
      endif
      
      gk(:)=gvec%k(:,isrtq(ig))
      qk(:)=gk+qvec_mod(:)
      ekinx=DOT_PRODUCT(qk,MATMUL(bdot,qk))

! We Do 3D Mini Brillouin Zone Average if q is exactly 0
! and G = 0. This should be the case when constructing W,
! but don`t want this in Epsilon code for example, where you
! have to use a finite Q.

      if ( (ekinx .lt. avgcut .and. peinf%jobtypeeval .eq. 1) ) then
        if (iscreen .eq. 0) then
          if (ekinx .gt. TOL_Zero .or. iMiniBZComplete .eq. 1) then
            call minibzaverage_3d_oneoverq2(nn,bdot,dvalue,qran,qk,averagew,epshead,wcoul0)
            vcoul(ig)=dvalue
            if (ekinx .le. TOL_Zero) then
              vcoul0=dvalue           
              iCommFlag = peinf%inode+1
              iMiniBZComplete = 0
            endif
          else
            vcoul(ig)=vcoul0
          endif

        elseif (iscreen .eq. 1) then
          
          if (ekinx .gt. TOL_Zero .or. iMiniBZComplete .eq. 1) then
            call minibzaverage_3d_oneoverq2(nn,bdot,dvalue,qran,qk,averagew,epshead,wcoul0)

            if (ekinx .lt. TOL_ZERO) then
              vcoul0=dvalue
              call minibzaverage_3d_oneoverq(nn,bdot,dvalue,qran,qk)
              if (q0len .lt. TOL_ZERO) then
                write(0,*) 'You have q0vec=0 but a graphene-type system!!'
                call die('Bad q0vec')
              endif
              wcoul0=epshead*dvalue/q0len
              iCommFlag = peinf%inode+1
              iMiniBZComplete = 0
              vcoul(ig)=vcoul0
            else
! JRD: This seems wrong. We now use correct vcoul average.
!              vcoul(ig)=dvalue/sqrt(ekinx)
            endif
          else
            vcoul(ig)=vcoul0
          endif

        elseif (iscreen .eq. 2) then
! MJ : W(q->0) goes to a constant. But the COH term (as well as the
!      Fock term) also require V(q->0). It is simpler to see this in the
!      context of the COH term in COHSEX which is 0.5*(W-V). Setting V(q->0)
!      based on the q0vec is dangerous because it just adds a constant to all
!      the quasiparticle levels. So we keep both of these quantities
!      at hand. This is only needed in the Sigma code.
          !write(0,*) 'You want cell averaging on a metal!!'
          !write(0,*) 'Specify q0vec and no cell averaging!!'
          !call die('Bad Screening Options')
          if (ekinx .gt. TOL_Zero .or. iMiniBZComplete .eq. 1) then
            call minibzaverage_3d_oneoverq2(nn,bdot,dvalue,qran,qk,averagew,epshead,wcoul0)

!      Keep in mind that while the vcoul average out of minibzaverage_3d_oneoverq2
!      is correct, wcoul0 is not. wcoul0 is just modified so that when multiplied
!      by vcoul it gives the correct result.
            !if (abs(ekinx-q0len) .lt. TOL_Zero) then
            if (ekinx .lt. TOL_Zero) then
              vcoul0=dvalue
              !wcoul0=epshead*(q0len*q0len)/(dvalue/8.0/PI_D)
              wcoul0=epshead*8.0*PI_D/(q0len*q0len)
              iCommFlag = peinf%inode+1
              iMiniBZComplete = 0
              vcoul(ig)=vcoul0
            endif
          else
            vcoul(ig)=vcoul0
          endif

        endif

      else if (ekinx .lt. Tol_zero) then

! We are not in an evaluation job and were sent q=0, we don`t average. We just zero. The outside code should
! do something about this value.

        vcoul(ig) = 0D0

      else

! For metal - we already changed qvec_mod to q0vec above

        vcoul(ig) = 8.0d0*PI_D/ekinx
      endif
    enddo

    if (qlen**2 .ge. avgcut) then
      oneoverq=vcoul(1)*qlen
    endif

#ifdef MPI
    if (iparallel .eq. 1) then
      call MPI_Bcast(oneoverq, 1, MPI_REAL_DP, 0, MPI_COMM_WORLD, mpierr)

      SAFE_ALLOCATE(vcoul2, (ncoul))
      vcoul2(:)=0.0d0
      call MPI_ALLREDUCE(vcoul,vcoul2,ncoul, &
        MPI_REAL_DP, MPI_SUM, MPI_COMM_WORLD,mpierr)
      call MPI_ALLREDUCE(iCommFlag,iCommFlagGlobal,1, &
        MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD,mpierr)
      if (iCommFlagGlobal .ne. 0) then
        iMiniBZComplete=0
        call MPI_BCAST(vcoul0,1,MPI_DOUBLE_PRECISION,iCommFlagGlobal-1, MPI_COMM_WORLD, mpierr)
        call MPI_BCAST(wcoul0,1,MPI_SCALAR,iCommFlagGlobal-1, MPI_COMM_WORLD, mpierr)
      endif
      vcoul(:)=vcoul2(:)
      SAFE_DEALLOCATE(vcoul2)
    endif
#endif

!--------------------------------------------------------------------
! Rectangular Box Truncation

  elseif (itruncflag .eq. 1) then
  
    call die('Rectangular Truncation is No Longer Supported')
  
!------------------------------------------------------------------
! Spherical Truncation

  elseif (itruncflag .eq. 2) then
    
    do ig=1,ncoul
      qk(:)=gvec%k(:,isrtq(ig))+qvec_mod(:)
      ekinx=DOT_PRODUCT(qk,MATMUL(bdot,qk))
      if ( ekinx.lt.TOL_ZERO .and. peinf%jobtypeeval .eq. 1 ) then
        vcoul(ig)=8.0D0*PI_D*((truncval(1))**2)/2D0
      else if ( ekinx.lt.TOL_ZERO ) then
        vcoul(ig) = 0D0
      else
        vcoul(ig)=8.0d0*PI_D/ekinx* &
          (1.0d0-cos(truncval(1)*sqrt(ekinx)))
      endif
    enddo
    
    oneoverq=vcoul(1)*qlen
    wcoul0 = vcoul(1)

!------------------------------------------------------------------
! Cylindrical Box Truncation

  elseif (itruncflag .eq. 3) then

    call die('Cylindrical Truncation is No Longer Supported')
    
!-----------------------------------------------------------------
! Cell Wire Truncation

  elseif (itruncflag .eq. 4) then
    
!    if (peinf%inode .eq. 0) then
!      write(6,*) 'Generating Vcoul_cell_wire with FFT'
!    endif

! JRD: This is hopefully temporary.  q=0 gives NaN
! because the log diverges in Eq. 5 of Ismail-Beigi paper.  
! For all G =/ 0, using the q0vec (small shifted vector) is 
! probably good enough.  
!
! JRD: This is now fixed in trunc_cell_wire where we use the scheme
! of Ismail-Beigi

    if (iparallel .eq. 1) then
      call trunc_cell_wire(gvec,.true.,peinf%inode, &
        peinf%npes,bdot,qvec_mod(3),ncoul,isrtq,vcoul)
    else
      call trunc_cell_wire(gvec,.false.,0, &
        1,bdot,qvec_mod(3),ncoul,isrtq,vcoul)
    endif

! We Do 1D Mini Brillouin Zone Average if q=0, G=0 or less than avgcut
! May not be implemented correctly for graphene-type system... I`m not even sure there is a system with linear DOS in 1D...

    if (iscreen .eq. 0) then
      do ig=1,ncoul
        qk(:)=gvec%k(:,isrtq(ig))+qvec_mod(:)
        ekinx=DOT_PRODUCT(qk,MATMUL(bdot,qk))
        if ( (ekinx .lt. avgcut) .and. peinf%jobtypeeval .eq. 1) then 

          if (ekinx .gt. TOL_Zero .or. iMiniBZComplete .eq. 1) then
            if (peinf%inode .eq. 0) then
              write(6,'(" minibz wire averaging for q =",3f8.3)') qk(1:3)
            endif
            call minibzaverage_1d(gvec,nfk,bdot,dvalue,iparallel,qk,epshead,q0len,averagew,wcoul0)
            vcoul(ig)=dvalue
            if (ekinx .lt. TOL_Zero) then
              vcoul0=dvalue
              iMiniBZComplete=0
            endif
          else
            vcoul(ig)=vcoul0
          endif
        else if (ekinx .lt. Tol_Zero) then
          vcoul(ig) = 0
        endif
      enddo
    endif

    if (peinf%inode .eq. 0) then
      write(6,*)
    endif

    oneoverq=vcoul(1)*qlen

!----------------------------------------------------------------
! Cell Box Truncation

  elseif (itruncflag .eq. 5) then
    
    if (qlen .gt. Tol_ZERO) then
      write(0,*) 'You asked for cell box truncation'
      write(0,*) 'but have more q-points than q=0!!'
      call die('Bad Truncation')
    endif
    
    if (iparallel .eq. 1) then
      call trunc_cell_box_d(gvec,.true.,peinf%inode, &
        peinf%npes,bdot,ncoul,isrtq,vcoul)
    else
      call trunc_cell_box(gvec,.true.,bdot,ncoul,isrtq,vcoul)
    endif
    
    oneoverq=vcoul(1)*qlen
    wcoul0 = vcoul(1)

!----------------------------------------------------------------
! Cell Slab Truncation

! JRD: This is easy because an analytic expression exists.  See Sohrab.

  elseif (itruncflag .eq. 6) then
    
    if (abs(qvec_mod(3)) .gt. Tol_ZERO) then
      write(0,*) 'You asked for cell slab truncation but have'
      write(0,*) 'more q-points in z direction than qz=0!!'
      call die('Bad Truncation')
    endif
    
    do ig=1,ncoul
      
      if (iparallel .eq. 1) then
        if(mod(ig,peinf%npes).ne.peinf%inode) cycle
      endif
      
      qk(:)=gvec%k(:,isrtq(ig))+qvec_mod(:)
      ekinx=DOT_PRODUCT(qk,MATMUL(bdot,qk))
      qkxy(1:2)=qk(1:2)
      qkxy(3)=0D0
      kxy=sqrt(DOT_PRODUCT(qkxy,MATMUL(bdot,qkxy)))
      qkz(1:2)=0D0
      qkz(3)=qk(3)
      kz=sqrt(DOT_PRODUCT(qkz,MATMUL(bdot,qkz)))
      zc=2D0*PI_D/(sqrt(bdot(3,3))*2D0)
      
!          write(6,*) "zc", zc

      if ( (ekinx .lt. avgcut) .and. peinf%jobtypeeval .eq. 1) then
        if (iscreen .ne. 2) then
          if (ekinx .gt. TOL_Zero .or. iMiniBZComplete .eq. 1) then
            call minibzaverage_2d_oneoverq2(nn,bdot, &
              dvalue,qran,qk,kz,zc,epshead,q0len,averagew,wcoul0)
            vcoul(ig)=dvalue
            if (ekinx .lt. TOL_Zero) then
              vcoul0=dvalue
              if (iscreen .eq. 1) then
                wcoul0=vcoul0*epshead
              endif
              iMiniBZComplete=0
              iCommFlag=peinf%inode+1
            endif
          else
            vcoul(ig)=vcoul0
          endif
        else
          write(0,*) 'You have q0vec=0 but a metal!!'
          call die('Bad Screening Options')
        endif
      else if (ekinx .lt. Tol_Zero) then
        vcoul(ig) = 0D0
      else
        vcoul(ig) = 8.0d0*PI_D/ekinx
        vcoul(ig) = vcoul(ig)*(1.0d0-exp(-kxy*zc)*cos(kz*zc))
      endif
    enddo
    
#ifdef MPI
    if (iparallel .eq. 1) then
      SAFE_ALLOCATE(vcoul2, (ncoul))
      vcoul2(:)=0.d0
      call MPI_ALLREDUCE(vcoul,vcoul2,ncoul, &
        MPI_REAL_DP, MPI_SUM, MPI_COMM_WORLD,mpierr)
      vcoul(:)=vcoul2(:)
      SAFE_DEALLOCATE(vcoul2)
      call MPI_ALLREDUCE(iCommFlag,iCommFlagGlobal,1, &
        MPI_INTEGER, MPI_SUM, MPI_COMM_WORLD,mpierr)
      if (iCommFlagGlobal .ne. 0) then
        iMiniBZComplete=0
        call MPI_BCAST(vcoul0,1,MPI_DOUBLE_PRECISION,iCommFlagGlobal-1, MPI_COMM_WORLD, mpierr)
        call MPI_BCAST(wcoul0,1,MPI_SCALAR,iCommFlagGlobal-1, MPI_COMM_WORLD, mpierr)
      endif
    endif
#endif
    
! This is wrong too?

    oneoverq=vcoul(1)*qlen

!----------------------------------------------------------------
! Supercell Box Truncation

  elseif (itruncflag .eq. 7) then
    
    if (iparallel .eq. 1) then
      call trunc_scell_box_d(gvec,.true.,peinf%inode,peinf%npes, &
        bdot,qvec_mod,qgrid,ncoul,isrtq,vcoul,work_scell)
    else
      call trunc_scell_box_d(gvec,.true.,0,1, &
        bdot,qvec_mod,qgrid,ncoul,isrtq,vcoul,work_scell)
    endif
    
    oneoverq=vcoul(1)*qlen
    wcoul0 = vcoul(1)
    
  endif

! Saving qran between calls
!      deallocate(qran)

!-----------------------------------------------------------------
! Print vcoul to file

  if (iwritecoul .eq. 1) then
    if (peinf%inode.eq.0) then
      do ig=1,ncoul
        write(19,'(3f12.8,1x,3i7,1x,e20.8)') &
          qvec_mod(:),gvec%k(:,isrtq(ig)),vcoul(ig)
      enddo
    endif
  endif

  ifirst = 0
  
  POP_SUB(vcoul_generator)
  
  return
end subroutine vcoul_generator

!-----------------------------------------------------------------
subroutine destroy_qran()

  PUSH_SUB(destroy_qran)

  ifirst = 2
  SAFE_DEALLOCATE(qran)

#ifdef VERBOSE
  if(peinf%inode == 0) write(6,'(a)') '*** VERBOSE: Deallocated random numbers.'
#endif

  POP_SUB(destroy_qran)
  return
end subroutine destroy_qran

end module vcoul_generator_m
