subroutine wann_domega(csheet, sheet, rave, cdodq)
!==================================================================!
! !
! Calculate the Gradient of the Wannier Function spread !
! !
! Modified by Valerio Vitale for the SLWF+C method (PRB 90, 165125)!
! Jun 2018, based on previous work by Charles T. Johnson and !
! Radu Miron at Implerial College London
!===================================================================
use w90_parameters, only: num_wann, wb, bk, nntot, m_matrix, num_kpts, &
timing_level, selective_loc, &
slwf_constrain, slwf_num, &
ccentres_cart
use w90_io, only: io_stopwatch, io_error
use w90_parameters, only: lsitesymmetry !RS:
use w90_sitesym, only: sitesym_symmetrize_gradient !RS:
implicit none
complex(kind=dp), intent(in) :: csheet(:, :, :)
real(kind=dp), intent(in) :: sheet(:, :, :)
real(kind=dp), intent(out) :: rave(:, :)
! as we work on the local cdodq, returning the full cdodq array is now
! made optional
complex(kind=dp), intent(out), optional :: cdodq(:, :, :)
!local
complex(kind=dp), allocatable :: cr(:, :)
complex(kind=dp), allocatable :: crt(:, :)
real(kind=dp), allocatable :: r0kb(:, :, :)
! local
integer :: iw, ind, nkp, nn, m, n, ierr, nkp_loc
complex(kind=dp) :: mnn
if (timing_level > 1 .and. on_root) call io_stopwatch('wann: domega', 1)
allocate (cr(num_wann, num_wann), stat=ierr)
if (ierr /= 0) call io_error('Error in allocating cr in wann_domega')
allocate (crt(num_wann, num_wann), stat=ierr)
if (ierr /= 0) call io_error('Error in allocating crt in wann_domega')
if (selective_loc .and. slwf_constrain) then
allocate (r0kb(num_wann, nntot, num_kpts), stat=ierr)
if (ierr /= 0) call io_error('Error in allocating r0kb in wann_domega')
end if
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
do nn = 1, nntot
do n = 1, num_wann
! Note that this ln_tmp is defined differently wrt the one in wann_omega
ln_tmp_loc(n, nn, nkp_loc) = wb(nn)*(aimag(log(csheet(n, nn, nkp) &
*m_matrix_loc(n, n, nn, nkp_loc))) - sheet(n, nn, nkp))
end do
end do
end do
! recalculate rave
rave = 0.0_dp
do iw = 1, num_wann
do ind = 1, 3
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
do nn = 1, nntot
rave(ind, iw) = rave(ind, iw) + bk(ind, nn, nkp) &
*ln_tmp_loc(iw, nn, nkp_loc)
enddo
enddo
enddo
enddo
rave = -rave/real(num_kpts, dp)
call comms_allreduce(rave(1, 1), num_wann*3, 'SUM')
! b.r_0n are calculated
if (selective_loc .and. slwf_constrain) then
r0kb = 0.0_dp
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
do nn = 1, nntot
do n = 1, num_wann
r0kb(n, nn, nkp_loc) = sum(bk(:, nn, nkp)*ccentres_cart(n, :))
enddo
enddo
enddo
end if
rnkb_loc = 0.0_dp
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
do nn = 1, nntot
do n = 1, num_wann
rnkb_loc(n, nn, nkp_loc) = sum(bk(:, nn, nkp)*rave(:, n))
enddo
enddo
enddo
! cd0dq(m,n,nkp) is calculated
cdodq_loc = cmplx_0
cr = cmplx_0
crt = cmplx_0
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
do nn = 1, nntot
do n = 1, num_wann ! R^{k,b} and R~^{k,b} have columns of zeroes for the non-objective Wannier functions
mnn = m_matrix_loc(n, n, nn, nkp_loc)
crt(:, n) = m_matrix_loc(:, n, nn, nkp_loc)/mnn
cr(:, n) = m_matrix_loc(:, n, nn, nkp_loc)*conjg(mnn)
enddo
if (selective_loc) then
do n = 1, num_wann
do m = 1, num_wann
if (m <= slwf_num) then
if (n <= slwf_num) then
! A[R^{k,b}]=(R-Rdag)/2
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) &
+ wb(nn)*0.5_dp*(cr(m, n) - conjg(cr(n, m)))
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) &
- (crt(m, n)*ln_tmp_loc(n, nn, nkp_loc) &
+ conjg(crt(n, m)*ln_tmp_loc(m, nn, nkp_loc))) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) &
- (crt(m, n)*rnkb_loc(n, nn, nkp_loc) + conjg(crt(n, m) &
*rnkb_loc(m, nn, nkp_loc))) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
if (slwf_constrain) then
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) + lambda_loc &
*(crt(m, n)*ln_tmp_loc(n, nn, nkp_loc) &
+ conjg(crt(n, m)*ln_tmp_loc(m, nn, nkp_loc))) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) + wb(nn)*lambda_loc &
*(crt(m, n)*rnkb_loc(n, nn, nkp_loc) &
+ conjg(crt(n, m)*rnkb_loc(m, nn, nkp_loc))) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) - lambda_loc &
*(crt(m, n)*ln_tmp_loc(n, nn, nkp_loc) &
+ conjg(crt(n, m))*ln_tmp_loc(m, nn, nkp_loc)) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) - wb(nn)*lambda_loc &
*(r0kb(n, nn, nkp_loc)*crt(m, n) &
+ r0kb(m, nn, nkp_loc)*conjg(crt(n, m))) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
end if
else
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) - wb(nn) &
*0.5_dp*conjg(cr(n, m)) &
- conjg(crt(n, m)*(ln_tmp_loc(m, nn, nkp_loc) &
+ wb(nn)*rnkb_loc(m, nn, nkp_loc))) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
if (slwf_constrain) then
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) + lambda_loc &
*conjg(crt(n, m)*(ln_tmp_loc(m, nn, nkp_loc) &
+ wb(nn)*rnkb_loc(m, nn, nkp_loc))) &
*cmplx(0.0_dp, -0.5_dp, kind=dp) &
- lambda_loc*(conjg(crt(n, m))*ln_tmp_loc(m, nn, nkp_loc)) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) - wb(nn)*lambda_loc &
*r0kb(m, nn, nkp_loc)*conjg(crt(n, m)) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
end if
end if
else if (n <= slwf_num) then
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) + wb(nn)*cr(m, n)*0.5_dp &
- crt(m, n)*(ln_tmp_loc(n, nn, nkp_loc) + wb(nn)*rnkb_loc(n, nn, nkp_loc)) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
if (slwf_constrain) then
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) + lambda_loc &
*crt(m, n)*(ln_tmp_loc(n, nn, nkp_loc) + wb(nn)*rnkb_loc(n, nn, nkp_loc)) &
*cmplx(0.0_dp, -0.5_dp, kind=dp) &
- lambda_loc*crt(m, n)*ln_tmp_loc(n, nn, nkp_loc) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) - wb(nn)*lambda_loc &
*r0kb(n, nn, nkp_loc)*crt(m, n) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
end if
else
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc)
end if
enddo
enddo
else
do n = 1, num_wann
do m = 1, num_wann
! A[R^{k,b}]=(R-Rdag)/2
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) &
+ wb(nn)*0.5_dp &
*(cr(m, n) - conjg(cr(n, m)))
! -S[T^{k,b}]=-(T+Tdag)/2i ; T_mn = Rt_mn q_n
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) - &
(crt(m, n)*ln_tmp_loc(n, nn, nkp_loc) &
+ conjg(crt(n, m)*ln_tmp_loc(m, nn, nkp_loc))) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
cdodq_loc(m, n, nkp_loc) = cdodq_loc(m, n, nkp_loc) - wb(nn) &
*(crt(m, n)*rnkb_loc(n, nn, nkp_loc) &
+ conjg(crt(n, m)*rnkb_loc(m, nn, nkp_loc))) &
*cmplx(0.0_dp, -0.5_dp, kind=dp)
enddo
enddo
end if
enddo
enddo
cdodq_loc = cdodq_loc/real(num_kpts, dp)*4.0_dp
if (present(cdodq)) then
! each process communicates its result to other processes
call comms_gatherv(cdodq_loc, num_wann*num_wann*counts(my_node_id), &
cdodq, num_wann*num_wann*counts, num_wann*num_wann*displs)
call comms_bcast(cdodq(1, 1, 1), num_wann*num_wann*num_kpts)
if (lsitesymmetry) then
call sitesym_symmetrize_gradient(1, cdodq) !RS:
cdodq_loc(:, :, 1:counts(my_node_id)) = cdodq(:, :, displs(my_node_id) + 1:displs(my_node_id) + counts(my_node_id))
endif
end if
deallocate (cr, stat=ierr)
if (ierr /= 0) call io_error('Error in deallocating cr in wann_domega')
deallocate (crt, stat=ierr)
if (ierr /= 0) call io_error('Error in deallocating crt in wann_domega')
if (timing_level > 1 .and. on_root) call io_stopwatch('wann: domega', 2)
return
end subroutine wann_domega