Extracts an num_wann-dimensional subspace at each k by minimizing Omega_I
! send chunks of wkomegai1 to root node call comms_gatherv(wkomegai1_loc, counts(my_node_id), wkomegai1, counts, displs) ! send back the whole wkomegai1 array to other nodes call comms_bcast(wkomegai1(1), num_kpts)
! send chunks of wkomegai1 to root node call comms_gatherv(wkomegai1_loc, counts(my_node_id), wkomegai1, counts, displs) ! send back the whole wkomegai1 array to other nodes call comms_bcast(wkomegai1(1), num_kpts)
subroutine dis_extract()
!==================================================================!
! !
!! Extracts an num_wann-dimensional subspace at each k by
!! minimizing Omega_I
! !
!==================================================================!
use w90_io, only: io_wallclocktime
use w90_sitesym, only: ir2ik, ik2ir, nkptirr, nsymmetry, kptsym !YN: RS:
implicit none
! MODIFIED:
! u_matrix_opt (At input it contains the initial guess for the optima
! subspace (expressed in terms of the original states inside the window)
! output it contains the states that diagonalize the hamiltonian inside
! optimal subspace (again expressed in terms of the original window stat
! Giving out states that diagonalize the hamiltonian inside the optimal
! subspace (instead of the eigenstates of the Z matrix) is useful for
! performing the Wannier interpolation of the energy bands as described
! Sec. III.F of SMV)
!
! eigval (At input: original energy eigenvalues.
! At output: eigenvalues of the hamiltonian inside optimal subspace)
! ----------------------------------------------------------------------
! TO DO: The complement subspace is computed but is not saved anywhere!
! (Check what was done with it in original code space.f)
! Diagonalize Z matrix only at those k points where ndimwin>num_wann?
! ----------------------------------------------------------------------
! *******************
! SHELLS OF K-VECTORS
! *******************
! nshells number of shells of k-points to be used in the
! finite-difference formulas for the k-derivatives
! aam: wb is now wb(1:nntot) 09/04/2006
! wb(nkp,nnx) weight of the nnx-th b-vector (ordered along shells
! of increasing length) associated with the nkp-th k-p
! wbtot sum of the weights of all b-vectors associated with
! given k-point (k-point 1 is used in calculation)
! nnlist(nkp,nnx) vkpt(1:3,nnlist(nkp,nnx)) is the nnx-th neighboring
! k-point of the nkp-th k-point vkpt(1:3,nkp) (or its
! periodic image in the "home Brillouin zone")
! cm(n,m,nkp,nnx) Overlap matrix <u_nk|u_{m,k+b}>
! Internal variables
integer :: i, j, l, m, n, nn, nkp, nkp2, info, ierr, ndimk, p
integer :: icompflag, iter, ndiff
real(kind=dp) :: womegai, wkomegai, womegai1, rsum, delta_womegai
real(kind=dp), allocatable :: wkomegai1(:)
! for MPI
real(kind=dp), allocatable :: wkomegai1_loc(:)
complex(kind=dp), allocatable :: camp_loc(:, :, :)
complex(kind=dp), allocatable :: u_matrix_opt_loc(:, :, :)
complex(kind=dp), allocatable :: ceamp(:, :, :)
complex(kind=dp), allocatable :: camp(:, :, :)
complex(kind=dp), allocatable :: czmat_in(:, :, :)
complex(kind=dp), allocatable :: czmat_out(:, :, :)
! the z-matrices are now stored in local arrays
complex(kind=dp), allocatable :: czmat_in_loc(:, :, :)
complex(kind=dp), allocatable :: czmat_out_loc(:, :, :)
complex(kind=dp), allocatable :: cham(:, :, :)
integer, allocatable :: iwork(:)
integer, allocatable :: ifail(:)
real(kind=dp), allocatable :: w(:)
real(kind=dp), allocatable :: rwork(:)
complex(kind=dp), allocatable :: cap(:)
complex(kind=dp), allocatable :: cwork(:)
complex(kind=dp), allocatable :: cz(:, :)
complex(kind=dp), allocatable :: cwb(:, :), cww(:, :), cbw(:, :)
real(kind=dp), allocatable :: history(:)
logical :: dis_converged
complex(kind=dp) :: lambda(num_wann, num_wann) !RS:
! Needed to split an array on different nodes
integer, dimension(0:num_nodes - 1) :: counts
integer, dimension(0:num_nodes - 1) :: displs
integer :: nkp_loc
if (timing_level > 1 .and. on_root) call io_stopwatch('dis: extract', 1)
if (on_root) write (stdout, '(/1x,a)') &
' Extraction of optimally-connected subspace '
if (on_root) write (stdout, '(1x,a)') &
' ------------------------------------------ '
allocate (cwb(num_wann, num_bands), stat=ierr)
if (ierr /= 0) call io_error('Error allocating cwb in dis_extract')
allocate (cww(num_wann, num_wann), stat=ierr)
if (ierr /= 0) call io_error('Error allocating cww in dis_extract')
allocate (cbw(num_bands, num_wann), stat=ierr)
if (ierr /= 0) call io_error('Error allocating cbw in dis_extract')
cwb = cmplx_0; cww = cmplx_0; cbw = cmplx_0
allocate (iwork(5*num_bands), stat=ierr)
if (ierr /= 0) call io_error('Error allocating iwork in dis_extract')
allocate (ifail(num_bands), stat=ierr)
if (ierr /= 0) call io_error('Error allocating ifail in dis_extract')
allocate (w(num_bands), stat=ierr)
if (ierr /= 0) call io_error('Error allocating w in dis_extract')
allocate (rwork(7*num_bands), stat=ierr)
if (ierr /= 0) call io_error('Error allocating rwork in dis_extract')
allocate (cap((num_bands*(num_bands + 1))/2), stat=ierr)
if (ierr /= 0) call io_error('Error allocating cap in dis_extract')
allocate (cwork(2*num_bands), stat=ierr)
if (ierr /= 0) call io_error('Error allocating cwork in dis_extract')
allocate (cz(num_bands, num_bands), stat=ierr)
if (ierr /= 0) call io_error('Error allocating cz in dis_extract')
! for MPI
call comms_array_split(num_kpts, counts, displs)
allocate (u_matrix_opt_loc(num_bands, num_wann, max(1, counts(my_node_id))), stat=ierr)
if (ierr /= 0) call io_error('Error allocating u_matrix_opt_loc in dis_extract')
! Copy matrix elements from global U matrix to local U matrix
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
u_matrix_opt_loc(:, :, nkp_loc) = u_matrix_opt(:, :, nkp)
enddo
allocate (wkomegai1_loc(max(1, counts(my_node_id))), stat=ierr)
if (ierr /= 0) call io_error('Error allocating wkomegai1_loc in dis_extract')
allocate (czmat_in_loc(num_bands, num_bands, max(1, counts(my_node_id))), stat=ierr)
if (ierr /= 0) call io_error('Error allocating czmat_in_loc in dis_extract')
allocate (czmat_out_loc(num_bands, num_bands, max(1, counts(my_node_id))), stat=ierr)
if (ierr /= 0) call io_error('Error allocating czmat_out_loc in dis_extract')
allocate (wkomegai1(num_kpts), stat=ierr)
if (ierr /= 0) call io_error('Error allocating wkomegai1 in dis_extract')
allocate (czmat_in(num_bands, num_bands, num_kpts), stat=ierr)
if (ierr /= 0) call io_error('Error allocating czmat_in in dis_extract')
allocate (czmat_out(num_bands, num_bands, num_kpts), stat=ierr)
if (ierr /= 0) call io_error('Error allocating czmat_out in dis_extract')
allocate (history(dis_conv_window), stat=ierr)
if (ierr /= 0) call io_error('Error allocating history in dis_extract')
! ********************************************
! ENERGY WINDOWS AND SUBSPACES AT EACH K-POINT
! ********************************************
! num_wann dimensionality of the subspace at each k-point
! (number of Wannier functions per unit cell that we w
! NDIMWIN(NKP) number of bands at the nkp-th k-point that fall
! within the outer energy window
! NDIMFROZ(NKP) number of frozen bands at the nkp-th k-point
! INDXNFROZ(I,NKP) INDEX (BETWEEN 1 AND NDIMWIN(NKP)) OF THE I-TH NON-F
! ORIGINAL BAND STATE AT THE NKP-TH K-POINT
! U_MATRIX_OPT(J,L,NKP) AMPLITUDE OF THE J-TH ENERGY EIGENVECTOR INSIDE THE
! ENERGY WINDOW AT THE NKP-TH K-POINT IN THE EXPANSION
! THE L-TH LEADING RLAMBDA EIGENVECTOR AT THE SAME K-P
! If there are M_k frozen states, they occupy the lowe
! entries of the second index of u_matrix_opt, and the leadin
! nabnds-M_k eigenvectors of the Z matrix occupy the
! remaining slots
! CAMP(J,L,NKP) SAME AS U_MATRIX_OPT, BUT FOR THE COMPLEMENT SUBSPACE INSID
! ENERGY WINDOW (I.E., THE NON-LEADING RLAMBDA EIGENVE
! CEAMP(J,L,NKPTS) SAME AS U_MATRIX_OPT, BUT INSTEAD OF RLAMBDA EIGENVECTOR, I
! FOR THE ENERGY EIGENVECTOR OBTAINED BY DIAGONALIZING
! HAMILTONIAN IN THE OPTIMIZED SUBSPACE
! CZMAT_IN(M,N,NKP) Z-MATRIX [Eq. (21) SMV]
! CZMAT_OUT(M,N,NKP) OUTPUT Z-MATRIX FROM THE PRESENT ITERATION
! RLAMBDA An eigenvalue of the Z matrix
! womegai Gauge-invariant Wannier spread, computed usinf all s
! from current iteration
! wkomegai1(NKP) Eq. (18) of SMV
! womegai1 Eq.(11) of SMV (like wowmegai, but neighboring state
! for computing overlaps are from previous iteration.
! become equal at self-consistency)
! alphafixe mixing parameter for the iterative procedure
! nitere total number of iterations
! DEBUG
if (iprint > 2) then
if (on_root) write (stdout, '(a,/)') ' Original eigenvalues inside outer window:'
do nkp = 1, num_kpts
if (on_root) write (stdout, '(a,i3,3x,20(f9.5,1x))') ' K-point ', nkp, &
(eigval_opt(i, nkp), i=1, ndimwin(nkp))
enddo
endif
! ENDDEBUG
! TO DO: Check if this is the best place to initialize icompflag
icompflag = 0
if (on_root) write (stdout, '(1x,a)') &
'+---------------------------------------------------------------------+<-- DIS'
if (on_root) write (stdout, '(1x,a)') &
'| Iter Omega_I(i-1) Omega_I(i) Delta (frac.) Time |<-- DIS'
if (on_root) write (stdout, '(1x,a)') &
'+---------------------------------------------------------------------+<-- DIS'
dis_converged = .false.
! ------------------
! BIG ITERATION LOOP
! ------------------
do iter = 1, dis_num_iter
if (timing_level > 1 .and. on_root) call io_stopwatch('dis: extract_1', 1)
if (iter .eq. 1) then
! Initialize Z matrix at k points w/ non-frozen states
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
if (num_wann .gt. ndimfroz(nkp)) call internal_zmatrix(nkp, nkp_loc, czmat_in_loc(:, :, nkp_loc))
enddo
if (lsitesymmetry) then
call comms_gatherv(czmat_in_loc, num_bands*num_bands*counts(my_node_id), &
czmat_in, num_bands*num_bands*counts, num_bands*num_bands*displs)
call comms_bcast(czmat_in(1, 1, 1), num_bands*num_bands*num_kpts)
call sitesym_symmetrize_zmatrix(czmat_in, lwindow) !RS:
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
czmat_in_loc(:, :, nkp_loc) = czmat_in(:, :, nkp)
end do
end if
else
! [iter.ne.1]
! Update Z matrix at k points with non-frozen states, using a mixing sch
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
if (lsitesymmetry) then !YN: RS:
if (ir2ik(ik2ir(nkp)) .ne. nkp) cycle !YN: RS:
endif !YN: RS:
if (num_wann .gt. ndimfroz(nkp)) then
ndimk = ndimwin(nkp) - ndimfroz(nkp)
do i = 1, ndimk
do j = 1, i
czmat_in_loc(j, i, nkp_loc) = &
cmplx(dis_mix_ratio, 0.0_dp, dp)*czmat_out_loc(j, i, nkp_loc) &
+ cmplx(1.0_dp - dis_mix_ratio, 0.0_dp, dp)*czmat_in_loc(j, i, nkp_loc)
! hermiticity
czmat_in_loc(i, j, nkp_loc) = conjg(czmat_in_loc(j, i, nkp_loc))
enddo
enddo
endif
enddo
endif
! [if iter=1]
if (timing_level > 1 .and. on_root) call io_stopwatch('dis: extract_1', 2)
if (timing_level > 1 .and. on_root) call io_stopwatch('dis: extract_2', 1)
womegai1 = 0.0_dp
! wkomegai1 is defined by Eq. (18) of SMV.
! Contribution to wkomegai1 from frozen states should be calculated now
! every k (before updating any k), so that for iter>1 overlaps are with
! non-frozen neighboring states from the previous iteration
wkomegai1 = real(num_wann, dp)*wbtot
if (lsitesymmetry) then !RS:
do nkp = 1, nkptirr !RS:
wkomegai1(ir2ik(nkp)) = wkomegai1(ir2ik(nkp))*nsymmetry/count(kptsym(:, nkp) .eq. ir2ik(nkp)) !RS:
enddo !RS:
endif !RS:
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
wkomegai1_loc(nkp_loc) = wkomegai1(nkp)
end do
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
if (ndimfroz(nkp) .gt. 0) then
if (lsitesymmetry) call io_error('not implemented in symmetry-adapted mode') !YN: RS:
do nn = 1, nntot
nkp2 = nnlist(nkp, nn)
call zgemm('C', 'N', ndimfroz(nkp), ndimwin(nkp2), ndimwin(nkp), cmplx_1, &
u_matrix_opt(:, :, nkp), num_bands, m_matrix_orig_local(:, :, nn, nkp_loc), num_bands, cmplx_0, &
cwb, num_wann)
call zgemm('N', 'N', ndimfroz(nkp), num_wann, ndimwin(nkp2), cmplx_1, &
cwb, num_wann, u_matrix_opt(:, :, nkp2), num_bands, cmplx_0, cww, num_wann)
rsum = 0.0_dp
do n = 1, num_wann
do m = 1, ndimfroz(nkp)
rsum = rsum + real(cww(m, n), dp)**2 + aimag(cww(m, n))**2
enddo
enddo
wkomegai1_loc(nkp_loc) = wkomegai1_loc(nkp_loc) - wb(nn)*rsum
enddo
endif
enddo
if (timing_level > 1 .and. on_root) call io_stopwatch('dis: extract_2', 2)
if (timing_level > 1 .and. on_root) call io_stopwatch('dis: extract_3', 1)
!! ! send chunks of wkomegai1 to root node
!! call comms_gatherv(wkomegai1_loc, counts(my_node_id), wkomegai1, counts, displs)
!! ! send back the whole wkomegai1 array to other nodes
!! call comms_bcast(wkomegai1(1), num_kpts)
! Refine optimal subspace at k points w/ non-frozen states
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
if (lsitesymmetry) then !RS:
if (ir2ik(ik2ir(nkp)) .ne. nkp) cycle !RS:
end if !RS:
if (lsitesymmetry) then !RS:
call sitesym_dis_extract_symmetry(nkp, ndimwin(nkp), czmat_in_loc(:, :, nkp_loc), lambda, u_matrix_opt_loc(:, :, nkp_loc)) !RS:
do j = 1, num_wann !RS:
wkomegai1_loc(nkp_loc) = wkomegai1_loc(nkp_loc) - real(lambda(j, j), kind=dp) !RS:
enddo !RS:
else !RS:
if (num_wann .gt. ndimfroz(nkp)) then
! Diagonalize Z matrix
do j = 1, ndimwin(nkp) - ndimfroz(nkp)
do i = 1, j
cap(i + ((j - 1)*j)/2) = czmat_in_loc(i, j, nkp_loc)
enddo
enddo
ndiff = ndimwin(nkp) - ndimfroz(nkp)
call ZHPEVX('V', 'A', 'U', ndiff, cap, 0.0_dp, 0.0_dp, 0, 0, &
-1.0_dp, m, w, cz, num_bands, cwork, rwork, iwork, ifail, info)
if (info .lt. 0) then
if (on_root) write (stdout, *) ' *** ERROR *** ZHPEVX WHILE DIAGONALIZING Z MATRIX'
if (on_root) write (stdout, *) ' THE ', -info, ' ARGUMENT OF ZHPEVX HAD AN ILLEGAL VALUE'
call io_error(' dis_extract: error')
endif
if (info .gt. 0) then
if (on_root) write (stdout, *) ' *** ERROR *** ZHPEVX WHILE DIAGONALIZING Z MATRIX'
if (on_root) write (stdout, *) info, ' EIGENVECTORS FAILED TO CONVERGE'
call io_error(' dis_extract: error')
endif
! Update the optimal subspace by incorporating the num_wann-ndimfroz(nkp) l
! eigenvectors of the Z matrix into u_matrix_opt. Also, add contribution from
! non-frozen states to wkomegai1(nkp) (minus the corresponding eigenvalu
m = ndimfroz(nkp)
do j = ndimwin(nkp) - num_wann + 1, ndimwin(nkp) - ndimfroz(nkp)
m = m + 1
wkomegai1_loc(nkp_loc) = wkomegai1_loc(nkp_loc) - w(j)
u_matrix_opt_loc(1:ndimwin(nkp), m, nkp_loc) = cmplx_0
ndimk = ndimwin(nkp) - ndimfroz(nkp)
do i = 1, ndimk
p = indxnfroz(i, nkp)
u_matrix_opt_loc(p, m, nkp_loc) = cz(i, j)
enddo
enddo
endif
! [if num_wann>ndimfroz(nkp)]
endif !RS:
! Now that we have contribs. from both frozen and non-frozen states to
! wkomegai1(nkp), add it to womegai1
womegai1 = womegai1 + wkomegai1_loc(nkp_loc)
if (index(devel_flag, 'compspace') > 0) then
! AT THE LAST ITERATION FIND A BASIS FOR THE (NDIMWIN(NKP)-num_wann)-DIMENS
! COMPLEMENT SPACE
if (iter .eq. dis_num_iter) then
allocate (camp(num_bands, num_bands, num_kpts), stat=ierr)
if (ierr /= 0) call io_error('Error allocating camp in dis_extract')
allocate (camp_loc(num_bands, num_bands, max(1, counts(my_node_id))), stat=ierr)
if (ierr /= 0) call io_error('Error allocating ucamp_loc in dis_extract')
if (ndimwin(nkp) .gt. num_wann) then
do j = 1, ndimwin(nkp) - num_wann
if (num_wann .gt. ndimfroz(nkp)) then
! USE THE NON-LEADING EIGENVECTORS OF THE Z-MATRIX
camp_loc(1:ndimwin(nkp), j, nkp_loc) = cz(1:ndimwin(nkp), j)
else
! Then num_wann=NDIMFROZ(NKP)
! USE THE ORIGINAL NON-FROZEN BLOCH EIGENSTATES
do i = 1, ndimwin(nkp)
camp_loc(i, j, nkp_loc) = cmplx_0
if (i .eq. indxnfroz(j, nkp)) camp_loc(i, j, nkp_loc) = cmplx_1
enddo
endif
enddo
else
icompflag = 1
endif
endif
end if ! index(devel_flag,'compspace')>0
enddo
! [Loop over k points (nkp)]
!! ! send chunks of wkomegai1 to root node
!! call comms_gatherv(wkomegai1_loc, counts(my_node_id), wkomegai1, counts, displs)
!! ! send back the whole wkomegai1 array to other nodes
!! call comms_bcast(wkomegai1(1), num_kpts)
call comms_allreduce(womegai1, 1, 'SUM')
call comms_gatherv(u_matrix_opt_loc, num_bands*num_wann*counts(my_node_id), &
u_matrix_opt, num_bands*num_wann*counts, num_bands*num_wann*displs)
call comms_bcast(u_matrix_opt(1, 1, 1), num_bands*num_wann*num_kpts)
if (lsitesymmetry) call sitesym_symmetrize_u_matrix(num_bands, u_matrix_opt, lwindow) !RS:
if (index(devel_flag, 'compspace') > 0) then
if (iter .eq. dis_num_iter) then
call comms_gatherv(camp_loc, num_bands*num_bands*counts(my_node_id), &
camp, num_bands*num_bands*counts, num_bands*num_bands*displs)
call comms_bcast(camp(1, 1, 1), num_bands*num_bands*num_kpts)
endif
endif
if (timing_level > 1 .and. on_root) call io_stopwatch('dis: extract_3', 2)
womegai1 = womegai1/real(num_kpts, dp)
! DEBUG
! Orthonormality check
! do nkp=1,nkpts
! write(*,*) ' '
! write(*,'(a8,i4)') 'k-point ',nkp
! do l=1,num_wann
! do m=1,l
! ctmp=czero
! do j=1,ndimwin(nkp)
! ctmp=ctmp+conjg(u_matrix_opt(j,m,nkp))*u_matrix_opt(j,l,nkp)
! enddo
! write(*,'(i2,2x,i2,f16.12,1x,f16.12)') l,m,ctmp
! if(l.eq.m) then
! if(abs(ctmp-cmplx(1.0d0,0.0d0)).gt.1.0e-8) then
! write(*,'(a49,i4)')
! 1 '*** ERROR *** with iterative subspace at k-point ',
! 2 nkp
! write(*,*) 'vectors in u_matrix_opt not orthonormal'
! stop
! endif
! else
! if(abs(ctmp).gt.1.0e-8) then
! write(*,'(a49,i4)')
! 1 '*** ERROR *** with iterative subspace at k-point ',
! 2 nkp
! write(*,*) 'vectors in u_matrix_opt not orthonormal'
! stop
! endif
! endif
! enddo
! enddo
! enddo
! ENDDEBUG
! Compute womegai using the updated subspaces at all k, i.e.,
! replacing (i-1) by (i) in Eq. (12) SMV
if (timing_level > 1 .and. on_root) call io_stopwatch('dis: extract_4', 1)
womegai = 0.0_dp
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
wkomegai = 0.0_dp
do nn = 1, nntot
nkp2 = nnlist(nkp, nn)
call zgemm('C', 'N', num_wann, ndimwin(nkp2), ndimwin(nkp), cmplx_1, &
u_matrix_opt(:, :, nkp), num_bands, m_matrix_orig_local(:, :, nn, nkp_loc), num_bands, cmplx_0, &
cwb, num_wann)
call zgemm('N', 'N', num_wann, num_wann, ndimwin(nkp2), cmplx_1, &
cwb, num_wann, u_matrix_opt(:, :, nkp2), num_bands, cmplx_0, cww, num_wann)
rsum = 0.0_dp
do n = 1, num_wann
do m = 1, num_wann
rsum = rsum + real(cww(m, n), dp)**2 + aimag(cww(m, n))**2
enddo
enddo
wkomegai = wkomegai + wb(nn)*rsum
enddo
wkomegai = real(num_wann, dp)*wbtot - wkomegai
womegai = womegai + wkomegai
enddo
call comms_allreduce(womegai, 1, 'SUM')
womegai = womegai/real(num_kpts, dp)
! [Loop over k (nkp)]
if (timing_level > 1 .and. on_root) call io_stopwatch('dis: extract_4', 2)
delta_womegai = womegai1/womegai - 1.0_dp
if (on_root) write (stdout, 124) iter, womegai1*lenconfac**2, womegai*lenconfac**2, &
delta_womegai, io_wallclocktime()
124 format(2x, i6, 3x, f14.8, 3x, f14.8, 6x, es10.3, 2x, f8.2, 4x, '<-- DIS')
! Construct the updated Z matrix, CZMAT_OUT, at k points w/ non-frozen s
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
if (num_wann .gt. ndimfroz(nkp)) call internal_zmatrix(nkp, nkp_loc, czmat_out_loc(:, :, nkp_loc))
enddo
if (lsitesymmetry) then
call comms_gatherv(czmat_out_loc, num_bands*num_bands*counts(my_node_id), &
czmat_out, num_bands*num_bands*counts, num_bands*num_bands*displs)
call comms_bcast(czmat_out(1, 1, 1), num_bands*num_bands*num_kpts)
call sitesym_symmetrize_zmatrix(czmat_out, lwindow) !RS:
do nkp_loc = 1, counts(my_node_id)
nkp = nkp_loc + displs(my_node_id)
czmat_out_loc(:, :, nkp_loc) = czmat_out(:, :, nkp)
end do
end if
call internal_test_convergence()
if (dis_converged) then
if (on_root) write (stdout, '(/13x,a,es10.3,a,i2,a)') &
'<<< Delta <', dis_conv_tol, &
' over ', dis_conv_window, ' iterations >>>'
if (on_root) write (stdout, '(13x,a)') '<<< Disentanglement convergence criteria satisfied >>>'
exit
endif
enddo
! [BIG ITERATION LOOP (iter)]
deallocate (czmat_out, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating czmat_out in dis_extract')
deallocate (czmat_in, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating czmat_in in dis_extract')
deallocate (czmat_out_loc, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating czmat_out_loc in dis_extract')
deallocate (czmat_in_loc, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating czmat_in_loc in dis_extract')
if (on_root) then
allocate (ceamp(num_bands, num_bands, num_kpts), stat=ierr)
if (ierr /= 0) call io_error('Error allocating ceamp in dis_extract')
allocate (cham(num_bands, num_bands, num_kpts), stat=ierr)
if (ierr /= 0) call io_error('Error allocating cham in dis_extract')
endif
if (.not. dis_converged) then
if (on_root) write (stdout, '(/5x,a)') &
'<<< Warning: Maximum number of disentanglement iterations reached >>>'
if (on_root) write (stdout, '(10x,a)') '<<< Disentanglement convergence criteria not satisfied >>>'
endif
if (index(devel_flag, 'compspace') > 0) then
if (icompflag .eq. 1) then
if (iprint > 2) then
if (on_root) write (stdout, ('(/4x,a)')) &
'WARNING: Complement subspace has zero dimensions at the following k-points:'
i = 0
if (on_root) write (stdout, '(4x)', advance='no')
do nkp = 1, num_kpts
if (ndimwin(nkp) .eq. num_wann) then
i = i + 1
if (i .le. 12) then
if (on_root) write (stdout, '(i6)', advance='no') nkp
else
i = 1
if (on_root) write (stdout, '(/4x)', advance='no')
if (on_root) write (stdout, '(i6)', advance='no') nkp
endif
endif
enddo
endif
endif
endif
! Write the final womegai. This should remain unchanged during the
! subsequent minimization of Omega_tilde in wannierise.f90
! We store it in the checkpoint file as a sanity check
if (on_root) write (stdout, '(/8x,a,f14.8,a/)') 'Final Omega_I ', &
womegai*lenconfac**2, ' ('//trim(length_unit)//'^2)'
! Set public variable omega_invariant
omega_invariant = womegai
! Currently, this part is not parallelized; thus, we perform the task only on root and then broadcast the result.
if (on_root) then
! DIAGONALIZE THE HAMILTONIAN WITHIN THE OPTIMIZED SUBSPACES
do nkp = 1, num_kpts
do j = 1, num_wann
do i = 1, num_wann
cham(i, j, nkp) = cmplx_0
do l = 1, ndimwin(nkp)
cham(i, j, nkp) = cham(i, j, nkp) + conjg(u_matrix_opt(l, i, nkp)) &
*u_matrix_opt(l, j, nkp)*eigval_opt(l, nkp)
enddo
enddo
enddo
do j = 1, num_wann
do i = 1, j
cap(i + ((j - 1)*j)/2) = cham(i, j, nkp)
enddo
enddo
call ZHPEVX('V', 'A', 'U', num_wann, cap, 0.0_dp, 0.0_dp, 0, 0, -1.0_dp, &
m, w, cz, num_bands, cwork, rwork, iwork, ifail, info)
if (info .lt. 0) then
if (on_root) write (stdout, *) ' *** ERROR *** ZHPEVX WHILE DIAGONALIZING HAMILTONIAN'
if (on_root) write (stdout, *) ' THE ', -info, ' ARGUMENT OF ZHPEVX HAD AN ILLEGAL VALUE'
call io_error(' dis_extract: error')
endif
if (info .gt. 0) then
if (on_root) write (stdout, *) ' *** ERROR *** ZHPEVX WHILE DIAGONALIZING HAMILTONIAN'
if (on_root) write (stdout, *) info, 'EIGENVECTORS FAILED TO CONVERGE'
call io_error(' dis_extract: error')
endif
! Store the energy eigenvalues of the optimal subspace (used in wann_ban
eigval_opt(1:num_wann, nkp) = w(1:num_wann)
! CALCULATE AMPLITUDES OF THE CORRESPONDING ENERGY EIGENVECTORS IN TERMS
! THE ORIGINAL ("WINDOW SPACE") ENERGY EIGENVECTORS
do j = 1, num_wann
do i = 1, ndimwin(nkp)
ceamp(i, j, nkp) = cmplx_0
do l = 1, num_wann
ceamp(i, j, nkp) = ceamp(i, j, nkp) + cz(l, j)*u_matrix_opt(i, l, nkp)
enddo
enddo
enddo
! NKP
enddo
! DEBUG
if (iprint > 2) then
if (on_root) write (stdout, '(/,a,/)') ' Eigenvalues inside optimal subspace:'
do nkp = 1, num_kpts
if (on_root) write (stdout, '(a,i3,2x,20(f9.5,1x))') ' K-point ', &
nkp, (eigval_opt(i, nkp), i=1, num_wann)
enddo
endif
! ENDDEBUG
! Replace u_matrix_opt by ceamp. Both span the
! same space, but the latter is more convenient for the purpose of obtai
! an optimal Fourier-interpolated band structure: see Sec. III.E of SMV.
if (.not. lsitesymmetry) then !YN:
do nkp = 1, num_kpts
do j = 1, num_wann
u_matrix_opt(1:ndimwin(nkp), j, nkp) = ceamp(1:ndimwin(nkp), j, nkp)
enddo
enddo
!else !YN:
! Above is skipped as we require Uopt(Rk) to be related to Uopt(k) !YN: RS:
!write(stdout,"(a)") & !YN: RS:
! 'Note(symmetry-adapted mode): u_matrix_opt are no longer the eigenstates of the subspace Hamiltonian.' !RS:
endif !YN:
endif
call comms_bcast(eigval_opt(1, 1), num_bands*num_kpts)
call comms_bcast(u_matrix_opt(1, 1, 1), num_bands*num_wann*num_kpts)
if (index(devel_flag, 'compspace') > 0) then
if (icompflag .eq. 1) then
if (iprint > 2) then
if (on_root) write (stdout, *) 'AT SOME K-POINT(S) COMPLEMENT SUBSPACE HAS ZERO DIMENSIONALITY'
if (on_root) write (stdout, *) '=> DID NOT CREATE FILE COMPSPACE.DAT'
endif
else
! DIAGONALIZE THE HAMILTONIAN IN THE COMPLEMENT SUBSPACE, WRITE THE
! CORRESPONDING EIGENFUNCTIONS AND ENERGY EIGENVALUES
do nkp = 1, num_kpts
do j = 1, ndimwin(nkp) - num_wann
do i = 1, ndimwin(nkp) - num_wann
cham(i, j, nkp) = cmplx_0
do l = 1, ndimwin(nkp)
cham(i, j, nkp) = cham(i, j, nkp) + conjg(camp(l, i, nkp)) &
*camp(l, j, nkp)*eigval_opt(l, nkp)
enddo
enddo
enddo
do j = 1, ndimwin(nkp) - num_wann
do i = 1, j
cap(i + ((j - 1)*j)/2) = cham(i, j, nkp)
enddo
enddo
ndiff = ndimwin(nkp) - num_wann
call ZHPEVX('V', 'A', 'U', ndiff, cap, 0.0_dp, 0.0_dp, 0, 0, &
-1.0_dp, m, w, cz, num_bands, cwork, rwork, iwork, ifail, info)
if (info .lt. 0) then
if (on_root) write (stdout, *) '*** ERROR *** ZHPEVX WHILE DIAGONALIZING HAMILTONIAN'
if (on_root) write (stdout, *) 'THE ', -info, ' ARGUMENT OF ZHPEVX HAD AN ILLEGAL VALUE'
call io_error(' dis_extract: error')
endif
if (info .gt. 0) then
if (on_root) write (stdout, *) '*** ERROR *** ZHPEVX WHILE DIAGONALIZING HAMILTONIAN'
if (on_root) write (stdout, *) info, 'EIGENVECTORS FAILED TO CONVERGE'
call io_error(' dis_extract: error')
endif
! CALCULATE AMPLITUDES OF THE ENERGY EIGENVECTORS IN THE COMPLEMENT SUBS
! TERMS OF THE ORIGINAL ENERGY EIGENVECTORS
do j = 1, ndimwin(nkp) - num_wann
do i = 1, ndimwin(nkp)
camp(i, j, nkp) = cmplx_0
do l = 1, ndimwin(nkp) - num_wann
!write(stdout,*) 'i=',i,' j=',j,' l=',l
!write(stdout,*) ' camp(i,j,nkp)=',camp(i,j,nkp)
!write(stdout,*) ' cz(l,j)=',cz(l,j)
!write(stdout,*) ' u_matrix_opt(i,l,nkp)=',u_matrix_opt(i,l,nkp)
! aam: 20/10/2006 -- the second dimension of u_matrix_opt is out of bounds (allocated as num_wann)!
! commenting this line out.
! camp(i,j,nkp) = camp(i,j,nkp) + cz(l,j) * u_matrix_opt(i,l,nkp)
enddo
enddo
enddo
enddo ! [loop over k points (nkp)]
endif ! [if icompflag=1]
endif ![if(index(devel_flag,'compspace')>0)]
deallocate (history, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating history in dis_extract')
if (on_root) then
deallocate (cham, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating cham in dis_extract')
endif
if (allocated(camp)) then
deallocate (camp, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating camp in dis_extract')
end if
if (allocated(camp_loc)) then
deallocate (camp_loc, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating camp_loc in dis_extract')
endif
if (on_root) then
deallocate (ceamp, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating ceamp in dis_extract')
endif
deallocate (u_matrix_opt_loc, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating u_matrix_opt_loc in dis_extract')
deallocate (wkomegai1_loc, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating wkomegai1_loc in dis_extract')
deallocate (wkomegai1, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating wkomegai1 in dis_extract')
deallocate (cz, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating cz in dis_extract')
deallocate (cwork, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating cwork in dis_extract')
deallocate (cap, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating cap in dis_extract')
deallocate (rwork, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating rwork in dis_extract')
deallocate (w, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating w in dis_extract')
deallocate (ifail, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating ifail in dis_extract')
deallocate (iwork, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating iwork in dis_extract')
deallocate (cbw, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating cbw in dis_extract')
deallocate (cww, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating cww in dis_extract')
deallocate (cwb, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating cwb in dis_extract')
if (on_root) write (stdout, '(1x,a/)') &
'+----------------------------------------------------------------------------+'
if (timing_level > 1 .and. on_root) call io_stopwatch('dis: extract', 2)
return
contains
subroutine internal_test_convergence()
!! Check if we have converged
implicit none
integer :: ierr
real(kind=dp), allocatable :: temp_hist(:)
allocate (temp_hist(dis_conv_window), stat=ierr)
if (ierr /= 0) call io_error('Error allocating temp_hist in dis_extract')
if (iter .le. dis_conv_window) then
history(iter) = delta_womegai
else
temp_hist = eoshift(history, 1, delta_womegai)
history = temp_hist
endif
dis_converged = .false.
if (iter .ge. dis_conv_window) then
dis_converged = all(abs(history) .lt. dis_conv_tol)
endif
deallocate (temp_hist, stat=ierr)
if (ierr /= 0) call io_error('Error deallocating temp_hist in dis_extract')
return
end subroutine internal_test_convergence
!==================================================================!
subroutine internal_zmatrix(nkp, nkp_loc, cmtrx)
!==================================================================!
!! Compute the Z-matrix
! !
! !
! !
!==================================================================!
implicit none
integer, intent(in) :: nkp
integer, intent(in) :: nkp_loc
!! Which kpoint
complex(kind=dp), intent(out) :: cmtrx(num_bands, num_bands)
!! (M,N)-TH ENTRY IN THE (NDIMWIN(NKP)-NDIMFROZ(NKP)) x (NDIMWIN(NKP)-NDIMFRO
!! HERMITIAN MATRIX AT THE NKP-TH K-POINT
! Internal variables
integer :: l, m, n, p, q, nn, nkp2, ndimk
complex(kind=dp) :: csum
if (timing_level > 1 .and. on_root) call io_stopwatch('dis: extract: zmatrix', 1)
cmtrx = cmplx_0
ndimk = ndimwin(nkp) - ndimfroz(nkp)
do nn = 1, nntot
nkp2 = nnlist(nkp, nn)
call zgemm('N', 'N', num_bands, num_wann, ndimwin(nkp2), cmplx_1, &
m_matrix_orig_local(:, :, nn, nkp_loc), num_bands, u_matrix_opt(:, :, nkp2), num_bands, &
cmplx_0, cbw, num_bands)
do n = 1, ndimk
q = indxnfroz(n, nkp)
do m = 1, n
p = indxnfroz(m, nkp)
csum = cmplx_0
do l = 1, num_wann
csum = csum + cbw(p, l)*conjg(cbw(q, l))
enddo
cmtrx(m, n) = cmtrx(m, n) + cmplx(wb(nn), 0.0_dp, kind=dp)*csum
cmtrx(n, m) = conjg(cmtrx(m, n))
enddo
enddo
enddo
if (timing_level > 1 .and. on_root) call io_stopwatch('dis: extract: zmatrix', 2)
return
end subroutine internal_zmatrix
!~ !==================================================================!
!~! function dis_zeig(nkp,m,cmk)
!~ function dis_zeig(nkp,m)
!~ !==================================================================!
!~ ! !
!~ ! !
!~ ! !
!~ ! !
!~ !==================================================================!
!~
!~ ! Computes <lambda>_mk = sum_{n=1}^N sum_b w_b |<u_{mk}|u_{n,k+b}>|^2
!~ ! [See Eqs. (12) and (17) of SMV]
!~
!~ implicit none
!~
!~ integer, intent(in) :: nkp
!~ integer, intent(in) :: m
!~! complex(kind=dp), intent(in) :: cmk(num_bands,num_bands,nntot)
!~
!~ ! Internal variables
!~ real(kind=dp) :: dis_zeig
!~ complex(kind=dp) :: cdot_bloch
!~ integer :: n,nn,ndnnx,ndnn,nnsh,nkp2,l,j
!~
!~ dis_zeig=0.0_dp
!~
!~! do nn=1,nntot
!~! nkp2=nnlist(nkp,nn)
!~ do n = 1, num_wann
!~ do nn = 1, nntot
!~ nkp2 = nnlist(nkp,nn)
!~ ! Dotproduct
!~ cdot_bloch = cmplx_0
!~ do l = 1, ndimwin(nkp)
!~ do j = 1, ndimwin(nkp2)
!~ cdot_bloch = cdot_bloch + &
!~! conjg(u_matrix_opt(l,m,nkp)) * u_matrix_opt(j,n,nkp2) * cmk(l,j,nn)
!~ conjg(u_matrix_opt(l,m,nkp)) * u_matrix_opt(j,n,nkp2) * m_matrix_orig(l,j,nn,nkp)
!~ enddo
!~ enddo
!~ write(stdout,'(a,4i5,2f15.10)') 'zeig:',nkp,nn,m,n,cdot_bloch
!~! call zgemm('C','N',num_wann,ndimwin(nkp2),ndimwin(nkp),cmplx_1,&
!~! u_matrix_opt(:,:,nkp),num_bands,m_matrix_orig(:,:,nn,nkp),num_bands,cmplx_0,&
!~! cwb,num_wann)
!~! call zgemm('N','N',num_wann,num_wann,ndimwin(nkp2),cmplx_1,&
!~! cwb,num_wann,u_matrix_opt(:,:,nkp),num_bands,cmplx_0,cww,num_wann)
!~
!~ dis_zeig = dis_zeig + wb(nn) * abs(cdot_bloch)**2
!~
!~! do n=1,num_wann
!~! dis_zeig = dis_zeig + wb(nn) * abs(cww(m,n))**2
!~! enddo
!~
!~ enddo
!~ enddo
!~
!~ return
!~
!~ end function dis_zeig
end subroutine dis_extract