get_oper.F90 Source File


This file depends on

sourcefile~~get_oper.f90~~EfferentGraph sourcefile~get_oper.f90 get_oper.F90 sourcefile~utility.f90 utility.F90 sourcefile~get_oper.f90->sourcefile~utility.f90 sourcefile~parameters.f90 parameters.F90 sourcefile~get_oper.f90->sourcefile~parameters.f90 sourcefile~constants.f90 constants.F90 sourcefile~get_oper.f90->sourcefile~constants.f90 sourcefile~io.f90 io.F90 sourcefile~get_oper.f90->sourcefile~io.f90 sourcefile~comms.f90 comms.F90 sourcefile~get_oper.f90->sourcefile~comms.f90 sourcefile~postw90_common.f90 postw90_common.F90 sourcefile~get_oper.f90->sourcefile~postw90_common.f90 sourcefile~utility.f90->sourcefile~constants.f90 sourcefile~utility.f90->sourcefile~io.f90 sourcefile~parameters.f90->sourcefile~utility.f90 sourcefile~parameters.f90->sourcefile~constants.f90 sourcefile~parameters.f90->sourcefile~io.f90 sourcefile~parameters.f90->sourcefile~comms.f90 sourcefile~io.f90->sourcefile~constants.f90 sourcefile~comms.f90->sourcefile~constants.f90 sourcefile~comms.f90->sourcefile~io.f90 sourcefile~postw90_common.f90->sourcefile~utility.f90 sourcefile~postw90_common.f90->sourcefile~parameters.f90 sourcefile~postw90_common.f90->sourcefile~constants.f90 sourcefile~postw90_common.f90->sourcefile~io.f90 sourcefile~postw90_common.f90->sourcefile~comms.f90 sourcefile~ws_distance.f90 ws_distance.F90 sourcefile~postw90_common.f90->sourcefile~ws_distance.f90 sourcefile~ws_distance.f90->sourcefile~utility.f90 sourcefile~ws_distance.f90->sourcefile~parameters.f90 sourcefile~ws_distance.f90->sourcefile~constants.f90 sourcefile~ws_distance.f90->sourcefile~io.f90

Files dependent on this one

sourcefile~~get_oper.f90~~AfferentGraph sourcefile~get_oper.f90 get_oper.F90 sourcefile~geninterp.f90 geninterp.F90 sourcefile~geninterp.f90->sourcefile~get_oper.f90 sourcefile~wan_ham.f90 wan_ham.F90 sourcefile~geninterp.f90->sourcefile~wan_ham.f90 sourcefile~berry.f90 berry.F90 sourcefile~berry.f90->sourcefile~get_oper.f90 sourcefile~spin.f90 spin.F90 sourcefile~berry.f90->sourcefile~spin.f90 sourcefile~berry.f90->sourcefile~wan_ham.f90 sourcefile~gyrotropic.f90 gyrotropic.F90 sourcefile~gyrotropic.f90->sourcefile~get_oper.f90 sourcefile~gyrotropic.f90->sourcefile~berry.f90 sourcefile~gyrotropic.f90->sourcefile~spin.f90 sourcefile~gyrotropic.f90->sourcefile~wan_ham.f90 sourcefile~kpath.f90 kpath.F90 sourcefile~kpath.f90->sourcefile~get_oper.f90 sourcefile~kpath.f90->sourcefile~berry.f90 sourcefile~kpath.f90->sourcefile~spin.f90 sourcefile~spin.f90->sourcefile~get_oper.f90 sourcefile~kslice.f90 kslice.F90 sourcefile~kslice.f90->sourcefile~get_oper.f90 sourcefile~kslice.f90->sourcefile~berry.f90 sourcefile~kslice.f90->sourcefile~spin.f90 sourcefile~kslice.f90->sourcefile~wan_ham.f90 sourcefile~boltzwann.f90 boltzwann.F90 sourcefile~boltzwann.f90->sourcefile~get_oper.f90 sourcefile~boltzwann.f90->sourcefile~spin.f90 sourcefile~dos.f90 dos.F90 sourcefile~boltzwann.f90->sourcefile~dos.f90 sourcefile~boltzwann.f90->sourcefile~wan_ham.f90 sourcefile~dos.f90->sourcefile~get_oper.f90 sourcefile~dos.f90->sourcefile~spin.f90 sourcefile~dos.f90->sourcefile~wan_ham.f90 sourcefile~wan_ham.f90->sourcefile~get_oper.f90 sourcefile~postw90.f90 postw90.F90 sourcefile~postw90.f90->sourcefile~geninterp.f90 sourcefile~postw90.f90->sourcefile~berry.f90 sourcefile~postw90.f90->sourcefile~gyrotropic.f90 sourcefile~postw90.f90->sourcefile~kpath.f90 sourcefile~postw90.f90->sourcefile~spin.f90 sourcefile~postw90.f90->sourcefile~kslice.f90 sourcefile~postw90.f90->sourcefile~boltzwann.f90 sourcefile~postw90.f90->sourcefile~dos.f90

Contents

Source Code


Source Code

!-*- mode: F90 -*-!
!------------------------------------------------------------!
! This file is distributed as part of the Wannier90 code and !
! under the terms of the GNU General Public License. See the !
! file `LICENSE' in the root directory of the Wannier90      !
! distribution, or http://www.gnu.org/copyleft/gpl.txt       !
!                                                            !
! The webpage of the Wannier90 code is www.wannier.org       !
!                                                            !
! The Wannier90 code is hosted on GitHub:                    !
!                                                            !
! https://github.com/wannier-developers/wannier90            !
!------------------------------------------------------------!

module w90_get_oper
!===========================================================
!! Finds the Wannier matrix elements of various operators,
!! starting from k-space matrices generated by an interface
!! (e.g., pw2wannier90) to an ab initio package
!! (e.g., quantum-espresso)
!===========================================================

  use w90_constants, only: dp

  implicit none

  public

  private :: fourier_q_to_R, get_win_min

  complex(kind=dp), allocatable, save :: HH_R(:, :, :) !  <0n|r|Rm>
  !! $$\langle 0n | H | Rm \rangle$$

  complex(kind=dp), allocatable, save :: AA_R(:, :, :, :) ! <0n|r|Rm>
  !! $$\langle 0n |  \hat{r} | Rm \rangle$$

  complex(kind=dp), allocatable, save :: BB_R(:, :, :, :) ! <0|H(r-R)|R>
  !! $$\langle 0n | H(\hat{r}-R) | Rm \rangle$$

  complex(kind=dp), allocatable, save :: CC_R(:, :, :, :, :) ! <0|r_alpha.H(r-R)_beta|R>
  !! $$\langle 0n | \hat{r}_{\alpha}.H.(\hat{r}- R)_{\beta} | Rm \rangle$$

  complex(kind=dp), allocatable, save :: FF_R(:, :, :, :, :) ! <0|r_alpha.(r-R)_beta|R>
  !! $$\langle 0n | \hat{r}_{\alpha}.(\hat{r}-R)_{\beta} | Rm \rangle$$

  complex(kind=dp), allocatable, save :: SS_R(:, :, :, :) ! <0n|sigma_x,y,z|Rm>
  !! $$\langle 0n | \sigma_{x,y,z} | Rm \rangle$$

  complex(kind=dp), allocatable, save :: SR_R(:, :, :, :, :) ! <0n|sigma_x,y,z.(r-R)_alpha|Rm>
  !! $$\langle 0n | \sigma_{x,y,z}.(\hat{r}-R)_{\alpha}  | Rm \rangle$$

  complex(kind=dp), allocatable, save :: SHR_R(:, :, :, :, :) ! <0n|sigma_x,y,z.H.(r-R)_alpha|Rm>
  !! $$\langle 0n | \sigma_{x,y,z}.H.(\hat{r}-R)_{\alpha}  | Rm \rangle$$

  complex(kind=dp), allocatable, save :: SH_R(:, :, :, :) ! <0n|sigma_x,y,z.H|Rm>
  !! $$\langle 0n | \sigma_{x,y,z}.H  | Rm \rangle$$

contains

  !======================================================!
  !                   PUBLIC PROCEDURES                  !
  !======================================================!

  !======================================================
  subroutine get_HH_R
    !======================================================
    !
    !! computes <0n|H|Rm>, in eV
    !! (pwscf uses Ry, but pw2wannier90 converts to eV)
    !
    !======================================================

    use w90_constants, only: dp, cmplx_0
    use w90_io, only: io_error, stdout, io_stopwatch, &
      io_file_unit, seedname
    use w90_parameters, only: num_wann, ndimwin, num_kpts, num_bands, &
      eigval, u_matrix, have_disentangled, &
      timing_level, scissors_shift, &
      num_valence_bands, effective_model, &
      real_lattice
    use w90_postw90_common, only: nrpts, rpt_origin, v_matrix, ndegen, irvec, crvec
    use w90_comms, only: on_root, comms_bcast

    integer                       :: i, j, n, m, ii, ik, winmin_q, file_unit, &
                                     ir, io, idum, ivdum(3), ivdum_old(3)
    integer, allocatable          :: num_states(:)
    real(kind=dp)                 :: rdum_real, rdum_imag
    complex(kind=dp), allocatable :: HH_q(:, :, :)
    logical                       :: new_ir

    !ivo
    complex(kind=dp), allocatable :: sciss_q(:, :, :)
    complex(kind=dp), allocatable :: sciss_R(:, :, :)
    real(kind=dp)                 :: sciss_shift

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_HH_R', 1)

    if (.not. allocated(HH_R)) then
      allocate (HH_R(num_wann, num_wann, nrpts))
    else
      if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_HH_R', 2)
      return
    end if

    ! Real-space Hamiltonian H(R) is read from file
    !
    if (effective_model) then
      HH_R = cmplx_0
      if (on_root) then
        write (stdout, '(/a)') ' Reading real-space Hamiltonian from file ' &
          //trim(seedname)//'_HH_R.dat'
        file_unit = io_file_unit()
        open (file_unit, file=trim(seedname)//'_HH_R.dat', form='formatted', &
              status='old', err=101)
        read (file_unit, *) ! header
        read (file_unit, *) idum ! num_wann
        read (file_unit, *) idum ! nrpts
        ir = 1
        new_ir = .true.
        ivdum_old(:) = 0
        n = 1
        do
          read (file_unit, '(5I5,2F12.6)', iostat=io) ivdum(1:3), j, i, &
            rdum_real, rdum_imag
          if (io < 0) exit ! reached end of file
          if (i < 1 .or. i > num_wann .or. j < 1 .or. j > num_wann) then
            write (stdout, *) 'num_wann=', num_wann, '  i=', i, '  j=', j
            call io_error &
              ('Error in get_HH_R: orbital indices out of bounds')
          endif
          if (n > 1) then
            if (ivdum(1) /= ivdum_old(1) .or. ivdum(2) /= ivdum_old(2) .or. &
                ivdum(3) /= ivdum_old(3)) then
              ir = ir + 1
              new_ir = .true.
            else
              new_ir = .false.
            endif
          endif
          ivdum_old = ivdum
          ! Note that the same (j,i,ir) may occur more than once in
          ! the file seedname_HH_R.dat, hence the addition instead
          ! of a simple equality. (This has to do with the way the
          ! Berlijn effective Hamiltonian algorithm is
          ! implemented.)
          HH_R(j, i, ir) = HH_R(j, i, ir) + cmplx(rdum_real, rdum_imag, kind=dp)
          if (new_ir) then
            irvec(:, ir) = ivdum(:)
            if (ivdum(1) == 0 .and. ivdum(2) == 0 .and. ivdum(3) == 0) rpt_origin = ir
          endif
          n = n + 1
        enddo
        close (file_unit)
        if (ir /= nrpts) then
          write (stdout, *) 'ir=', ir, '  nrpts=', nrpts
          call io_error('Error in get_HH_R: inconsistent nrpts values')
        endif
        do ir = 1, nrpts
          crvec(:, ir) = matmul(transpose(real_lattice), irvec(:, ir))
        end do
        ndegen(:) = 1 ! This is assumed when reading HH_R from file
        !
        ! TODO: Implement scissors in this case? Need to choose a
        ! uniform k-mesh (the scissors correction is applied in
        ! k-space) and then proceed as below, Fourier transforming
        ! back to real space and adding to HH_R, Hopefully the
        ! result converges (rapidly) with the k-mesh density, but
        ! one should check
        !
        if (abs(scissors_shift) > 1.0e-7_dp) &
          call io_error( &
          'Error in get_HH_R: scissors shift not implemented for ' &
          //'effective_model=T')
      endif
      call comms_bcast(HH_R(1, 1, 1), num_wann*num_wann*nrpts)
      call comms_bcast(ndegen(1), nrpts)
      call comms_bcast(irvec(1, 1), 3*nrpts)
      call comms_bcast(crvec(1, 1), 3*nrpts)
      if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_HH_R', 2)
      return
    endif

    ! Everything below is only executed if effective_model==False (default)

    ! Real-space Hamiltonian H(R) is calculated by Fourier
    ! transforming H(q) defined on the ab-initio reciprocal mesh
    !
    allocate (HH_q(num_wann, num_wann, num_kpts))
    allocate (num_states(num_kpts))

    HH_q = cmplx_0
    do ik = 1, num_kpts
      if (have_disentangled) then
        num_states(ik) = ndimwin(ik)
      else
        num_states(ik) = num_wann
      endif
      call get_win_min(ik, winmin_q)
      do m = 1, num_wann
        do n = 1, m
          do i = 1, num_states(ik)
            ii = winmin_q + i - 1
            HH_q(n, m, ik) = HH_q(n, m, ik) &
                             + conjg(v_matrix(i, n, ik))*eigval(ii, ik) &
                             *v_matrix(i, m, ik)
          enddo
          HH_q(m, n, ik) = conjg(HH_q(n, m, ik))
        enddo
      enddo
    enddo
    call fourier_q_to_R(HH_q, HH_R)

    ! Scissors correction for an insulator: shift conduction bands upwards by
    ! scissors_shift eV
    !
    if (num_valence_bands > 0 .and. abs(scissors_shift) > 1.0e-7_dp) then
      allocate (sciss_R(num_wann, num_wann, nrpts))
      allocate (sciss_q(num_wann, num_wann, num_kpts))
      sciss_q = cmplx_0
      do ik = 1, num_kpts
        do j = 1, num_wann
          do i = 1, j
            do m = 1, num_valence_bands
              sciss_q(i, j, ik) = sciss_q(i, j, ik) - &
                                  conjg(u_matrix(m, i, ik))*u_matrix(m, j, ik)
            enddo
            sciss_q(j, i, ik) = conjg(sciss_q(i, j, ik))
          enddo
        enddo
      enddo
      call fourier_q_to_R(sciss_q, sciss_R)
      do n = 1, num_wann
        sciss_R(n, n, rpt_origin) = sciss_R(n, n, rpt_origin) + 1.0_dp
      end do
      sciss_R = sciss_R*scissors_shift
      HH_R = HH_R + sciss_R
    endif

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_HH_R', 2)
    return

101 call io_error('Error in get_HH_R: problem opening file '// &
                  trim(seedname)//'_HH_R.dat')

  end subroutine get_HH_R

  !==================================================
  subroutine get_AA_R
    !==================================================
    !
    !! AA_a(R) = <0|r_a|R> is the Fourier transform
    !! of the Berrry connection AA_a(k) = i<u|del_a u>
    !! (a=x,y,z)
    !
    !==================================================

    use w90_constants, only: dp, cmplx_0, cmplx_i
    use w90_parameters, only: num_kpts, nntot, num_wann, wb, bk, timing_level, &
      num_bands, ndimwin, nnlist, have_disentangled, &
      transl_inv, nncell, effective_model
    use w90_postw90_common, only: nrpts
    use w90_io, only: stdout, io_file_unit, io_error, io_stopwatch, &
      seedname
    use w90_comms, only: on_root, comms_bcast

    complex(kind=dp), allocatable :: AA_q(:, :, :, :)
    complex(kind=dp), allocatable :: AA_q_diag(:, :)
    complex(kind=dp), allocatable :: S_o(:, :)
    complex(kind=dp), allocatable :: S(:, :)
    integer                       :: n, m, i, j, &
                                     ik, ik2, ik_prev, nn, inn, nnl, nnm, nnn, &
                                     idir, ncount, nn_count, mmn_in, &
                                     nb_tmp, nkp_tmp, nntot_tmp, file_unit, &
                                     ir, io, ivdum(3), ivdum_old(3)
    integer, allocatable          :: num_states(:)
    real(kind=dp)                 :: m_real, m_imag, rdum1_real, rdum1_imag, &
                                     rdum2_real, rdum2_imag, rdum3_real, rdum3_imag
    logical                       :: nn_found
    character(len=60)             :: header

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_AA_R', 1)

    if (.not. allocated(AA_R)) then
      allocate (AA_R(num_wann, num_wann, nrpts, 3))
    else
      if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_AA_R', 2)
      return
    end if

    ! Real-space position matrix elements read from file
    !
    if (effective_model) then
      if (.not. allocated(HH_R)) call io_error( &
        'Error in get_AA_R: Must read file'//trim(seedname)//'_HH_R.dat first')
      AA_R = cmplx_0
      if (on_root) then
        write (stdout, '(/a)') ' Reading position matrix elements from file ' &
          //trim(seedname)//'_AA_R.dat'
        file_unit = io_file_unit()
        open (file_unit, file=trim(seedname)//'_AA_R.dat', form='formatted', &
              status='old', err=103)
        read (file_unit, *) ! header
        ir = 1
        ivdum_old(:) = 0
        n = 1
        do
          read (file_unit, '(5I5,6F12.6)', iostat=io) &
            ivdum(1:3), j, i, rdum1_real, rdum1_imag, &
            rdum2_real, rdum2_imag, rdum3_real, rdum3_imag
          if (io < 0) exit
          if (i < 1 .or. i > num_wann .or. j < 1 .or. j > num_wann) then
            write (stdout, *) 'num_wann=', num_wann, '  i=', i, '  j=', j
            call io_error('Error in get_AA_R: orbital indices out of bounds')
          endif
          if (n > 1) then
            if (ivdum(1) /= ivdum_old(1) .or. ivdum(2) /= ivdum_old(2) .or. &
                ivdum(3) /= ivdum_old(3)) ir = ir + 1
          endif
          ivdum_old = ivdum
          AA_R(j, i, ir, 1) = AA_R(j, i, ir, 1) + cmplx(rdum1_real, rdum1_imag, kind=dp)
          AA_R(j, i, ir, 2) = AA_R(j, i, ir, 2) + cmplx(rdum2_real, rdum2_imag, kind=dp)
          AA_R(j, i, ir, 3) = AA_R(j, i, ir, 3) + cmplx(rdum3_real, rdum3_imag, kind=dp)
          n = n + 1
        enddo
        close (file_unit)
        ! AA_R may not contain the same number of R-vectors as HH_R
        ! (e.g., if a diagonal representation of the position matrix
        ! elements is used, but it cannot be larger
        if (ir > nrpts) then
          write (stdout, *) 'ir=', ir, '  nrpts=', nrpts
          call io_error('Error in get_AA_R: inconsistent nrpts values')
        endif
      endif
      call comms_bcast(AA_R(1, 1, 1, 1), num_wann*num_wann*nrpts*3)
      if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_AA_R', 2)
      return
    endif

    ! Real-space position matrix elements calculated by Fourier
    ! transforming overlap matrices defined on the ab-initio
    ! reciprocal mesh
    !
    ! Do everything on root, broadcast AA_R at the end (smaller than S_o)
    !
    if (on_root) then

      allocate (AA_q(num_wann, num_wann, num_kpts, 3))
      allocate (AA_q_diag(num_wann, 3))
      allocate (S_o(num_bands, num_bands))
      allocate (S(num_wann, num_wann))

      allocate (num_states(num_kpts))
      do ik = 1, num_kpts
        if (have_disentangled) then
          num_states(ik) = ndimwin(ik)
        else
          num_states(ik) = num_wann
        endif
      enddo

      mmn_in = io_file_unit()
      open (unit=mmn_in, file=trim(seedname)//'.mmn', &
            form='formatted', status='old', action='read', err=101)
      write (stdout, '(/a)', advance='no') &
        ' Reading overlaps from '//trim(seedname)//'.mmn in get_AA_R   : '
      ! Read the comment line (header)
      read (mmn_in, '(a)', err=102, end=102) header
      write (stdout, '(a)') trim(header)
      ! Read the number of bands, k-points and nearest neighbours
      read (mmn_in, *, err=102, end=102) nb_tmp, nkp_tmp, nntot_tmp
      ! Checks
      if (nb_tmp .ne. num_bands) &
        call io_error(trim(seedname)//'.mmn has wrong number of bands')
      if (nkp_tmp .ne. num_kpts) &
        call io_error(trim(seedname)//'.mmn has wrong number of k-points')
      if (nntot_tmp .ne. nntot) &
        call io_error &
        (trim(seedname)//'.mmn has wrong number of nearest neighbours')

      AA_q = cmplx_0
      ik_prev = 0

      ! Composite loop over k-points ik (outer loop) and neighbors ik2 (inner)
      do ncount = 1, num_kpts*nntot
        !
        !Read from .mmn file the original overlap matrix
        ! S_o=<u_ik|u_ik2> between ab initio eigenstates
        !
        read (mmn_in, *, err=102, end=102) ik, ik2, nnl, nnm, nnn
        do n = 1, num_bands
          do m = 1, num_bands
            read (mmn_in, *, err=102, end=102) m_real, m_imag
            S_o(m, n) = cmplx(m_real, m_imag, kind=dp)
          enddo
        enddo
        !debug
        !OK
        !if(ik.ne.ik_prev .and.ik_prev.ne.0) then
        !   if(nn_count.ne.nntot)&
        !        write(stdout,*) 'something wrong in get_AA_R!'
        !endif
        !enddebug
        if (ik .ne. ik_prev) nn_count = 0
        nn = 0
        nn_found = .false.
        do inn = 1, nntot
          if ((ik2 .eq. nnlist(ik, inn)) .and. &
              (nnl .eq. nncell(1, ik, inn)) .and. &
              (nnm .eq. nncell(2, ik, inn)) .and. &
              (nnn .eq. nncell(3, ik, inn))) then
            if (.not. nn_found) then
              nn_found = .true.
              nn = inn
            else
              call io_error('Error reading '//trim(seedname)//'.mmn.&
                   & More than one matching nearest neighbour found')
            endif
          endif
        end do
        if (nn .eq. 0) then
          write (stdout, '(/a,i8,2i5,i4,2x,3i3)') &
            ' Error reading '//trim(seedname)//'.mmn:', &
            ncount, ik, ik2, nn, nnl, nnm, nnn
          call io_error('Neighbour not found')
        end if
        nn_count = nn_count + 1 !Check: can also be place after nn=inn (?)

        ! Wannier-gauge overlap matrix S in the projected subspace
        !
        call get_gauge_overlap_matrix( &
          ik, num_states(ik), &
          nnlist(ik, nn), num_states(nnlist(ik, nn)), &
          S_o, S)

        ! Berry connection matrix
        ! Assuming all neighbors of a given point are read in sequence!
        !
        if (transl_inv .and. ik .ne. ik_prev) AA_q_diag(:, :) = cmplx_0
        do idir = 1, 3
          AA_q(:, :, ik, idir) = AA_q(:, :, ik, idir) &
                                 + cmplx_i*wb(nn)*bk(idir, nn, ik)*S(:, :)
          if (transl_inv) then
            !
            ! Rewrite band-diagonal elements a la Eq.(31) of MV97
            !
            do i = 1, num_wann
              AA_q_diag(i, idir) = AA_q_diag(i, idir) &
                                   - wb(nn)*bk(idir, nn, ik)*aimag(log(S(i, i)))
            enddo
          endif
        end do
        ! Assuming all neighbors of a given point are read in sequence!
        if (nn_count == nntot) then !looped over all neighbors
          do idir = 1, 3
            if (transl_inv) then
              do n = 1, num_wann
                AA_q(n, n, ik, idir) = AA_q_diag(n, idir)
              enddo
            endif
            !
            ! Since Eq.(44) WYSV06 does not preserve the Hermiticity of the
            ! Berry potential matrix, take Hermitean part (whether this
            ! makes a difference or not for e.g. the AHC, depends on which
            ! expression is used to evaluate the Berry curvature.
            ! See comments in berry_wanint.F90)
            !
            AA_q(:, :, ik, idir) = &
              0.5_dp*(AA_q(:, :, ik, idir) &
                      + conjg(transpose(AA_q(:, :, ik, idir))))
          enddo
        end if

        ik_prev = ik
      enddo !ncount

      close (mmn_in)

      call fourier_q_to_R(AA_q(:, :, :, 1), AA_R(:, :, :, 1))
      call fourier_q_to_R(AA_q(:, :, :, 2), AA_R(:, :, :, 2))
      call fourier_q_to_R(AA_q(:, :, :, 3), AA_R(:, :, :, 3))

    endif !on_root

    call comms_bcast(AA_R(1, 1, 1, 1), num_wann*num_wann*nrpts*3)

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_AA_R', 2)
    return

101 call io_error &
      ('Error: Problem opening input file '//trim(seedname)//'.mmn')
102 call io_error &
      ('Error: Problem reading input file '//trim(seedname)//'.mmn')
103 call io_error('Error in get_AA_R: problem opening file '// &
                  trim(seedname)//'_AA_R.dat')

  end subroutine get_AA_R

  !=====================================================
  subroutine get_BB_R
    !=====================================================
    !
    !! BB_a(R)=<0n|H(r-R)|Rm> is the Fourier transform of
    !! BB_a(k) = i<u|H|del_a u> (a=x,y,z)
    !
    !=====================================================

    use w90_constants, only: dp, cmplx_0, cmplx_i
    use w90_parameters, only: num_kpts, nntot, nnlist, num_wann, num_bands, &
      ndimwin, eigval, wb, bk, have_disentangled, &
      timing_level, nncell, scissors_shift
    use w90_postw90_common, only: nrpts, v_matrix
    use w90_io, only: stdout, io_file_unit, io_error, io_stopwatch, &
      seedname
    use w90_comms, only: on_root, comms_bcast

    integer          :: idir, n, m, nn, i, ii, j, jj, &
                        ik, ik2, inn, nnl, nnm, nnn, &
                        winmin_q, winmin_qb, ncount, &
                        nb_tmp, nkp_tmp, nntot_tmp, mmn_in

    complex(kind=dp), allocatable :: S_o(:, :)
    complex(kind=dp), allocatable :: BB_q(:, :, :, :)
    complex(kind=dp), allocatable :: H_q_qb(:, :)
    integer, allocatable          :: num_states(:)
    real(kind=dp)                 :: m_real, m_imag
    logical                       :: nn_found
    character(len=60)             :: header

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_BB_R', 1)
    if (.not. allocated(BB_R)) then
      allocate (BB_R(num_wann, num_wann, nrpts, 3))
    else
      if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_BB_R', 2)
      return
    end if

    if (on_root) then

      if (abs(scissors_shift) > 1.0e-7_dp) &
        call io_error('Error: scissors correction not yet implemented for BB_R')

      allocate (BB_q(num_wann, num_wann, num_kpts, 3))
      allocate (S_o(num_bands, num_bands))
      allocate (H_q_qb(num_wann, num_wann))

      allocate (num_states(num_kpts))
      do ik = 1, num_kpts
        if (have_disentangled) then
          num_states(ik) = ndimwin(ik)
        else
          num_states(ik) = num_wann
        endif
      enddo

      mmn_in = io_file_unit()
      open (unit=mmn_in, file=trim(seedname)//'.mmn', &
            form='formatted', status='old', action='read', err=103)
      write (stdout, '(/a)', advance='no') &
        ' Reading overlaps from '//trim(seedname)//'.mmn in get_BB_R   : '
      ! Read the comment line (header)
      read (mmn_in, '(a)', err=104, end=104) header
      write (stdout, '(a)') trim(header)
      ! Read the number of bands, k-points and nearest neighbours
      read (mmn_in, *, err=104, end=104) nb_tmp, nkp_tmp, nntot_tmp
      ! Checks
      if (nb_tmp .ne. num_bands) &
        call io_error(trim(seedname)//'.mmn has wrong number of bands')
      if (nkp_tmp .ne. num_kpts) &
        call io_error(trim(seedname)//'.mmn has wrong number of k-points')
      if (nntot_tmp .ne. nntot) &
        call io_error &
        (trim(seedname)//'.mmn has wrong number of nearest neighbours')

      BB_q = cmplx_0

      do ncount = 1, num_kpts*nntot
        !
        !Read from .mmn file the original overlap matrix
        ! S_o=<u_ik|u_ik2> between ab initio eigenstates
        !
        read (mmn_in, *, err=104, end=104) ik, ik2, nnl, nnm, nnn
        do n = 1, num_bands
          do m = 1, num_bands
            read (mmn_in, *, err=104, end=104) m_real, m_imag
            S_o(m, n) = cmplx(m_real, m_imag, kind=dp)
          enddo
        enddo
        nn = 0
        nn_found = .false.
        do inn = 1, nntot
          if ((ik2 .eq. nnlist(ik, inn)) .and. &
              (nnl .eq. nncell(1, ik, inn)) .and. &
              (nnm .eq. nncell(2, ik, inn)) .and. &
              (nnn .eq. nncell(3, ik, inn))) then
            if (.not. nn_found) then
              nn_found = .true.
              nn = inn
            else
              call io_error('Error reading '//trim(seedname)//'.mmn.&
                   & More than one matching nearest neighbour found')
            endif
          endif
        end do
        if (nn .eq. 0) then
          write (stdout, '(/a,i8,2i5,i4,2x,3i3)') &
            ' Error reading '//trim(seedname)//'.mmn:', &
            ncount, ik, ik2, nn, nnl, nnm, nnn
          call io_error('Neighbour not found')
        end if

        call get_win_min(ik, winmin_q)
        call get_win_min(nnlist(ik, nn), winmin_qb)
        call get_gauge_overlap_matrix( &
          ik, num_states(ik), &
          nnlist(ik, nn), num_states(nnlist(ik, nn)), &
          S_o, H=H_q_qb)
        do idir = 1, 3
          BB_q(:, :, ik, idir) = BB_q(:, :, ik, idir) &
                                 + cmplx_i*wb(nn)*bk(idir, nn, ik)*H_q_qb(:, :)
        enddo
      enddo !ncount

      close (mmn_in)

      call fourier_q_to_R(BB_q(:, :, :, 1), BB_R(:, :, :, 1))
      call fourier_q_to_R(BB_q(:, :, :, 2), BB_R(:, :, :, 2))
      call fourier_q_to_R(BB_q(:, :, :, 3), BB_R(:, :, :, 3))

    endif !on_root

    call comms_bcast(BB_R(1, 1, 1, 1), num_wann*num_wann*nrpts*3)

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_BB_R', 2)
    return

103 call io_error &
      ('Error: Problem opening input file '//trim(seedname)//'.mmn')
104 call io_error &
      ('Error: Problem reading input file '//trim(seedname)//'.mmn')

  end subroutine get_BB_R

  !=============================================================
  subroutine get_CC_R
    !=============================================================
    !
    !! CC_ab(R) = <0|r_a.H.(r-R)_b|R> is the Fourier transform of
    !! CC_ab(k) = <del_a u|H|del_b u> (a,b=x,y,z)
    !
    !=============================================================

    use w90_constants, only: dp, cmplx_0
    use w90_parameters, only: num_kpts, nntot, nnlist, num_wann, &
      num_bands, ndimwin, wb, bk, &
      have_disentangled, timing_level, &
      scissors_shift, uHu_formatted
    use w90_postw90_common, only: nrpts, v_matrix
    use w90_io, only: stdout, io_error, io_stopwatch, io_file_unit, &
      seedname
    use w90_comms, only: on_root, comms_bcast

    integer          :: i, j, ii, jj, m, n, a, b, nn1, nn2, ik, nb_tmp, nkp_tmp, &
                        nntot_tmp, uHu_in, qb1, qb2, winmin_qb1, winmin_qb2

    integer, allocatable          :: num_states(:)
    complex(kind=dp), allocatable :: CC_q(:, :, :, :, :)
    complex(kind=dp), allocatable :: Ho_qb1_q_qb2(:, :)
    complex(kind=dp), allocatable :: H_qb1_q_qb2(:, :)
    real(kind=dp)                 :: c_real, c_img
    character(len=60)             :: header

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_CC_R', 1)

    if (.not. allocated(CC_R)) then
      allocate (CC_R(num_wann, num_wann, nrpts, 3, 3))
    else
      if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_CC_R', 2)
      return
    end if

    if (on_root) then

      if (abs(scissors_shift) > 1.0e-7_dp) &
        call io_error('Error: scissors correction not yet implemented for CC_R')

      allocate (Ho_qb1_q_qb2(num_bands, num_bands))
      allocate (H_qb1_q_qb2(num_wann, num_wann))
      allocate (CC_q(num_wann, num_wann, num_kpts, 3, 3))

      allocate (num_states(num_kpts))
      do ik = 1, num_kpts
        if (have_disentangled) then
          num_states(ik) = ndimwin(ik)
        else
          num_states(ik) = num_wann
        endif
      enddo

      uHu_in = io_file_unit()
      if (uHu_formatted) then
        open (unit=uHu_in, file=trim(seedname)//".uHu", form='formatted', &
              status='old', action='read', err=105)
        write (stdout, '(/a)', advance='no') &
          ' Reading uHu overlaps from '//trim(seedname)//'.uHu in get_CC_R: '
        read (uHu_in, *, err=106, end=106) header
        write (stdout, '(a)') trim(header)
        read (uHu_in, *, err=106, end=106) nb_tmp, nkp_tmp, nntot_tmp
      else
        open (unit=uHu_in, file=trim(seedname)//".uHu", form='unformatted', &
              status='old', action='read', err=105)
        write (stdout, '(/a)', advance='no') &
          ' Reading uHu overlaps from '//trim(seedname)//'.uHu in get_CC_R: '
        read (uHu_in, err=106, end=106) header
        write (stdout, '(a)') trim(header)
        read (uHu_in, err=106, end=106) nb_tmp, nkp_tmp, nntot_tmp
      endif
      if (nb_tmp .ne. num_bands) &
        call io_error &
        (trim(seedname)//'.uHu has not the right number of bands')
      if (nkp_tmp .ne. num_kpts) &
        call io_error &
        (trim(seedname)//'.uHu has not the right number of k-points')
      if (nntot_tmp .ne. nntot) &
        call io_error &
        (trim(seedname)//'.uHu has not the right number of nearest neighbours')

      CC_q = cmplx_0
      do ik = 1, num_kpts
        do nn2 = 1, nntot
          qb2 = nnlist(ik, nn2)
          call get_win_min(qb2, winmin_qb2)
          do nn1 = 1, nntot
            qb1 = nnlist(ik, nn1)
            call get_win_min(qb1, winmin_qb1)
            !
            ! Read from .uHu file the matrices <u_{q+b1}|H_q|u_{q+b2}>
            ! between the original ab initio eigenstates
            !
            if (uHu_formatted) then
              do m = 1, num_bands
                do n = 1, num_bands
                  read (uHu_in, *, err=106, end=106) c_real, c_img
                  Ho_qb1_q_qb2(n, m) = cmplx(c_real, c_img, dp)
                end do
              end do
            else
              read (uHu_in, err=106, end=106) &
                ((Ho_qb1_q_qb2(n, m), n=1, num_bands), m=1, num_bands)
            endif
            ! pw2wannier90 is coded a bit strangely, so here we take the transpose
            Ho_qb1_q_qb2 = transpose(Ho_qb1_q_qb2)
            ! old code here
            !do m=1,num_bands
            !   do n=1,num_bands
            !      read(uHu_in,err=106,end=106) Ho_qb1_q_qb2(m,n)
            !   end do
            !end do
            !
            ! Transform to projected subspace, Wannier gauge
            !
            call get_gauge_overlap_matrix( &
              qb1, num_states(qb1), &
              qb2, num_states(qb2), &
              Ho_qb1_q_qb2, H_qb1_q_qb2)
            do b = 1, 3
              do a = 1, b
                CC_q(:, :, ik, a, b) = CC_q(:, :, ik, a, b) + wb(nn1)*bk(a, nn1, ik) &
                                       *wb(nn2)*bk(b, nn2, ik)*H_qb1_q_qb2(:, :)
              enddo
            enddo
          enddo !nn1
        enddo !nn2
        do b = 1, 3
          do a = 1, b
            CC_q(:, :, ik, b, a) = conjg(transpose(CC_q(:, :, ik, a, b)))
          enddo
        enddo
      enddo !ik

      close (uHu_in)

      do b = 1, 3
        do a = 1, 3
          call fourier_q_to_R(CC_q(:, :, :, a, b), CC_R(:, :, :, a, b))
        enddo
      enddo

    endif !on_root

    call comms_bcast(CC_R(1, 1, 1, 1, 1), num_wann*num_wann*nrpts*3*3)

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_CC_R', 2)
    return

105 call io_error &
      ('Error: Problem opening input file '//trim(seedname)//'.uHu')
106 call io_error &
      ('Error: Problem reading input file '//trim(seedname)//'.uHu')

  end subroutine get_CC_R

  !===========================================================
  subroutine get_FF_R
    !===========================================================
    !
    !! FF_ab(R) = <0|r_a.(r-R)_b|R> is the Fourier transform of
    !! FF_ab(k) = <del_a u|del_b u> (a=alpha,b=beta)
    !
    !===========================================================

    use w90_constants, only: dp, cmplx_0
    use w90_parameters, only: num_kpts, nntot, nnlist, num_wann, &
      num_bands, ndimwin, wb, bk, &
      have_disentangled, timing_level
    use w90_postw90_common, only: nrpts, v_matrix
    use w90_io, only: stdout, io_error, io_stopwatch, io_file_unit, &
      seedname
    use w90_comms, only: on_root, comms_bcast

    integer          :: i, j, ii, jj, m, n, a, b, nn1, nn2, ik, nb_tmp, nkp_tmp, nntot_tmp, &
                        uIu_in, qb1, qb2, winmin_qb1, winmin_qb2

    integer, allocatable          :: num_states(:)
    complex(kind=dp), allocatable :: FF_q(:, :, :, :, :)
    complex(kind=dp), allocatable :: Lo_qb1_q_qb2(:, :)
    complex(kind=dp), allocatable :: L_qb1_q_qb2(:, :)
    character(len=60)             :: header

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_FF_R', 1)

    if (.not. allocated(FF_R)) then
      allocate (FF_R(num_wann, num_wann, nrpts, 3, 3))
    else
      if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_FF_R', 2)
      return
    end if

    if (on_root) then

      allocate (Lo_qb1_q_qb2(num_bands, num_bands))
      allocate (L_qb1_q_qb2(num_wann, num_wann))
      allocate (FF_q(num_wann, num_wann, num_kpts, 3, 3))

      allocate (num_states(num_kpts))
      do ik = 1, num_kpts
        if (have_disentangled) then
          num_states(ik) = ndimwin(ik)
        else
          num_states(ik) = num_wann
        endif
      enddo

      uIu_in = io_file_unit()
      open (unit=uIu_in, file=TRIM(seedname)//".uIu", form='unformatted', &
            status='old', action='read', err=107)
      write (stdout, '(/a)', advance='no') &
        ' Reading uIu overlaps from '//trim(seedname)//'.uIu in get_FF_R: '
      read (uIu_in, err=108, end=108) header
      write (stdout, '(a)') trim(header)
      read (uIu_in, err=108, end=108) nb_tmp, nkp_tmp, nntot_tmp
      if (nb_tmp .ne. num_bands) &
        call io_error &
        (trim(seedname)//'.uIu has not the right number of bands')
      if (nkp_tmp .ne. num_kpts) &
        call io_error &
        (trim(seedname)//'.uIu has not the right number of k-points')
      if (nntot_tmp .ne. nntot) &
        call io_error &
        (trim(seedname)//'.uIu has not the right number of nearest neighbours')

      FF_q = cmplx_0
      do ik = 1, num_kpts
        do nn2 = 1, nntot
          qb2 = nnlist(ik, nn2)
          call get_win_min(qb2, winmin_qb2)
          do nn1 = 1, nntot
            qb1 = nnlist(ik, nn1)
            call get_win_min(qb1, winmin_qb1)
            !
            ! Read from .uIu file the matrices <u_{q+b1}|u_{q+b2}>
            ! between the original ab initio eigenstates
            !
            !               do m=1,num_bands
            !                  do n=1,num_bands
            !                     read(uIu_in,err=108,end=108) Lo_qb1_q_qb2(m,n)
            !                  end do
            !               end do
            read (uIu_in, err=108, end=108) &
              ((Lo_qb1_q_qb2(n, m), n=1, num_bands), m=1, num_bands)
            !
            ! **************************************************************
            ! 2013-08-09: Do we need to take a transpose here?! SEE get_CC_R
            Lo_qb1_q_qb2 = transpose(Lo_qb1_q_qb2) ! added 2013-08-09 (?)
            ! **************************************************************
            !
            ! Transform to projected subspace, Wannier gauge
            !
            L_qb1_q_qb2(:, :) = cmplx_0
            do m = 1, num_wann
              do n = 1, num_wann
                do i = 1, num_states(qb1)
                  ii = winmin_qb1 + i - 1
                  do j = 1, num_states(qb2)
                    jj = winmin_qb2 + j - 1
                    L_qb1_q_qb2(n, m) = L_qb1_q_qb2(n, m) &
                                        + conjg(v_matrix(i, n, qb1)) &
                                        *Lo_qb1_q_qb2(ii, jj) &
                                        *v_matrix(j, m, qb2)
                  enddo
                enddo
              enddo
            enddo
            do b = 1, 3
              do a = 1, b
                FF_q(:, :, ik, a, b) = FF_q(:, :, ik, a, b) + wb(nn1)*bk(a, nn1, ik) &
                                       *wb(nn2)*bk(b, nn2, ik)*L_qb1_q_qb2(:, :)
              enddo
            enddo
          enddo !nn1
        enddo !nn2
        do b = 1, 3
          do a = 1, b
            FF_q(:, :, ik, b, a) = conjg(transpose(FF_q(:, :, ik, a, b)))
          enddo
        enddo
      enddo !ik

      close (uIu_in)

      do b = 1, 3
        do a = 1, 3
          call fourier_q_to_R(FF_q(:, :, :, a, b), FF_R(:, :, :, a, b))
        enddo
      enddo

    endif !on_root

    call comms_bcast(FF_R(1, 1, 1, 1, 1), num_wann*num_wann*nrpts*3*3)

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_FF_R', 2)
    return

107 call io_error &
      ('Error: Problem opening input file '//trim(seedname)//'.uIu')
108 call io_error &
      ('Error: Problem reading input file '//trim(seedname)//'.uIu')

  end subroutine get_FF_R

  !================================================================
  subroutine get_SS_R
    !================================================================
    !
    !! Wannier representation of the Pauli matrices: <0n|sigma_a|Rm>
    !! (a=x,y,z)
    !
    !================================================================

    use w90_constants, only: dp, pi, cmplx_0
    use w90_parameters, only: num_wann, ndimwin, num_kpts, num_bands, &
      timing_level, have_disentangled, spn_formatted
    use w90_postw90_common, only: nrpts, v_matrix
    use w90_io, only: io_error, io_stopwatch, stdout, seedname, &
      io_file_unit
    use w90_comms, only: on_root, comms_bcast

    implicit none

    complex(kind=dp), allocatable :: spn_o(:, :, :, :), SS_q(:, :, :, :), spn_temp(:, :)
    real(kind=dp)                 :: s_real, s_img
    integer, allocatable          :: num_states(:)
    integer                       :: i, j, ii, jj, m, n, spn_in, ik, is, &
                                     winmin, nb_tmp, nkp_tmp, ierr, s, counter
    character(len=60)             :: header

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_SS_R', 1)

    if (.not. allocated(SS_R)) then
      allocate (SS_R(num_wann, num_wann, nrpts, 3))
    else
      return ! been here before
    end if

    if (on_root) then

      allocate (spn_o(num_bands, num_bands, num_kpts, 3))
      allocate (SS_q(num_wann, num_wann, num_kpts, 3))

      allocate (num_states(num_kpts))
      do ik = 1, num_kpts
        if (have_disentangled) then
          num_states(ik) = ndimwin(ik)
        else
          num_states(ik) = num_wann
        endif
      enddo

      ! Read from .spn file the original spin matrices <psi_nk|sigma_i|psi_mk>
      ! (sigma_i = Pauli matrix) between ab initio eigenstates
      !
      spn_in = io_file_unit()
      if (spn_formatted) then
        open (unit=spn_in, file=trim(seedname)//'.spn', form='formatted', &
              status='old', err=109)
        write (stdout, '(/a)', advance='no') &
          ' Reading spin matrices from '//trim(seedname)//'.spn in get_SS_R : '
        read (spn_in, *, err=110, end=110) header
        write (stdout, '(a)') trim(header)
        read (spn_in, *, err=110, end=110) nb_tmp, nkp_tmp
      else
        open (unit=spn_in, file=trim(seedname)//'.spn', form='unformatted', &
              status='old', err=109)
        write (stdout, '(/a)', advance='no') &
          ' Reading spin matrices from '//trim(seedname)//'.spn in get_SS_R : '
        read (spn_in, err=110, end=110) header
        write (stdout, '(a)') trim(header)
        read (spn_in, err=110, end=110) nb_tmp, nkp_tmp
      endif
      if (nb_tmp .ne. num_bands) &
        call io_error(trim(seedname)//'.spn has wrong number of bands')
      if (nkp_tmp .ne. num_kpts) &
        call io_error(trim(seedname)//'.spn has wrong number of k-points')
      if (spn_formatted) then
        do ik = 1, num_kpts
          do m = 1, num_bands
            do n = 1, m
              read (spn_in, *, err=110, end=110) s_real, s_img
              spn_o(n, m, ik, 1) = cmplx(s_real, s_img, dp)
              read (spn_in, *, err=110, end=110) s_real, s_img
              spn_o(n, m, ik, 2) = cmplx(s_real, s_img, dp)
              read (spn_in, *, err=110, end=110) s_real, s_img
              spn_o(n, m, ik, 3) = cmplx(s_real, s_img, dp)
              ! Read upper-triangular part, now build the rest
              spn_o(m, n, ik, 1) = conjg(spn_o(n, m, ik, 1))
              spn_o(m, n, ik, 2) = conjg(spn_o(n, m, ik, 2))
              spn_o(m, n, ik, 3) = conjg(spn_o(n, m, ik, 3))
            end do
          end do
        enddo
      else
        allocate (spn_temp(3, (num_bands*(num_bands + 1))/2), stat=ierr)
        if (ierr /= 0) call io_error('Error in allocating spm_temp in get_SS_R')
        do ik = 1, num_kpts
          read (spn_in) ((spn_temp(s, m), s=1, 3), m=1, (num_bands*(num_bands + 1))/2)
          counter = 0
          do m = 1, num_bands
            do n = 1, m
              counter = counter + 1
              spn_o(n, m, ik, 1) = spn_temp(1, counter)
              spn_o(m, n, ik, 1) = conjg(spn_temp(1, counter))
              spn_o(n, m, ik, 2) = spn_temp(2, counter)
              spn_o(m, n, ik, 2) = conjg(spn_temp(2, counter))
              spn_o(n, m, ik, 3) = spn_temp(3, counter)
              spn_o(m, n, ik, 3) = conjg(spn_temp(3, counter))
            end do
          end do
        end do
        deallocate (spn_temp, stat=ierr)
        if (ierr /= 0) call io_error('Error in deallocating spm_temp in get_SS_R')
      endif

      close (spn_in)

      ! Transform to projected subspace, Wannier gauge
      !
      SS_q(:, :, :, :) = cmplx_0
      do ik = 1, num_kpts
        do is = 1, 3
          call get_gauge_overlap_matrix( &
            ik, num_states(ik), &
            ik, num_states(ik), &
            spn_o(:, :, ik, is), SS_q(:, :, ik, is))
        enddo !is
      enddo !ik

      call fourier_q_to_R(SS_q(:, :, :, 1), SS_R(:, :, :, 1))
      call fourier_q_to_R(SS_q(:, :, :, 2), SS_R(:, :, :, 2))
      call fourier_q_to_R(SS_q(:, :, :, 3), SS_R(:, :, :, 3))

    endif !on_root

    call comms_bcast(SS_R(1, 1, 1, 1), num_wann*num_wann*nrpts*3)

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_SS_R', 2)
    return

109 call io_error &
      ('Error: Problem opening input file '//trim(seedname)//'.spn')
110 call io_error &
      ('Error: Problem reading input file '//trim(seedname)//'.spn')

  end subroutine get_SS_R

  !==================================================
  subroutine get_SHC_R
    !==================================================
    !
    !! Compute several matrices for spin Hall conductivity
    !! SR_R  = <0n|sigma_{x,y,z}.(r-R)_alpha|Rm>
    !! SHR_R = <0n|sigma_{x,y,z}.H.(r-R)_alpha|Rm>
    !! SH_R  = <0n|sigma_{x,y,z}.H|Rm>
    !
    !==================================================

    use w90_constants, only: dp, cmplx_0, cmplx_i
    use w90_parameters, only: num_kpts, nntot, num_wann, wb, bk, timing_level, &
      num_bands, ndimwin, nnlist, have_disentangled, &
      transl_inv, nncell, spn_formatted, eigval, &
      scissors_shift, num_valence_bands, &
      shc_bandshift, shc_bandshift_firstband, shc_bandshift_energyshift
    use w90_postw90_common, only: nrpts
    use w90_io, only: stdout, io_file_unit, io_error, io_stopwatch, &
      seedname
    use w90_comms, only: on_root, comms_bcast

    complex(kind=dp), allocatable :: SR_q(:, :, :, :, :)
    complex(kind=dp), allocatable :: SHR_q(:, :, :, :, :)
    complex(kind=dp), allocatable :: SH_q(:, :, :, :)

    complex(kind=dp), allocatable :: S_o(:, :)
    complex(kind=dp), allocatable :: spn_o(:, :, :, :), spn_temp(:, :)
    complex(kind=dp), allocatable :: H_o(:, :, :)
    complex(kind=dp), allocatable :: SH_o(:, :, :, :)
    complex(kind=dp)              :: SM_o(num_bands, num_bands, 3)
    complex(kind=dp)              :: SHM_o(num_bands, num_bands, 3)

    complex(kind=dp)              :: SS_q(num_wann, num_wann, 3)
    complex(kind=dp)              :: SM_q(num_wann, num_wann, 3)
    complex(kind=dp)              :: SHM_q(num_wann, num_wann, 3)

    real(kind=dp)                 :: s_real, s_img
    integer                       :: spn_in, counter, ierr, s, is

    integer                       :: n, m, i, j, &
                                     ik, ik2, ik_prev, nn, inn, nnl, nnm, nnn, &
                                     idir, ncount, nn_count, mmn_in, &
                                     nb_tmp, nkp_tmp, nntot_tmp, file_unit, &
                                     ir, io, ivdum(3), ivdum_old(3)
    integer, allocatable          :: num_states(:)
    real(kind=dp)                 :: m_real, m_imag, rdum1_real, rdum1_imag, &
                                     rdum2_real, rdum2_imag, rdum3_real, rdum3_imag
    logical                       :: nn_found
    character(len=60)             :: header

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_SHC_R', 1)

    if (.not. allocated(SR_R)) then
      allocate (SR_R(num_wann, num_wann, nrpts, 3, 3))
    else
      if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_SHC_R', 2)
      return
    end if
    if (.not. allocated(SHR_R)) then
      allocate (SHR_R(num_wann, num_wann, nrpts, 3, 3))
    else
      if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_SHC_R', 2)
      return
    end if
    if (.not. allocated(SH_R)) then
      allocate (SH_R(num_wann, num_wann, nrpts, 3))
    else
      if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_SHC_R', 2)
      return
    end if

    ! start copying from get_SS_R, Junfeng Qiao
    ! read spn file
    if (on_root) then

      allocate (spn_o(num_bands, num_bands, num_kpts, 3))

      allocate (num_states(num_kpts))
      do ik = 1, num_kpts
        if (have_disentangled) then
          num_states(ik) = ndimwin(ik)
        else
          num_states(ik) = num_wann
        endif
      enddo

      ! Read from .spn file the original spin matrices <psi_nk|sigma_i|psi_mk>
      ! (sigma_i = Pauli matrix) between ab initio eigenstates
      !
      spn_in = io_file_unit()
      if (spn_formatted) then
        open (unit=spn_in, file=trim(seedname)//'.spn', form='formatted', &
              status='old', err=109)
        write (stdout, '(/a)', advance='no') &
          ' Reading spin matrices from '//trim(seedname)//'.spn in get_SHC_R : '
        read (spn_in, *, err=110, end=110) header
        write (stdout, '(a)') trim(header)
        read (spn_in, *, err=110, end=110) nb_tmp, nkp_tmp
      else
        open (unit=spn_in, file=trim(seedname)//'.spn', form='unformatted', &
              status='old', err=109)
        write (stdout, '(/a)', advance='no') &
          ' Reading spin matrices from '//trim(seedname)//'.spn in get_SHC_R : '
        read (spn_in, err=110, end=110) header
        write (stdout, '(a)') trim(header)
        read (spn_in, err=110, end=110) nb_tmp, nkp_tmp
      endif
      if (nb_tmp .ne. num_bands) &
        call io_error(trim(seedname)//'.spn has wrong number of bands')
      if (nkp_tmp .ne. num_kpts) &
        call io_error(trim(seedname)//'.spn has wrong number of k-points')
      if (spn_formatted) then
        do ik = 1, num_kpts
          do m = 1, num_bands
            do n = 1, m
              read (spn_in, *, err=110, end=110) s_real, s_img
              spn_o(n, m, ik, 1) = cmplx(s_real, s_img, dp)
              read (spn_in, *, err=110, end=110) s_real, s_img
              spn_o(n, m, ik, 2) = cmplx(s_real, s_img, dp)
              read (spn_in, *, err=110, end=110) s_real, s_img
              spn_o(n, m, ik, 3) = cmplx(s_real, s_img, dp)
              ! Read upper-triangular part, now build the rest
              spn_o(m, n, ik, 1) = conjg(spn_o(n, m, ik, 1))
              spn_o(m, n, ik, 2) = conjg(spn_o(n, m, ik, 2))
              spn_o(m, n, ik, 3) = conjg(spn_o(n, m, ik, 3))
            end do
          end do
        enddo
      else
        allocate (spn_temp(3, (num_bands*(num_bands + 1))/2), stat=ierr)
        if (ierr /= 0) call io_error('Error in allocating spm_temp in get_SHC_R')
        do ik = 1, num_kpts
          read (spn_in) ((spn_temp(s, m), s=1, 3), m=1, (num_bands*(num_bands + 1))/2)
          counter = 0
          do m = 1, num_bands
            do n = 1, m
              counter = counter + 1
              spn_o(n, m, ik, 1) = spn_temp(1, counter)
              spn_o(m, n, ik, 1) = conjg(spn_temp(1, counter))
              spn_o(n, m, ik, 2) = spn_temp(2, counter)
              spn_o(m, n, ik, 2) = conjg(spn_temp(2, counter))
              spn_o(n, m, ik, 3) = spn_temp(3, counter)
              spn_o(m, n, ik, 3) = conjg(spn_temp(3, counter))
            end do
          end do
        end do
        deallocate (spn_temp, stat=ierr)
        if (ierr /= 0) call io_error('Error in deallocating spm_temp in get_SHC_R')
      endif

      close (spn_in)

    endif !on_root
    ! end copying from get_SS_R, Junfeng Qiao

    ! start copying from get_HH_R, Junfeng Qiao
    ! Note this is different from get_HH_R, at here we need the
    ! original Hamiltonian to construct SHR_R, SH_R.
    if (on_root) then
      allocate (H_o(num_bands, num_bands, num_kpts))
      H_o = cmplx_0
      do ik = 1, num_kpts
        do m = 1, num_bands
          H_o(m, m, ik) = eigval(m, ik)
        enddo
        ! scissors shift applied to the original Hamiltonian
        if (num_valence_bands > 0 .and. abs(scissors_shift) > 1.0e-7_dp) then
          do m = num_valence_bands + 1, num_bands
            H_o(m, m, ik) = H_o(m, m, ik) + scissors_shift
          end do
        else if (shc_bandshift) then
          do m = shc_bandshift_firstband, num_bands
            H_o(m, m, ik) = H_o(m, m, ik) + shc_bandshift_energyshift
          end do
        end if
      enddo
    endif !on_root
    ! end copying from get_HH_R, Junfeng Qiao

    ! start copying from get_AA_R, Junfeng Qiao
    ! read mmn file
    !
    if (on_root) then

      allocate (SR_q(num_wann, num_wann, num_kpts, 3, 3))
      allocate (SHR_q(num_wann, num_wann, num_kpts, 3, 3))
      allocate (SH_q(num_wann, num_wann, num_kpts, 3))
      allocate (S_o(num_bands, num_bands))

      mmn_in = io_file_unit()
      open (unit=mmn_in, file=trim(seedname)//'.mmn', &
            form='formatted', status='old', action='read', err=101)
      write (stdout, '(/a)', advance='no') &
        ' Reading overlaps from '//trim(seedname)//'.mmn in get_SHC_R   : '
      ! Read the comment line (header)
      read (mmn_in, '(a)', err=102, end=102) header
      write (stdout, '(a)') trim(header)
      ! Read the number of bands, k-points and nearest neighbours
      read (mmn_in, *, err=102, end=102) nb_tmp, nkp_tmp, nntot_tmp
      ! Checks
      if (nb_tmp .ne. num_bands) &
        call io_error(trim(seedname)//'.mmn has wrong number of bands')
      if (nkp_tmp .ne. num_kpts) &
        call io_error(trim(seedname)//'.mmn has wrong number of k-points')
      if (nntot_tmp .ne. nntot) &
        call io_error &
        (trim(seedname)//'.mmn has wrong number of nearest neighbours')

      SR_q = cmplx_0
      SHR_q = cmplx_0
      SH_q = cmplx_0
      ik_prev = 0

      ! QZYZ18 Eq.(48)
      allocate (SH_o(num_bands, num_bands, num_kpts, 3))
      SH_o = cmplx_0
      do ik = 1, num_kpts
        do is = 1, 3
          SH_o(:, :, ik, is) = matmul(spn_o(:, :, ik, is), H_o(:, :, ik))
          call get_gauge_overlap_matrix( &
            ik, num_states(ik), &
            ik, num_states(ik), &
            SH_o(:, :, ik, is), SH_q(:, :, ik, is))
        end do
      end do

      ! Composite loop over k-points ik (outer loop) and neighbors ik2 (inner)
      do ncount = 1, num_kpts*nntot
        !
        !Read from .mmn file the original overlap matrix
        ! S_o=<u_ik|u_ik2> between ab initio eigenstates
        !
        read (mmn_in, *, err=102, end=102) ik, ik2, nnl, nnm, nnn
        do n = 1, num_bands
          do m = 1, num_bands
            read (mmn_in, *, err=102, end=102) m_real, m_imag
            S_o(m, n) = cmplx(m_real, m_imag, kind=dp)
          enddo
        enddo
        !debug
        !OK
        !if(ik.ne.ik_prev .and.ik_prev.ne.0) then
        !   if(nn_count.ne.nntot)&
        !        write(stdout,*) 'something wrong in get_AA_R!'
        !endif
        !enddebug
        if (ik .ne. ik_prev) nn_count = 0
        nn = 0
        nn_found = .false.
        do inn = 1, nntot
          if ((ik2 .eq. nnlist(ik, inn)) .and. &
              (nnl .eq. nncell(1, ik, inn)) .and. &
              (nnm .eq. nncell(2, ik, inn)) .and. &
              (nnn .eq. nncell(3, ik, inn))) then
            if (.not. nn_found) then
              nn_found = .true.
              nn = inn
            else
              call io_error('Error reading '//trim(seedname)//'.mmn.&
                   & More than one matching nearest neighbour found')
            endif
          endif
        end do
        if (nn .eq. 0) then
          write (stdout, '(/a,i8,2i5,i4,2x,3i3)') &
            ' Error reading '//trim(seedname)//'.mmn:', &
            ncount, ik, ik2, nn, nnl, nnm, nnn
          call io_error('Neighbour not found')
        end if
        nn_count = nn_count + 1 !Check: can also be place after nn=inn (?)

        SM_o = cmplx_0
        SHM_o = cmplx_0
        SS_q = cmplx_0
        SM_q = cmplx_0
        SHM_q = cmplx_0
        do is = 1, 3
          ! QZYZ18 Eq.(50)
          SM_o(:, :, is) = matmul(spn_o(:, :, ik, is), S_o(:, :))
          ! QZYZ18 Eq.(51)
          SHM_o(:, :, is) = matmul(SH_o(:, :, ik, is), S_o(:, :))

          ! Transform to projected subspace, Wannier gauge
          !
          ! QZYZ18 Eq.(50)
          call get_gauge_overlap_matrix( &
            ik, num_states(ik), &
            ik, num_states(ik), &
            spn_o(:, :, ik, is), SS_q(:, :, is))
          ! QZYZ18 Eq.(50)
          call get_gauge_overlap_matrix( &
            ik, num_states(ik), &
            nnlist(ik, nn), num_states(nnlist(ik, nn)), &
            SM_o(:, :, is), SM_q(:, :, is))
          ! QZYZ18 Eq.(51)
          call get_gauge_overlap_matrix( &
            ik, num_states(ik), &
            nnlist(ik, nn), num_states(nnlist(ik, nn)), &
            SHM_o(:, :, is), SHM_q(:, :, is))

          ! Assuming all neighbors of a given point are read in sequence!
          !
          do idir = 1, 3
            ! QZYZ18 Eq.(50)
            SR_q(:, :, ik, is, idir) = SR_q(:, :, ik, is, idir) &
                                       + wb(nn)*bk(idir, nn, ik)*(SM_q(:, :, is) - SS_q(:, :, is))
            ! QZYZ18 Eq.(51)
            SHR_q(:, :, ik, is, idir) = SHR_q(:, :, ik, is, idir) &
                                        + wb(nn)*bk(idir, nn, ik)*(SHM_q(:, :, is) - SH_q(:, :, ik, is))
          end do
        end do

        ik_prev = ik
      enddo !ncount

      close (mmn_in)

      do is = 1, 3
        ! QZYZ18 Eq.(46)
        call fourier_q_to_R(SH_q(:, :, :, is), SH_R(:, :, :, is))
        do idir = 1, 3
          ! QZYZ18 Eq.(44)
          call fourier_q_to_R(SR_q(:, :, :, is, idir), SR_R(:, :, :, is, idir))
          ! QZYZ18 Eq.(45)
          call fourier_q_to_R(SHR_q(:, :, :, is, idir), SHR_R(:, :, :, is, idir))
        end do
      end do
      SR_R = cmplx_i*SR_R
      SHR_R = cmplx_i*SHR_R

    endif !on_root

    call comms_bcast(SH_R(1, 1, 1, 1), num_wann*num_wann*nrpts*3)
    call comms_bcast(SR_R(1, 1, 1, 1, 1), num_wann*num_wann*nrpts*3*3)
    call comms_bcast(SHR_R(1, 1, 1, 1, 1), num_wann*num_wann*nrpts*3*3)

    ! end copying from get_AA_R, Junfeng Qiao

    if (timing_level > 1 .and. on_root) call io_stopwatch('get_oper: get_SHC_R', 2)
    return

101 call io_error &
      ('Error: Problem opening input file '//trim(seedname)//'.mmn')
102 call io_error &
      ('Error: Problem reading input file '//trim(seedname)//'.mmn')
109 call io_error &
      ('Error: Problem opening input file '//trim(seedname)//'.spn')
110 call io_error &
      ('Error: Problem reading input file '//trim(seedname)//'.spn')

  end subroutine get_SHC_R

  !=========================================================!
  !                   PRIVATE PROCEDURES                    !
  !=========================================================!

  !=========================================================!
  subroutine fourier_q_to_R(op_q, op_R)
    !==========================================================
    !
    !! Fourier transforms Wannier-gauge representation
    !! of a given operator O from q-space to R-space:
    !!
    !! O_ij(q) --> O_ij(R) = (1/N_kpts) sum_q e^{-iqR} O_ij(q)
    !
    !==========================================================

    use w90_constants, only: dp, cmplx_0, cmplx_i, twopi
    use w90_parameters, only: num_kpts, kpt_latt
    use w90_postw90_common, only: nrpts, irvec

    implicit none

    ! Arguments
    !
    complex(kind=dp), dimension(:, :, :), intent(in)  :: op_q
    !! Operator in q-space
    complex(kind=dp), dimension(:, :, :), intent(out) :: op_R
    !! Operator in R-space

    integer          :: ir, ik
    real(kind=dp)    :: rdotq
    complex(kind=dp) :: phase_fac

    op_R = cmplx_0
    do ir = 1, nrpts
      do ik = 1, num_kpts
        rdotq = twopi*dot_product(kpt_latt(:, ik), irvec(:, ir))
        phase_fac = exp(-cmplx_i*rdotq)
        op_R(:, :, ir) = op_R(:, :, ir) + phase_fac*op_q(:, :, ik)
      enddo
    enddo
    op_R = op_R/real(num_kpts, dp)

  end subroutine fourier_q_to_R

  !===============================================
  subroutine get_win_min(ik, win_min)
    !===============================================
    !
    !! Find the lower bound (band index) of the
    !! outer energy window at the specified k-point
    !
    !===============================================

    use w90_constants, only: dp
    use w90_parameters, only: num_bands, lwindow, have_disentangled

    implicit none

    ! Arguments
    !
    integer, intent(in)  :: ik
    !! Index of the required k-point
    integer, intent(out) :: win_min
    !! Index of the lower band of the outer energy window

    integer :: j

    if (.not. have_disentangled) then
      win_min = 1
      return
    endif

    do j = 1, num_bands
      if (lwindow(j, ik)) then
        win_min = j
        exit
      end if
    end do

  end subroutine get_win_min

  !==========================================================
  subroutine get_gauge_overlap_matrix(ik_a, ns_a, ik_b, ns_b, S_o, S, H)
    !==========================================================
    !
    ! Wannier-gauge overlap matrix S in the projected subspace
    !
    ! TODO: Update this documentation of this routine and
    ! possibliy give it a better name. The routine has been
    ! generalized multiple times.
    !
    !==========================================================

    use w90_constants, only: dp, cmplx_0
    use w90_postw90_common, only: v_matrix
    use w90_parameters, only: num_wann, eigval
    use w90_utility, only: utility_zgemmm

    integer, intent(in) :: ik_a, ns_a, ik_b, ns_b

    complex(kind=dp), dimension(:, :), intent(in)            :: S_o
    complex(kind=dp), dimension(:, :), intent(out), optional :: S, H

    integer :: wm_a, wm_b

    call get_win_min(ik_a, wm_a)
    call get_win_min(ik_b, wm_b)

    call utility_zgemmm(v_matrix(1:ns_a, 1:num_wann, ik_a), 'C', &
                        S_o(wm_a:wm_a + ns_a - 1, wm_b:wm_b + ns_b - 1), 'N', &
                        v_matrix(1:ns_b, 1:num_wann, ik_b), 'N', &
                        S, eigval(wm_a:wm_a + ns_a - 1, ik_a), H)
  end subroutine get_gauge_overlap_matrix

end module w90_get_oper