! Estimate how much memory we will allocate
subroutine param_memory_estimate
  !===========================================!
  !                                           !
  !! Estimate how much memory we will allocate
  !!
  !! Adds up approximate byte counts for the arrays named in the trailing
  !! comments of each line, grouped into four totals:
  !!   mem_param - data held in the parameters module,
  !!   mem_dis   - disentanglement phase,
  !!   mem_wan   - wannierisation phase,
  !!   mem_bw    - BoltzWann (only when ispostw90 and boltzwann are set).
  !! The totals are written to stdout, in units of 1024**2 bytes, by the
  !! process for which on_root is true. No module data is modified.
  !!
  !! All element counts are accumulated in real(dp). Forming products such
  !! as num_bands*num_bands*nntot*num_kpts in default-integer arithmetic
  !! can exceed huge(0) for large systems and yield a meaningless estimate.
  !                                           !
  !===========================================!

  use w90_comms, only: on_root

  implicit none

  ! Assumed storage size, in bytes, of one element of each intrinsic type.
  real(kind=dp), parameter :: size_log = 1.0_dp
  real(kind=dp), parameter :: size_int = 4.0_dp
  real(kind=dp), parameter :: size_real = 8.0_dp
  real(kind=dp), parameter :: size_cmplx = 16.0_dp
  ! Divisor used to report byte counts with the ' Mb' label.
  real(kind=dp), parameter :: bytes_per_mb = 1024.0_dp**2
  ! Edit descriptor shared by every "label / value / unit" line of the table.
  ! NOTE(review): a15 is narrower than the 16-character label
  ! 'Disentanglement:', so its trailing colon is dropped on output - confirm
  ! whether that is intended before widening the field.
  character(len=*), parameter :: mem_fmt = '(1x,"|",24x,a15,f16.2,a,18x,"|")'

  real(kind=dp) :: mem_wan, mem_wan1, mem_param, mem_dis, mem_dis2, mem_dis1
  real(kind=dp) :: mem_bw
  integer :: NumPoints1, NumPoints2, NumPoints3, ndim
  real(kind=dp) :: TDF_exceeding_energy
  ! Floating-point copies of the dimensions, so that size products cannot
  ! overflow integer arithmetic.
  real(kind=dp) :: r_wann, r_bands, r_kpts, r_nn
  real(kind=dp) :: r_species, r_proj
  real(kind=dp) :: r_temp, r_mu, r_tdf, r_dos, r_ndim

  mem_param = 0.0_dp
  mem_dis = 0.0_dp
  mem_dis1 = 0.0_dp
  mem_dis2 = 0.0_dp
  mem_wan = 0.0_dp
  mem_wan1 = 0.0_dp
  mem_bw = 0.0_dp

  r_wann = real(num_wann, kind=dp)
  r_bands = real(num_bands, kind=dp)
  r_kpts = real(num_kpts, kind=dp)
  r_nn = real(nntot, kind=dp)

  ! First the data stored in the parameters module
  mem_param = mem_param + r_wann*r_wann*r_kpts*size_cmplx                 !u_matrix
  if (.not. disentanglement) &
    mem_param = mem_param + r_wann*r_wann*r_nn*r_kpts*size_cmplx          !m_matrix

  if (disentanglement) then
    mem_param = mem_param + r_bands*r_wann*r_kpts*size_cmplx              ! u_matrix_opt
  endif

  if (allocated(atoms_species_num)) then
    r_species = real(num_species, kind=dp)
    mem_param = mem_param + r_species*size_int                            !atoms_species_num
    mem_param = mem_param + r_species*size_real                           !atoms_label
    mem_param = mem_param + r_species*size_real                           !atoms_symbol
    mem_param = mem_param &
                + 3.0_dp*real(maxval(atoms_species_num), kind=dp)*r_species*size_real !atoms_pos_frac
    mem_param = mem_param &
                + 3.0_dp*real(maxval(atoms_species_num), kind=dp)*r_species*size_real !atoms_pos_cart
  endif

  if (allocated(input_proj_site)) then
    r_proj = real(num_proj, kind=dp)
    mem_param = mem_param + 3.0_dp*r_proj*size_real                       !input_proj_site
    mem_param = mem_param + r_proj*size_int                               !input_proj_l
    mem_param = mem_param + r_proj*size_int                               !input_proj_m
    mem_param = mem_param + 3.0_dp*r_proj*size_real                       !input_proj_z
    mem_param = mem_param + 3.0_dp*r_proj*size_real                       !input_proj_x
    mem_param = mem_param + r_proj*size_real                              !input_proj_radial
    mem_param = mem_param + r_proj*size_real                              !input_proj_zona
  endif

  if (allocated(proj_site)) then
    mem_param = mem_param + 3.0_dp*r_wann*size_real                       !proj_site
    mem_param = mem_param + r_wann*size_int                               !proj_l
    mem_param = mem_param + r_wann*size_int                               !proj_m
    mem_param = mem_param + 3.0_dp*r_wann*size_real                       !proj_z
    mem_param = mem_param + 3.0_dp*r_wann*size_real                       !proj_x
    mem_param = mem_param + r_wann*size_real                              !proj_radial
    mem_param = mem_param + r_wann*size_real                              !proj_zona
  endif

  mem_param = mem_param + r_kpts*r_nn*size_int                            !nnlist
  ! The two "/2" terms deliberately keep integer (truncating) division.
  mem_param = mem_param + real(num_kpts*nntot/2, kind=dp)*size_int        !neigh
  mem_param = mem_param + 3.0_dp*r_kpts*r_nn*size_int                     !nncell
  mem_param = mem_param + r_nn*size_real                                  !wb
  mem_param = mem_param + real(3*nntot/2, kind=dp)*size_real              !bka
  mem_param = mem_param + 3.0_dp*r_nn*r_kpts*size_real                    !bk
  mem_param = mem_param + r_bands*r_kpts*size_real                        !eigval
  mem_param = mem_param + 3.0_dp*r_kpts*size_real                         !kpt_cart
  mem_param = mem_param + 3.0_dp*r_kpts*size_real                         !kpt_latt
  if (disentanglement) then
    mem_param = mem_param + r_kpts*size_int                               !ndimwin
    mem_param = mem_param + r_bands*r_kpts*size_log                       !lwindow
  endif
  mem_param = mem_param + 3.0_dp*r_wann*size_real                         !wannier_centres
  mem_param = mem_param + r_wann*size_real                                !wannier_spreads

  if (disentanglement) then
    ! Module vars
    mem_dis = mem_dis + r_bands*r_kpts*size_real                          !eigval_opt
    mem_dis = mem_dis + r_kpts*size_int                                   !nfirstwin
    mem_dis = mem_dis + r_kpts*size_int                                   !ndimfroz
    mem_dis = mem_dis + r_bands*r_kpts*size_int                           !indxfroz
    mem_dis = mem_dis + r_bands*r_kpts*size_int                           !indxnfroz
    mem_dis = mem_dis + r_bands*r_kpts*size_log                           !lfrozen

    !the memory high-water will occur in dis_extract or when we allocate m_matrix

    mem_dis1 = mem_dis1 + r_wann*r_bands*size_cmplx                       !cwb
    mem_dis1 = mem_dis1 + r_wann*r_wann*size_cmplx                        !cww
    mem_dis1 = mem_dis1 + r_bands*r_wann*size_cmplx                       !cbw
    mem_dis1 = mem_dis1 + 5.0_dp*r_bands*size_int                         !iwork
    mem_dis1 = mem_dis1 + r_bands*size_int                                !ifail
    mem_dis1 = mem_dis1 + r_bands*size_real                               !w
    ! n*(n+1) is always even, so the factor 0.5 reproduces n*(n+1)/2 exactly.
    if (gamma_only) then
      mem_dis1 = mem_dis1 + 0.5_dp*r_bands*(r_bands + 1.0_dp)*size_real   !cap_r
      mem_dis1 = mem_dis1 + 8.0_dp*r_bands*size_real                      !work
      mem_dis1 = mem_dis1 + r_bands*r_bands*size_real                     !rz
    else
      mem_dis1 = mem_dis1 + 7.0_dp*r_bands*size_real                      !rwork
      mem_dis1 = mem_dis1 + 0.5_dp*r_bands*(r_bands + 1.0_dp)*size_cmplx  !cap
      mem_dis1 = mem_dis1 + 2.0_dp*r_bands*size_cmplx                     !cwork
      mem_dis1 = mem_dis1 + r_bands*r_bands*size_cmplx                    !cz
    end if
    mem_dis1 = mem_dis1 + r_kpts*size_real                                !wkomegai1
    mem_dis1 = mem_dis1 + r_bands*r_bands*r_kpts*size_cmplx               !ceamp
    mem_dis1 = mem_dis1 + r_bands*r_bands*r_kpts*size_cmplx               !cham
    mem_dis2 = mem_dis2 + r_wann*r_wann*r_nn*r_kpts*size_cmplx            !m_matrix

    ! With optimisation > 0 the peak is the larger of the two workspaces.
    if (optimisation <= 0) then
      mem_dis = mem_dis + mem_dis1
    else
      mem_dis = mem_dis + max(mem_dis1, mem_dis2)
    endif

    mem_dis = mem_dis + r_bands*r_bands*r_nn*r_kpts*size_cmplx            ! m_matrix_orig
    mem_dis = mem_dis + r_bands*r_wann*r_kpts*size_cmplx                  ! a_matrix
  endif

  !Wannierise

  mem_wan1 = mem_wan1 + r_wann*r_wann*r_nn*r_kpts*size_cmplx              ! 'm0'
  if (optimisation > 0) then
    mem_wan = mem_wan + mem_wan1
  endif
  mem_wan = mem_wan + r_wann*r_wann*r_kpts*size_cmplx                     ! 'u0'
  mem_wan = mem_wan + r_wann*r_nn*r_kpts*size_real                        ! 'rnkb'
  mem_wan = mem_wan + r_wann*r_nn*r_kpts*size_real                        ! 'ln_tmp'
  mem_wan = mem_wan + r_wann*r_nn*r_kpts*size_cmplx                       ! 'csheet'
  mem_wan = mem_wan + r_wann*r_nn*r_kpts*size_real                        ! 'sheet'
  mem_wan = mem_wan + 3.0_dp*r_wann*size_real                             ! 'rave'
  mem_wan = mem_wan + r_wann*size_real                                    ! 'r2ave'
  mem_wan = mem_wan + r_wann*size_real                                    ! 'rave2'
  mem_wan = mem_wan + 3.0_dp*r_wann*size_real                             ! 'rguide'
  mem_wan = mem_wan + r_wann*r_wann*size_cmplx                            ! 'cz'
  if (gamma_only) then
    mem_wan = mem_wan + r_wann*r_wann*r_nn*2.0_dp*size_cmplx              ! m_w
    mem_wan = mem_wan + r_wann*r_wann*size_cmplx                          ! uc_rot
    mem_wan = mem_wan + r_wann*r_wann*size_real                           ! ur_rot
    !internal_svd_omega_i
    mem_wan = mem_wan + 10.0_dp*r_wann*size_cmplx                         ! cw1
    mem_wan = mem_wan + 10.0_dp*r_wann*size_cmplx                         ! cw2
    mem_wan = mem_wan + r_wann*r_wann*size_cmplx                          ! cv1
    mem_wan = mem_wan + r_wann*r_wann*size_cmplx                          ! cv2
    mem_wan = mem_wan + r_wann*r_wann*size_real                           ! cpad1
    mem_wan = mem_wan + r_wann*size_cmplx                                 ! singvd
  else
    mem_wan = mem_wan + r_wann*size_cmplx                                 ! 'cwschur1'
    mem_wan = mem_wan + 10.0_dp*r_wann*size_cmplx                         ! 'cwschur2'
    mem_wan = mem_wan + r_wann*size_cmplx                                 ! 'cwschur3'
    mem_wan = mem_wan + r_wann*size_cmplx                                 ! 'cwschur4'
    mem_wan = mem_wan + r_wann*r_wann*r_kpts*size_cmplx                   ! 'cdq'
    mem_wan = mem_wan + r_wann*r_wann*size_cmplx                          ! 'cmtmp'
    mem_wan = mem_wan + r_wann*r_wann*r_kpts*size_cmplx                   ! 'cdqkeep'
    mem_wan = mem_wan + r_wann*r_wann*size_cmplx                          ! 'tmp_cdq'
    mem_wan = mem_wan + r_wann*size_real                                  ! 'evals'
    mem_wan = mem_wan + 4.0_dp*r_wann*size_cmplx                          ! 'cwork'
    mem_wan = mem_wan + (3.0_dp*r_wann - 2.0_dp)*size_real                ! 'rwork'
    !d_omega
    mem_wan = mem_wan + r_wann*r_wann*size_cmplx                          ! 'cr'
    mem_wan = mem_wan + r_wann*r_wann*size_cmplx                          ! 'crt'
  end if

  if (ispostw90) then
    if (boltzwann) then
      if (spin_decomp) then
        ndim = 3
      else
        ndim = 1
      end if
      r_ndim = real(ndim, kind=dp)

      ! I set a big value to have a rough estimate
      TDF_exceeding_energy = 2._dp
      NumPoints1 = int(floor((boltz_temp_max - boltz_temp_min)/boltz_temp_step)) + 1 ! temperature array
      NumPoints2 = int(floor((boltz_mu_max - boltz_mu_min)/boltz_mu_step)) + 1 ! mu array
      NumPoints3 = int(floor((dis_win_max - dis_win_min + 2._dp*TDF_exceeding_energy) &
                             /boltz_tdf_energy_step)) + 1 ! tdfenergyarray
      r_temp = real(NumPoints1, kind=dp)
      r_mu = real(NumPoints2, kind=dp)
      r_tdf = real(NumPoints3, kind=dp)

      mem_bw = mem_bw + r_temp*size_real                                  !TempArray
      mem_bw = mem_bw + r_temp*size_real                                  !KTArray
      mem_bw = mem_bw + r_mu*size_real                                    !MuArray
      mem_bw = mem_bw + r_tdf*size_real                                   !TDFEnergyArray
      mem_bw = mem_bw + 6.0_dp*r_tdf*r_ndim*size_real                     !TDFArray
      mem_bw = mem_bw + 6.0_dp*r_tdf*size_real                            !IntegrandArray
      mem_bw = mem_bw + (9*4 + 6)*size_real
      !ElCondTimesSeebeckFP,ThisElCond,ElCondInverse,ThisSeebeck,ElCondTimesSeebeck
      mem_bw = mem_bw + 6.0_dp*r_temp*r_mu*size_real                      !ElCond
      mem_bw = mem_bw + 6.0_dp*r_temp*r_mu*size_real                      !Seebeck
      mem_bw = mem_bw + 6.0_dp*r_temp*r_mu*size_real                      !ThermCond
      ! I put a upper bound here below (as if there was only 1 node), because I do not have any knowledge at this point
      ! of the number of processors, so I cannot have a correct estimate
      mem_bw = mem_bw + 6.0_dp*r_temp*r_mu*size_real                      !LocalElCond
      mem_bw = mem_bw + 6.0_dp*r_temp*r_mu*size_real                      !LocalSeebeck
      mem_bw = mem_bw + 6.0_dp*r_temp*r_mu*size_real                      !LocalThermCond

      mem_bw = mem_bw + r_wann*r_wann*size_cmplx                          !HH
      mem_bw = mem_bw + 3.0_dp*r_wann*r_wann*size_cmplx                   !delHH
      mem_bw = mem_bw + r_wann*r_wann*size_cmplx                          !UU
      mem_bw = mem_bw + 3.0_dp*r_wann*size_real                           !del_eig
      mem_bw = mem_bw + r_wann*size_real                                  !eig
      mem_bw = mem_bw + r_wann*size_real                                  !levelspacing_k

      ! NumPoints1 is reused from here on for the DOS energy grid.
      NumPoints1 = int(floor((boltz_dos_energy_max - boltz_dos_energy_min)/boltz_dos_energy_step)) + 1 !dosnumpoints
      r_dos = real(NumPoints1, kind=dp)
      mem_bw = mem_bw + r_dos*size_real                                   !DOS_EnergyArray
      mem_bw = mem_bw + 6.0_dp*r_ndim*r_tdf*size_real                     !TDF_k
      mem_bw = mem_bw + r_ndim*r_dos*size_real                            !DOS_k
      mem_bw = mem_bw + r_ndim*r_dos*size_real                            !DOS_all
    end if
  end if

  if (disentanglement) &
    mem_wan = mem_wan + r_wann*r_wann*r_nn*r_kpts*size_cmplx              !m_matrix

  ! Only the process flagged by on_root writes the report.
  if (on_root) then
    write (stdout, '(1x,a)') '*============================================================================*'
    write (stdout, '(1x,a)') '| MEMORY ESTIMATE |'
    write (stdout, '(1x,a)') '| Maximum RAM allocated during each phase of the calculation |'
    write (stdout, '(1x,a)') '*============================================================================*'
    if (disentanglement) &
      write (stdout, mem_fmt) 'Disentanglement:', (mem_param + mem_dis)/bytes_per_mb, ' Mb'
    write (stdout, mem_fmt) 'Wannierise:', (mem_param + mem_wan)/bytes_per_mb, ' Mb'
    if (optimisation > 0 .and. iprint > 1) then
      write (stdout, '(1x,a)') '| |'
      write (stdout, '(1x,a)') '| N.B. by setting optimisation=0 memory usage will be reduced to: |'
      ! Swap the max(mem_dis1, mem_dis2) peak for mem_dis1 alone, which is
      ! what the optimisation <= 0 branch above accounts for.
      if (disentanglement) &
        write (stdout, mem_fmt) 'Disentanglement:', &
        (mem_param + mem_dis - max(mem_dis1, mem_dis2) + mem_dis1)/bytes_per_mb, ' Mb'
      if (gamma_only) then
        write (stdout, mem_fmt) 'Wannierise:', (mem_param + mem_wan)/bytes_per_mb, ' Mb'
      else
        write (stdout, mem_fmt) 'Wannierise:', &
          (mem_param + mem_wan - mem_wan1)/bytes_per_mb, ' Mb'
      end if
      write (stdout, '(1x,a)') '| However, this will result in more i/o and slow down the calculation |'
    endif

    if (ispostw90) then
      if (boltzwann) &
        write (stdout, mem_fmt) 'BoltzWann:', (mem_param + mem_bw)/bytes_per_mb, ' Mb'
    end if

    ! The plot_wannier line reports the same total as the Wannierise line.
    write (stdout, mem_fmt) 'plot_wannier:', (mem_param + mem_wan)/bytes_per_mb, ' Mb'
    write (stdout, '(1x,a)') '*----------------------------------------------------------------------------*'
    write (stdout, *) ' '
  endif

end subroutine param_memory_estimate