@@ -210,6 +210,21 @@ void Paw_Cell::set_paw_k(
210210 {
211211 gnorm[ipw] = std::sqrt (kpg[ipw][0 ]*kpg[ipw][0 ] + kpg[ipw][1 ]*kpg[ipw][1 ] + kpg[ipw][2 ]*kpg[ipw][2 ]) * tpiba;
212212 }
213+
214+ std::complex <double > i_cplx = (0.0 ,1.0 );
215+ // ikpg : i (k+G)
216+ // if(GlobalV::CAL_FORCE || GlobalV::CAL_STRESS)
217+ {
218+ ikpg.resize (npw);
219+ for (int ipw = 0 ; ipw < npw; ipw ++)
220+ {
221+ ikpg[ipw].resize (3 );
222+ for (int i = 0 ; i < 3 ; i ++)
223+ {
224+ ikpg[ipw][i] = kpg[ipw][i] * tpiba * i_cplx;
225+ }
226+ }
227+ }
213228}
214229
215230void Paw_Cell::set_isk (const int nk, const int * isk_in)
@@ -665,4 +680,81 @@ void Paw_Cell::paw_nl_psi(const int mode, const std::complex<double> * psi, std:
665680 }
666681 }
667682 }
683+ }
684+
685+ void Paw_Cell::paw_nl_force (const std::complex <double > * psi, const double * epsilon, const double * weight, const int nbands , double * force)
686+ {
687+ ModuleBase::TITLE (" Paw_Cell" ," paw_nl_force" );
688+
689+ for (int iband = 0 ; iband < nbands; iband ++)
690+ {
691+ for (int iat = 0 ; iat < nat; iat ++)
692+ {
693+ // ca : <ptilde(G)|psi(G)>
694+ // = \sum_G [\int f(r)r^2j_l(r)dr] * [(-i)^l] * [ylm(\hat{G})] * [exp(-GR_I)] *psi(G)
695+ // = \sum_ipw ptilde * fact * ylm * sk * psi (in the code below)
696+ // This is what is called 'becp' in nonlocal pp
697+ std::vector<std::complex <double >> ca;
698+ std::vector<std::vector<std::complex <double >>> dca;
699+
700+ const int it = atom_type[iat];
701+ const int nproj = paw_element_list[it].get_mstates ();
702+ const int proj_start = start_iprj_ia[iat];
703+
704+ ca.resize (nproj);
705+ dca.resize (3 );
706+ for (int i = 0 ; i < 3 ; i ++)
707+ {
708+ dca[i].resize (nproj);
709+ }
710+
711+ for (int iproj = 0 ; iproj < nproj; iproj ++)
712+ {
713+ ca[iproj] = 0.0 ;
714+ dca[0 ][iproj] = 0.0 ;
715+ dca[1 ][iproj] = 0.0 ;
716+ dca[2 ][iproj] = 0.0 ;
717+
718+ // consider use blas subroutine for this part later
719+ for (int ipw = 0 ; ipw < npw; ipw ++)
720+ {
721+ std::complex <double > overlp = psi[ipw] * std::conj (vkb[iproj+proj_start][ipw]);
722+ ca[iproj] += overlp;
723+ dca[0 ][iproj] += overlp * ikpg[ipw][0 ];
724+ dca[1 ][iproj] += overlp * ikpg[ipw][1 ];
725+ dca[2 ][iproj] += overlp * ikpg[ipw][2 ];
726+ }
727+ }
728+
729+ #ifdef __MPI
730+ Parallel_Reduce::reduce_pool (ca.data (), nproj);
731+ Parallel_Reduce::reduce_pool (dca[0 ].data (), nproj);
732+ Parallel_Reduce::reduce_pool (dca[1 ].data (), nproj);
733+ Parallel_Reduce::reduce_pool (dca[2 ].data (), nproj);
734+ #endif
735+ // sum_ij (D_ij - epsilon_n O_ij) ca_j
736+ std::vector<std::complex <double >> v_ca;
737+ v_ca.resize (nproj);
738+
739+ for (int iproj = 0 ; iproj < nproj; iproj ++)
740+ {
741+ v_ca[iproj] = 0.0 ;
742+ for (int jproj = 0 ; jproj < nproj; jproj ++)
743+ {
744+ double coeff = paw_atom_list[iat].get_dij ()[current_spin][iproj*nproj+jproj] -
745+ paw_atom_list[iat].get_sij ()[iproj*nproj+jproj] * epsilon[iband];
746+ v_ca[iproj] += coeff * ca[jproj];
747+ }
748+ }
749+
750+ // force += conjg(v_ca[iproj]) * d_ca[iproj]
751+ // \sum_i ptilde_{iproj}(G) v_ca[iproj]
752+ for (int iproj = 0 ; iproj < nproj; iproj ++)
753+ {
754+ force[iat*3 ] += (std::conj (v_ca[iproj]) * dca[0 ][iproj]).real ();
755+ force[iat*3 +1 ] += (std::conj (v_ca[iproj]) * dca[1 ][iproj]).real ();
756+ force[iat*3 +2 ] += (std::conj (v_ca[iproj]) * dca[2 ][iproj]).real ();
757+ }
758+ }
759+ }
668760}
0 commit comments