14 #include "montecarlo.h"
18 using std::accumulate;
20 void MonteCarlo::addFieldsReal(MonteCarlo::Spin v, FieldRealArray &array,
int di,
int dj,
int dk) {
24 for(
int al= 0; al < 4; al++) {
27 array.align[al][r].x[c] += v;
37 MonteCarlo::xthread_start_routine(
void *x)
46 double r2int = v.x*v.x + v.y*v.y + v.z*v.z;
48 if(r2int - 0.01 > (4*s_cutoff_real_radius)*(4*s_cutoff_real_radius)) {
51 double r = LATTICE_CONST/4.0 * sqrt((
double)r2int);
54 double ir_eff = erfc_(alphar)*ir;
55 double derfc = 2.0*
s_alpha/sqrt(M_PI)*exp(-alphar*alphar);
56 double ir3_eff = ir*ir*(ir_eff + derfc);
59 rij *= LATTICE_CONST/4.0;
61 m2 *= 1e-7 * A_MOMENT * MU_B;
62 double mr = m2.innerProduct(rij);
64 hdip *= mr * ir5_eff * 3.0;
71 MonteCarlo::dipoleFieldRec(
const Vector3<double> &v,
int site2, Vector3<double> *ret)
73 double k2int = v.x*v.x + v.y*v.y + v.z*v.z;
74 if(k2int >= s_cutoff_rec_radius*s_cutoff_rec_radius) {
78 Vector3<double> m2(s_ASiteIsingVector[site2]);
79 m2 *= 4.0 * M_PI * 1e-7 * A_MOMENT * MU_B / LATTICE_VOLUME / (
s_num_spins/16);
80 Vector3<double> k(v.x, v.y, v.z);
81 k *= 2.0*M_PI / (LATTICE_CONST *
s_L);
82 Vector3<double> hdip(k);
89 double k2 = k.innerProduct(k);
90 hdip *= - exp(-k2/(4.0*
s_alpha*
s_alpha)) / k2 * k.innerProduct(m2);
100 double radius,
double radius_rec,
double alpha)
107 for(
int site1 = 0; site1 < 16; site1++) {
108 s_ASiteIsingVector[site1].x = cg_ASiteIsingAxes[site1][0] / sqrt(3.0);
109 s_ASiteIsingVector[site1].y = cg_ASiteIsingAxes[site1][1] / sqrt(3.0);
110 s_ASiteIsingVector[site1].z = cg_ASiteIsingAxes[site1][2] / sqrt(3.0);
113 int cutoff_real = (int)ceil(radius - 0.01);
114 alpha /= LATTICE_CONST;
116 double mag_cutoff = erfc_(alpha*LATTICE_CONST*radius);
117 fprintf(stderr,
"Magnitude at the cutoff boundary for real = %g%%\n", 100.0*mag_cutoff);
119 s_cutoff_real_radius = radius;
121 int cutoff_rec = (int)ceil(radius_rec);
123 s_cutoff_rec_radius = radius_rec;
124 double mag_cutoff_rec = exp(-pow(radius_rec / (2.0 * alpha) * (2.0 * M_PI / (LATTICE_CONST *
s_L)), 2.0));
125 fprintf(stderr,
"Magnitude at the cutoff boundary for rec = %g%%\n", 100.0*mag_cutoff_rec);
127 for(
int site2 = 0; site2 < 16; site2++) {
129 int size = (cutoff_real*2+1)*(cutoff_real*2+1)*((cutoff_real*2 + 3)/4 + 1);
130 for(
int al = 0; al < 4; al++) {
131 for(
int site1 = 0; site1 < 16; site1++) {
132 s_fields_real[site1][site2].align[al].clear();
133 s_fields_real[site1][site2].align[al].resize(size);
135 for(
int d = 0; d < 3; d++) {
136 for(
int site1 = 0; site1 < 16; site1++) {
137 s_fields_real_B[site1][site2][d].align[al].clear();
138 s_fields_real_B[site1][site2][d].align[al].resize(size);
140 for(
int site1 = 0; site1 < 8; site1++) {
141 s_fields_real_8a[site1][site2][d].align[al].clear();
142 s_fields_real_8a[site1][site2][d].align[al].resize(size);
144 for(
int site1 = 0; site1 < 48; site1++) {
145 s_fields_real_48f[site1][site2][d].align[al].clear();
146 s_fields_real_48f[site1][site2][d].align[al].resize(size);
151 int size = (cutoff_real*2+1)*(cutoff_real*2+1)*(cutoff_real*2+1);
152 for(
int site1 = 0; site1 < 16; site1++) {
153 s_fields_real[site1][site2].clear();
154 s_fields_real[site1][site2].resize(size, 0.0);
156 for(
int d = 0; d < 3; d++) {
157 for(
int site1 = 0; site1 < 16; site1++) {
158 s_fields_real_B[site1][site2][d].clear();
159 s_fields_real_B[site1][site2][d].resize(size, 0.0);
161 for(
int site1 = 0; site1 < 8; site1++) {
162 s_fields_real_8a[site1][site2][d].clear();
163 s_fields_real_8a[site1][site2][d].resize(size, 0.0);
165 for(
int site1 = 0; site1 < 48; site1++) {
166 s_fields_real_48f[site1][site2][d].clear();
167 s_fields_real_48f[site1][site2][d].resize(size, 0.0);
179 vector<int> cnt_n_r2(4*(cutoff_real+1)*4*(cutoff_real+1));
180 for(
int site1 = 0; site1 < 16; site1++) {
182 for(
int site2 = 0; site2 < 16; site2++) {
183 for(
int dk = -cutoff_real; dk <= cutoff_real; dk++) {
184 for(
int dj = -cutoff_real; dj <= cutoff_real; dj++) {
185 for(
int di = -cutoff_real; di <= cutoff_real; di++) {
187 int r2int = v.x*v.x + v.y*v.y + v.z*v.z;
194 double hdip_d1 = d1.innerProduct(hdip);
195 assert(fabs(hdip_d1) < 1.0);
200 addFieldsReal(hdip_d1, s_fields_real[site1][site2], di, dj, dk);
208 d_nn += hdip_d1 / (K_B / (A_MOMENT * MU_B)) * 3.0 * (d1.innerProduct(d2));
210 J_NN * K_B / (A_MOMENT * MU_B) * 3.0 * (d1.innerProduct(d2))
211 , s_fields_real[site1][site2], di, dj, dk);
232 for(
int site1 = 0; site1 < 16; site1++) {
233 for(
int site2 = 0; site2 < 16; site2++) {
234 for(
int dk = -cutoff_real; dk <= cutoff_real; dk++) {
235 for(
int dj = -cutoff_real; dj <= cutoff_real; dj++) {
236 for(
int di = -cutoff_real; di <= cutoff_real; di++) {
238 v.x = 4*di + cg_ASitePositions[site2][0] - cg_BSitePositions[site1][0];
239 v.y = 4*dj + cg_ASitePositions[site2][1] - cg_BSitePositions[site1][1];
240 v.z = 4*dk + cg_ASitePositions[site2][2] - cg_BSitePositions[site1][2];
244 addFieldsReal(hdip.x, s_fields_real_B[site1][site2][0], di, dj, dk);
245 addFieldsReal(hdip.y, s_fields_real_B[site1][site2][1], di, dj, dk);
246 addFieldsReal(hdip.z, s_fields_real_B[site1][site2][2], di, dj, dk);
252 for(
int site1 = 0; site1 < 8; site1++) {
253 for(
int site2 = 0; site2 < 16; site2++) {
254 for(
int dk = -cutoff_real; dk <= cutoff_real; dk++) {
255 for(
int dj = -cutoff_real; dj <= cutoff_real; dj++) {
256 for(
int di = -cutoff_real; di <= cutoff_real; di++) {
258 v.x = 4*di + cg_ASitePositions[site2][0] - cg_8aSitePositions[site1][0] * 0.5;
259 v.y = 4*dj + cg_ASitePositions[site2][1] - cg_8aSitePositions[site1][1] * 0.5;
260 v.z = 4*dk + cg_ASitePositions[site2][2] - cg_8aSitePositions[site1][2] * 0.5;
264 double r2int = v.x*v.x + v.y*v.y + v.z*v.z;
267 hdip *= AHF_DY_O1 / AHF_DY_O1_DIPOLE;
269 addFieldsReal(hdip.x, s_fields_real_8a[site1][site2][0], di, dj, dk);
270 addFieldsReal(hdip.y, s_fields_real_8a[site1][site2][1], di, dj, dk);
271 addFieldsReal(hdip.z, s_fields_real_8a[site1][site2][2], di, dj, dk);
277 for(
int site1 = 0; site1 < 48; site1++) {
278 for(
int site2 = 0; site2 < 16; site2++) {
279 for(
int dk = -cutoff_real; dk <= cutoff_real; dk++) {
280 for(
int dj = -cutoff_real; dj <= cutoff_real; dj++) {
281 for(
int di = -cutoff_real; di <= cutoff_real; di++) {
283 v.x = 4*di + cg_ASitePositions[site2][0] - cg_48fSitePositions[site1][0] * 0.5;
284 v.y = 4*dj + cg_ASitePositions[site2][1] - cg_48fSitePositions[site1][1] * 0.5;
285 v.z = 4*dk + cg_ASitePositions[site2][2] - cg_48fSitePositions[site1][2] * 0.5;
289 addFieldsReal(hdip.x, s_fields_real_48f[site1][site2][0], di, dj, dk);
290 addFieldsReal(hdip.y, s_fields_real_48f[site1][site2][1], di, dj, dk);
291 addFieldsReal(hdip.z, s_fields_real_48f[site1][site2][2], di, dj, dk);
299 for(
int i = 0; i < (int)cnt_n_r2.size(); i++) {
300 if(cnt_n_r2[i] == 0)
continue;
304 assert(cnt_n % 16 == 0);
306 fprintf(stderr,
"# of neighbors = %d\n", cnt_n);
307 assert(cnt_nn % 16 == 0);
309 assert(cnt_nn % 6 == 0);
310 fprintf(stderr,
"# of nearest neighbors = %d\n", cnt_nn);
311 assert(cnt_nnn % 16 == 0);
313 assert(cnt_nnn % 6 == 0);
314 fprintf(stderr,
"# of next nearest neighbors = %d\n", cnt_nnn);
315 assert(cnt_3nn % 16 == 0);
317 assert(cnt_3nn % 6 == 0);
318 fprintf(stderr,
"# of 3rd nearest neighbors = %d\n", cnt_3nn);
321 fprintf(stderr,
"D_NN = %g [K]\n", d_nn);
323 int rec_size = (2*cutoff_rec + 1)*(2*cutoff_rec + 1)*(cutoff_rec + 1);
324 for(
int site1 = 0; site1 < 16; site1++) {
327 for(
int site2 = 0; site2 < 16; site2++) {
331 for(
int kz = 0; kz <= cutoff_rec; kz++) {
332 for(
int ky = -cutoff_rec; ky <= cutoff_rec; ky++) {
333 for(
int kx = -cutoff_rec; kx <= cutoff_rec; kx++) {
336 if(!dipoleFieldRec(v, site2, &hdip))
338 int ridx = reciprocal_index(kx,ky,kz);
339 s_fields_rec[site1][site2][ridx] = hdip.innerProduct(d1);
345 for(
int site2 = 0; site2 < 16; site2++) {
346 s_fields_rec_generic[site2].clear();
347 s_fields_rec_generic[site2].resize(rec_size);
349 for(
int kz = 0; kz <= cutoff_rec; kz++) {
350 for(
int ky = -cutoff_rec; ky <= cutoff_rec; ky++) {
351 for(
int kx = -cutoff_rec; kx <= cutoff_rec; kx++) {
354 if(!dipoleFieldRec(v, site2, &hdip))
356 int ridx = reciprocal_index(kx,ky,kz);
357 s_fields_rec_generic[site2][ridx] = hdip;
365 for(
int site1 = 0; site1 < 16; site1++) {
366 s_exp_ph[site1].clear();
367 if(rec_size*
s_num_spins*
sizeof(
double) < 500000000) {
369 s_exp_ph[site1].resize(rec_size *
s_num_spins / 16);
372 pos1 *= LATTICE_CONST / 4.0;
373 for(
int k = 0; k <
s_L; k++) {
374 for(
int j = 0; j <
s_L; j++) {
375 for(
int i = 0; i <
s_L; i++) {
376 int lidx = lattice_index(i,j,k);
377 double phx = 2*M_PI / (LATTICE_CONST *
s_L) * (i * LATTICE_CONST + pos1.x);
378 double phy = 2*M_PI / (LATTICE_CONST *
s_L) * (j * LATTICE_CONST + pos1.y);
379 double phz = 2*M_PI / (LATTICE_CONST *
s_L) * (k * LATTICE_CONST + pos1.z);
380 complex<double> exp_i_rx = exp(complex<double>(0.0, phx));
381 complex<double> exp_i_ry = exp(complex<double>(0.0, phy));
382 complex<double> exp_i_rz = exp(complex<double>(0.0, phz));
384 complex<double> exp_ikrz = exp(complex<double>(0.0, -cutoff_rec * (phx + phy)));
385 for(
int kz = 0; kz <= cutoff_rec; kz++) {
386 complex<double> exp_ikryz = exp_ikrz;
387 for(
int ky = -cutoff_rec; ky <= cutoff_rec; ky++) {
388 complex<double> exp_ikr = exp_ikryz;
389 for(
int kx = -cutoff_rec; kx <= cutoff_rec; kx++) {
390 int ridx = reciprocal_index(kx,ky,kz);
391 s_exp_ph[site1][lidx * rec_size + ridx] = exp_ikr;
394 exp_ikryz *= exp_i_ry;
396 exp_ikrz *= exp_i_rz;
398 assert(abs(complex<double>(s_exp_ph[site1][(lidx+1) * rec_size-1])
399 /exp(complex<double>(0.0, (cutoff_rec) * (phx + phy + phz)))
400 - complex<double>(1.0,0)) < 1e-6);
411 MonteCarlo::iterate_real_redirected(
int cnt,
const FieldRealArray &array,
int i,
int j,
int k,
int site2)
414 assert(cnt == cutoff*2 + 1);
416 int l = (
s_L - 1) / 4 + 1;
417 int al = spins_real_index(i - cutoff,0,0) % 4;
419 PackedSpin *pspin_i = &
m_spins_real[site2][spins_real_index(i - cutoff,0,0) / 4];
420 asm (
"movaps %0, %%xmm3"
424 const PackedSpin *pfield = &array.align[al][0];
425 for(
int dk = -cutoff; dk <= cutoff; dk++) {
426 PackedSpin *pspin_k = pspin_i + spins_real_index(-
s_L, 0, (k+
s_L+dk) %
s_L) / 4;
427 for(
int dj = -cutoff; dj <= cutoff; dj++) {
428 PackedSpin *pspin = pspin_k + spins_real_index(-
s_L, (j+
s_L+dj) %
s_L, 0) / 4;
430 for(
int n = 0; n < (cnt + 2) / 4 + 1; n++) {
431 asm (
"movaps %0, %%xmm1;"
433 "mulps %%xmm1, %%xmm2;"
434 "addps %%xmm2, %%xmm3"
436 :
"m" (pspin[n]),
"m" (pfield[n])
437 :
"%xmm1",
"%xmm2",
"%xmm3");
442 pfield+=(cnt + 2) / 4 + 1;
446 asm (
"movaps %%xmm3, %0"
450 assert(pfield == &*array.align[al].end());
455 Spin *pspin_i = &
m_spins_real[site2][spins_real_index(i - cutoff, 0, 0)];
456 const Spin *pfield = &array[0];
457 for(
int dk = -cutoff; dk <= cutoff; dk++) {
458 Spin *pspin_k = pspin_i + spins_real_index(-
s_L, 0, (k+
s_L+dk) %
s_L);
459 for(
int dj = -cutoff; dj <= cutoff; dj++) {
460 Spin *pspin = pspin_k + spins_real_index(-
s_L, (j+
s_L+dj) %
s_L, 0);
462 for(
int n = 0; n < cnt; n++) {
463 h += pfield[n] * pspin[n];
469 assert(pfield == &*array.end());
475 MonteCarlo::iterate_real_generic(
const FieldRealArray array[16][3],
int i,
int j,
int k)
478 int cnt = 2*cutoff + 1;
479 Vector3<double> hall;
480 for(
int site2 = 0; site2 < 16; site2++) {
481 hall.x += iterate_real_redirected(cnt, array[site2][0], i, j, k, site2);
482 hall.y += iterate_real_redirected(cnt, array[site2][1], i, j, k, site2);
483 hall.z += iterate_real_redirected(cnt, array[site2][2], i, j, k, site2);
489 MonteCarlo::iterate_real(
int site1,
int i,
int j,
int k,
int site2)
495 return iterate_real_redirected(3, s_fields_real[site1][site2], i, j, k, site2);
497 return iterate_real_redirected(5, s_fields_real[site1][site2], i, j, k, site2);
499 return iterate_real_redirected(7, s_fields_real[site1][site2], i, j, k, site2);
501 return iterate_real_redirected(9, s_fields_real[site1][site2], i, j, k, site2);
503 return iterate_real_redirected(11, s_fields_real[site1][site2], i, j, k, site2);
505 return iterate_real_redirected(cnt, s_fields_real[site1][site2], i, j, k, site2);
509 MonteCarlo::iterate_rec_generic(Vector3<double> pos1,
int i,
int j,
int k)
512 for(
int site2 = 0; site2 < 16; site2++) {
513 h += iterate_rec_generic(pos1, i, j, k, site2);
518 MonteCarlo::iterate_rec_generic(Vector3<double> pos1,
int i,
int j,
int k,
int site2)
521 int cnt = 2*cutoff+1;
522 pos1 *= LATTICE_CONST;
524 const Vector3<Spin> *pfield = &s_fields_rec_generic[site2][0];
526 double phx = 2*M_PI / (LATTICE_CONST *
s_L) * (i * LATTICE_CONST + pos1.x);
527 double phy = 2*M_PI / (LATTICE_CONST *
s_L) * (j * LATTICE_CONST + pos1.y);
528 double phz = 2*M_PI / (LATTICE_CONST *
s_L) * (k * LATTICE_CONST + pos1.z);
529 complex<Spin> exp_i_rx = exp(complex<Spin>(0.0, phx));
530 complex<Spin> exp_i_ry = exp(complex<Spin>(0.0, phy));
531 complex<Spin> exp_i_rz = exp(complex<Spin>(0.0, phz));
533 complex<Spin> exp_ikrz = exp(complex<Spin>(0.0, -cutoff * (phx + phy)));
534 for(
int kz = 0; kz <= cutoff; kz++) {
535 complex<Spin> exp_ikryz = exp_ikrz;
536 for(
int ky = -cutoff; ky <= cutoff; ky++) {
537 complex<Spin> exp_ikr = exp_ikryz;
538 for(
int n = 0; n < cnt; n++) {
539 Vector3<Spin> e(pfield[n]);
540 e *= real(pspin[n] * exp_ikr);
546 exp_ikryz *= exp_i_ry;
548 exp_ikrz *= exp_i_rz;
553 MonteCarlo::iterate_rec_redirected(
int cutoff,
int site1,
int i,
int j,
int k,
int site2)
555 int cnt = 2*cutoff+1;
560 if(s_exp_ph[site1].size()) {
561 complex<Spin> *pexp_ph =
562 &s_exp_ph[site1][lattice_index(i,j,k) *
m_spins_rec[site2].size()];
563 for(
int m = 0; m < (cutoff+1)*(2*cutoff+1); m++) {
564 for(
int n = 0; n < cnt; n++) {
565 h += pfield[n] * real(pspin[n] * pexp_ph[n]);
574 Vector3<double> pos1(cg_ASitePositions[site1]);
576 h = iterate_rec_generic(pos1, i, j, k, site2)
577 .innerProduct(s_ASiteIsingVector[site1]);
581 Spin spin_self = readSpin(site1, spins_real_index(i,j,k));
589 MonteCarlo::iterate_rec(
int site1,
int i,
int j,
int k,
int site2)
595 return iterate_rec_redirected(2, site1, i, j, k, site2);
597 return iterate_rec_redirected(3, site1, i, j, k, site2);
599 return iterate_rec_redirected(4, site1, i, j, k, site2);
601 return iterate_rec_redirected(5, site1, i, j, k, site2);
603 return iterate_rec_redirected(6, site1, i, j, k, site2);
605 return iterate_rec_redirected(7, site1, i, j, k, site2);
607 return iterate_rec_redirected(cutoff, site1, i, j, k, site2);
614 MonteCarlo::execute()
622 m_thread_pool_cond.
wait();
628 int site2 = m_hint_sec_cache_miss[left - 1];
633 m_hint_spin1_site, m_hint_spin1_lattice_index, site2);
635 if(m_hint_site2_not_done.decAndTest()) {
637 m_hint_site2_last_cond.
signal();
643 MonteCarlo::hinteraction_miscache(
int sec_cache_miss_cnt,
int site1,
int lidx)
649 m_hint_spin1_site = site1;
650 m_hint_spin1_lattice_index = lidx;
652 m_hint_site2_not_done = sec_cache_miss_cnt;
664 while(m_hint_site2_not_done > 0) {
665 m_hint_site2_last_cond.
wait();
672 int site2 = m_hint_sec_cache_miss[left - 1];
675 m_hint_fields[site2] = iterate_interactions(site1, lidx, site2);
676 if(m_hint_site2_not_done.decAndTest()) {
682 for(
int miss = 0; miss < sec_cache_miss_cnt; miss++) {
683 int site2 = m_hint_sec_cache_miss[miss];