interaction.cpp
1 /***************************************************************************
2  Copyright (C) 2002-2015 Kentaro Kitagawa
3  kitagawa@phys.s.u-tokyo.ac.jp
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Library General Public
7  License as published by the Free Software Foundation; either
8  version 2 of the License, or (at your option) any later version.
9 
10  You should have received a copy of the GNU Library General
11  Public License and a list of authors along with this program;
12  see the files COPYING and AUTHORS.
13 ***************************************************************************/
14 #include "montecarlo.h"
15 using std::vector;
16 using std::complex;
17 #include <numeric>
18 using std::accumulate;
19 
20 void MonteCarlo::addFieldsReal(MonteCarlo::Spin v, FieldRealArray &array, int di, int dj, int dk) {
21  int size = 2*s_cutoff_real + 1;
22 #ifdef PACK_4FLOAT
23  int sizex = (2*s_cutoff_real + 3)/4 + 1;
24  for(int al= 0; al < 4; al++) {
25  int r = sizex*(size*(dk + s_cutoff_real) + (dj + s_cutoff_real)) + (di + s_cutoff_real + al) / 4;
26  int c = (di + s_cutoff_real + al) % 4;
27  array.align[al][r].x[c] += v;
28  }
29 #else
30  int r = size*(size*(dk + s_cutoff_real) + (dj + s_cutoff_real)) + (di + s_cutoff_real);
31  array[r] += v;
32 #endif
33 }
34 
35 #include <pthread.h>
36 void *
37 MonteCarlo::xthread_start_routine(void *x)
38 {
39  MonteCarlo *ptr = (MonteCarlo *)x;
40  ptr->execute();
41  return NULL;
42 }
43 int
45 {
46  double r2int = v.x*v.x + v.y*v.y + v.z*v.z;
47  // spherical boundary
48  if(r2int - 0.01 > (4*s_cutoff_real_radius)*(4*s_cutoff_real_radius)) {
49  return false;
50  }
51  double r = LATTICE_CONST/4.0 * sqrt((double)r2int);
52  double ir = 1.0/r;
53  double alphar = s_alpha*r;
54  double ir_eff = erfc_(alphar)*ir;
55  double derfc = 2.0*s_alpha/sqrt(M_PI)*exp(-alphar*alphar);
56  double ir3_eff = ir*ir*(ir_eff + derfc);
57  double ir5_eff = ir*ir*(ir3_eff + 2.0/3.0*s_alpha*s_alpha*derfc);
58  Vector3<double> rij((double)v.x, (double)v.y, (double)v.z);
59  rij *= LATTICE_CONST/4.0;
60  Vector3<double> m2(s_ASiteIsingVector[site2]);
61  m2 *= 1e-7 * A_MOMENT * MU_B;
62  double mr = m2.innerProduct(rij);
63  Vector3<double> hdip(rij);
64  hdip *= mr * ir5_eff * 3.0;
65  m2 *= ir3_eff;
66  hdip -= m2;
67  *ret = hdip;
68  return true;
69 }
70 int
71 MonteCarlo::dipoleFieldRec(const Vector3<double> &v, int site2, Vector3<double> *ret)
72 {
73  double k2int = v.x*v.x + v.y*v.y + v.z*v.z;
74  if(k2int >= s_cutoff_rec_radius*s_cutoff_rec_radius) {
75  return false;
76  }
77 
78  Vector3<double> m2(s_ASiteIsingVector[site2]);
79  m2 *= 4.0 * M_PI * 1e-7 * A_MOMENT * MU_B / LATTICE_VOLUME / (s_num_spins/16);
80  Vector3<double> k(v.x, v.y, v.z);
81  k *= 2.0*M_PI / (LATTICE_CONST * s_L);
82  Vector3<double> hdip(k);
83  if(k2int == 0) {
84  // surface term.
85  hdip = m2;
86  hdip *= - s_dfactor;
87  }
88  else {
89  double k2 = k.innerProduct(k);
90  hdip *= - exp(-k2/(4.0*s_alpha*s_alpha)) / k2 * k.innerProduct(m2);
91  }
92  // summation of minus-k space.
93  if(v.z != 0)
94  hdip *= 2.0;
95  *ret = hdip;
96  return true;
97 }
98 int
99 MonteCarlo::setupField(int size, double dfactor,
100  double radius, double radius_rec, double alpha)
101 {
102  s_bAborting = false;
103  s_L = size;
104  s_num_spins = size*size*size*16;
105  s_dfactor = dfactor;
106 
107  for(int site1 = 0; site1 < 16; site1++) {
108  s_ASiteIsingVector[site1].x = cg_ASiteIsingAxes[site1][0] / sqrt(3.0);
109  s_ASiteIsingVector[site1].y = cg_ASiteIsingAxes[site1][1] / sqrt(3.0);
110  s_ASiteIsingVector[site1].z = cg_ASiteIsingAxes[site1][2] / sqrt(3.0);
111  }
112 
113  int cutoff_real = (int)ceil(radius - 0.01);
114  alpha /= LATTICE_CONST;
115  s_alpha = alpha;
116  double mag_cutoff = erfc_(alpha*LATTICE_CONST*radius);
117  fprintf(stderr, "Magnitude at the cutoff boundary for real = %g%%\n", 100.0*mag_cutoff);
118  s_cutoff_real = cutoff_real;
119  s_cutoff_real_radius = radius;
120 // double radius_rec = sqrt(-log(mag_cutoff)) * 2.0 * alpha / (2.0*M_PI / (LATTICE_CONST * s_L));
121  int cutoff_rec = (int)ceil(radius_rec);
122  s_cutoff_rec = cutoff_rec;
123  s_cutoff_rec_radius = radius_rec;
124  double mag_cutoff_rec = exp(-pow(radius_rec / (2.0 * alpha) * (2.0 * M_PI / (LATTICE_CONST * s_L)), 2.0));
125  fprintf(stderr, "Magnitude at the cutoff boundary for rec = %g%%\n", 100.0*mag_cutoff_rec);
126 
127  for(int site2 = 0; site2 < 16; site2++) {
128  #ifdef PACK_4FLOAT
129  int size = (cutoff_real*2+1)*(cutoff_real*2+1)*((cutoff_real*2 + 3)/4 + 1);
130  for(int al = 0; al < 4; al++) {
131  for(int site1 = 0; site1 < 16; site1++) {
132  s_fields_real[site1][site2].align[al].clear();
133  s_fields_real[site1][site2].align[al].resize(size);
134  }
135  for(int d = 0; d < 3; d++) {
136  for(int site1 = 0; site1 < 16; site1++) {
137  s_fields_real_B[site1][site2][d].align[al].clear();
138  s_fields_real_B[site1][site2][d].align[al].resize(size);
139  }
140  for(int site1 = 0; site1 < 8; site1++) {
141  s_fields_real_8a[site1][site2][d].align[al].clear();
142  s_fields_real_8a[site1][site2][d].align[al].resize(size);
143  }
144  for(int site1 = 0; site1 < 48; site1++) {
145  s_fields_real_48f[site1][site2][d].align[al].clear();
146  s_fields_real_48f[site1][site2][d].align[al].resize(size);
147  }
148  }
149  }
150  #else
151  int size = (cutoff_real*2+1)*(cutoff_real*2+1)*(cutoff_real*2+1);
152  for(int site1 = 0; site1 < 16; site1++) {
153  s_fields_real[site1][site2].clear();
154  s_fields_real[site1][site2].resize(size, 0.0);
155  }
156  for(int d = 0; d < 3; d++) {
157  for(int site1 = 0; site1 < 16; site1++) {
158  s_fields_real_B[site1][site2][d].clear();
159  s_fields_real_B[site1][site2][d].resize(size, 0.0);
160  }
161  for(int site1 = 0; site1 < 8; site1++) {
162  s_fields_real_8a[site1][site2][d].clear();
163  s_fields_real_8a[site1][site2][d].resize(size, 0.0);
164  }
165  for(int site1 = 0; site1 < 48; site1++) {
166  s_fields_real_48f[site1][site2][d].clear();
167  s_fields_real_48f[site1][site2][d].resize(size, 0.0);
168  }
169  }
170  #endif
171  }
172 
173 
174  int cnt_n = 0;
175  int cnt_nn = 0;
176  double d_nn = 0.0;
177  int cnt_nnn = 0;
178  int cnt_3nn = 0;
179  vector<int> cnt_n_r2(4*(cutoff_real+1)*4*(cutoff_real+1));
180  for(int site1 = 0; site1 < 16; site1++) {
181  Vector3<double> d1(s_ASiteIsingVector[site1]);
182  for(int site2 = 0; site2 < 16; site2++) {
183  for(int dk = -cutoff_real; dk <= cutoff_real; dk++) {
184  for(int dj = -cutoff_real; dj <= cutoff_real; dj++) {
185  for(int di = -cutoff_real; di <= cutoff_real; di++) {
186  VectorInt v = distance(site1,site2,di,dj,dk);
187  int r2int = v.x*v.x + v.y*v.y + v.z*v.z;
188  if(r2int == 0) {
189  continue;
190  }
191  Vector3<double> hdip;
192  if(!dipoleFieldReal(v, site2, &hdip))
193  continue;
194  double hdip_d1 = d1.innerProduct(hdip);
195  assert(fabs(hdip_d1) < 1.0);
196  // fprintf(stderr, "%g\n",h);
197 
198  //counts for real-space.
199  cnt_n_r2[r2int]++;
200  addFieldsReal(hdip_d1, s_fields_real[site1][site2], di, dj, dk);
201 
202  cnt_n++;
203  // Nearest neighbor.
204  if(r2int <= 2*1*1) {
205  assert(r2int == 2);
206  Vector3<double> d2(s_ASiteIsingVector[site2]);
207 
208  d_nn += hdip_d1 / (K_B / (A_MOMENT * MU_B)) * 3.0 * (d1.innerProduct(d2));
209  addFieldsReal(
210  J_NN * K_B / (A_MOMENT * MU_B) * 3.0 * (d1.innerProduct(d2))
211  , s_fields_real[site1][site2], di, dj, dk);
212  // fprintf(stderr, "%g\n",g_fields[didx]);
213  cnt_nn++;
214  continue;
215  }
216  // Next nearest neighbor.
217  if(r2int <= 6) {
218  cnt_nnn++;
219  continue;
220  }
221  // 3rd nearest neighbor.
222  if(r2int <= 8) {
223  cnt_3nn++;
224  continue;
225  }
226  }
227  }
228  }
229  }
230  }
231 
232  for(int site1 = 0; site1 < 16; site1++) {
233  for(int site2 = 0; site2 < 16; site2++) {
234  for(int dk = -cutoff_real; dk <= cutoff_real; dk++) {
235  for(int dj = -cutoff_real; dj <= cutoff_real; dj++) {
236  for(int di = -cutoff_real; di <= cutoff_real; di++) {
237  Vector3<double> v;
238  v.x = 4*di + cg_ASitePositions[site2][0] - cg_BSitePositions[site1][0];
239  v.y = 4*dj + cg_ASitePositions[site2][1] - cg_BSitePositions[site1][1];
240  v.z = 4*dk + cg_ASitePositions[site2][2] - cg_BSitePositions[site1][2];
241  Vector3<double> hdip;
242  if(!dipoleFieldReal(v, site2, &hdip))
243  continue;
244  addFieldsReal(hdip.x, s_fields_real_B[site1][site2][0], di, dj, dk);
245  addFieldsReal(hdip.y, s_fields_real_B[site1][site2][1], di, dj, dk);
246  addFieldsReal(hdip.z, s_fields_real_B[site1][site2][2], di, dj, dk);
247  }
248  }
249  }
250  }
251  }
252  for(int site1 = 0; site1 < 8; site1++) {
253  for(int site2 = 0; site2 < 16; site2++) {
254  for(int dk = -cutoff_real; dk <= cutoff_real; dk++) {
255  for(int dj = -cutoff_real; dj <= cutoff_real; dj++) {
256  for(int di = -cutoff_real; di <= cutoff_real; di++) {
257  Vector3<double> v;
258  v.x = 4*di + cg_ASitePositions[site2][0] - cg_8aSitePositions[site1][0] * 0.5;
259  v.y = 4*dj + cg_ASitePositions[site2][1] - cg_8aSitePositions[site1][1] * 0.5;
260  v.z = 4*dk + cg_ASitePositions[site2][2] - cg_8aSitePositions[site1][2] * 0.5;
261  Vector3<double> hdip;
262  if(!dipoleFieldReal(v, site2, &hdip))
263  continue;
264  double r2int = v.x*v.x + v.y*v.y + v.z*v.z;
265  //Nearest Neighbor.
266  if(r2int < 0.7501) {
267  hdip *= AHF_DY_O1 / AHF_DY_O1_DIPOLE;
268  }
269  addFieldsReal(hdip.x, s_fields_real_8a[site1][site2][0], di, dj, dk);
270  addFieldsReal(hdip.y, s_fields_real_8a[site1][site2][1], di, dj, dk);
271  addFieldsReal(hdip.z, s_fields_real_8a[site1][site2][2], di, dj, dk);
272  }
273  }
274  }
275  }
276  }
277  for(int site1 = 0; site1 < 48; site1++) {
278  for(int site2 = 0; site2 < 16; site2++) {
279  for(int dk = -cutoff_real; dk <= cutoff_real; dk++) {
280  for(int dj = -cutoff_real; dj <= cutoff_real; dj++) {
281  for(int di = -cutoff_real; di <= cutoff_real; di++) {
282  Vector3<double> v;
283  v.x = 4*di + cg_ASitePositions[site2][0] - cg_48fSitePositions[site1][0] * 0.5;
284  v.y = 4*dj + cg_ASitePositions[site2][1] - cg_48fSitePositions[site1][1] * 0.5;
285  v.z = 4*dk + cg_ASitePositions[site2][2] - cg_48fSitePositions[site1][2] * 0.5;
286  Vector3<double> hdip;
287  if(!dipoleFieldReal(v, site2, &hdip))
288  continue;
289  addFieldsReal(hdip.x, s_fields_real_48f[site1][site2][0], di, dj, dk);
290  addFieldsReal(hdip.y, s_fields_real_48f[site1][site2][1], di, dj, dk);
291  addFieldsReal(hdip.z, s_fields_real_48f[site1][site2][2], di, dj, dk);
292  }
293  }
294  }
295  }
296  }
297 
298  s_4r2_neighbor.clear();
299  for(int i = 0; i < (int)cnt_n_r2.size(); i++) {
300  if(cnt_n_r2[i] == 0) continue;
301 // fprintf(stderr, "neighbor r = %f, cnt = %f\n", sqrt((double)i)/4.0, cnt_n_r2[i]/16.0);
302  s_4r2_neighbor.push_back(i);
303  }
304  assert(cnt_n % 16 == 0);
305  cnt_n /= 16;
306  fprintf(stderr, "# of neighbors = %d\n", cnt_n);
307  assert(cnt_nn % 16 == 0);
308  cnt_nn /= 16;
309  assert(cnt_nn % 6 == 0);
310  fprintf(stderr, "# of nearest neighbors = %d\n", cnt_nn);
311  assert(cnt_nnn % 16 == 0);
312  cnt_nnn /= 16;
313  assert(cnt_nnn % 6 == 0);
314  fprintf(stderr, "# of next nearest neighbors = %d\n", cnt_nnn);
315  assert(cnt_3nn % 16 == 0);
316  cnt_3nn /= 16;
317  assert(cnt_3nn % 6 == 0);
318  fprintf(stderr, "# of 3rd nearest neighbors = %d\n", cnt_3nn);
319  d_nn /= 16;
320  d_nn /= cnt_nn;
321  fprintf(stderr, "D_NN = %g [K]\n", d_nn);
322 
323  int rec_size = (2*cutoff_rec + 1)*(2*cutoff_rec + 1)*(cutoff_rec + 1);
324  for(int site1 = 0; site1 < 16; site1++) {
325  Vector3<double> d1(s_ASiteIsingVector[site1]);
326 
327  for(int site2 = 0; site2 < 16; site2++) {
328  s_fields_rec[site1][site2].clear();
329  s_fields_rec[site1][site2].resize(rec_size);
330 
331  for(int kz = 0; kz <= cutoff_rec; kz++) {
332  for(int ky = -cutoff_rec; ky <= cutoff_rec; ky++) {
333  for(int kx = -cutoff_rec; kx <= cutoff_rec; kx++) {
334  VectorInt v(kx, ky, kz);
335  Vector3<double> hdip;
336  if(!dipoleFieldRec(v, site2, &hdip))
337  continue;
338  int ridx = reciprocal_index(kx,ky,kz);
339  s_fields_rec[site1][site2][ridx] = hdip.innerProduct(d1);
340  }
341  }
342  }
343  }
344  }
345  for(int site2 = 0; site2 < 16; site2++) {
346  s_fields_rec_generic[site2].clear();
347  s_fields_rec_generic[site2].resize(rec_size);
348 
349  for(int kz = 0; kz <= cutoff_rec; kz++) {
350  for(int ky = -cutoff_rec; ky <= cutoff_rec; ky++) {
351  for(int kx = -cutoff_rec; kx <= cutoff_rec; kx++) {
352  VectorInt v(kx, ky, kz);
353  Vector3<double> hdip;
354  if(!dipoleFieldRec(v, site2, &hdip))
355  continue;
356  int ridx = reciprocal_index(kx,ky,kz);
357  s_fields_rec_generic[site2][ridx] = hdip;
358  }
359  }
360  }
361  }
362  // For self-energy correction.
363  s_fields_rec_sum = accumulate(s_fields_rec[0][0].begin(), s_fields_rec[0][0].end(), 0.0);
364 
365  for(int site1 = 0; site1 < 16; site1++) {
366  s_exp_ph[site1].clear();
367  if(rec_size*s_num_spins*sizeof(double) < 500000000) {
368  //create deep cache.
369  s_exp_ph[site1].resize(rec_size * s_num_spins / 16);
370 
371  Vector3<double> pos1(cg_ASitePositions[site1]);
372  pos1 *= LATTICE_CONST / 4.0;
373  for(int k = 0; k < s_L; k++) {
374  for(int j = 0; j < s_L; j++) {
375  for(int i = 0; i < s_L; i++) {
376  int lidx = lattice_index(i,j,k);
377  double phx = 2*M_PI / (LATTICE_CONST * s_L) * (i * LATTICE_CONST + pos1.x);
378  double phy = 2*M_PI / (LATTICE_CONST * s_L) * (j * LATTICE_CONST + pos1.y);
379  double phz = 2*M_PI / (LATTICE_CONST * s_L) * (k * LATTICE_CONST + pos1.z);
380  complex<double> exp_i_rx = exp(complex<double>(0.0, phx));
381  complex<double> exp_i_ry = exp(complex<double>(0.0, phy));
382  complex<double> exp_i_rz = exp(complex<double>(0.0, phz));
383 
384  complex<double> exp_ikrz = exp(complex<double>(0.0, -cutoff_rec * (phx + phy)));
385  for(int kz = 0; kz <= cutoff_rec; kz++) {
386  complex<double> exp_ikryz = exp_ikrz;
387  for(int ky = -cutoff_rec; ky <= cutoff_rec; ky++) {
388  complex<double> exp_ikr = exp_ikryz;
389  for(int kx = -cutoff_rec; kx <= cutoff_rec; kx++) {
390  int ridx = reciprocal_index(kx,ky,kz);
391  s_exp_ph[site1][lidx * rec_size + ridx] = exp_ikr;
392  exp_ikr *= exp_i_rx;
393  }
394  exp_ikryz *= exp_i_ry;
395  }
396  exp_ikrz *= exp_i_rz;
397  }
398  assert(abs(complex<double>(s_exp_ph[site1][(lidx+1) * rec_size-1])
399  /exp(complex<double>(0.0, (cutoff_rec) * (phx + phy + phz)))
400  - complex<double>(1.0,0)) < 1e-6);
401  }
402  }
403  }
404  }
405  }
406 
407  return cnt_n;
408 }
409 
410 inline double
411 MonteCarlo::iterate_real_redirected(int cnt, const FieldRealArray &array, int i, int j, int k, int site2)
412 {
413  int cutoff = s_cutoff_real;
414  assert(cnt == cutoff*2 + 1);
415 #ifdef PACK_4FLOAT
416  int l = (s_L - 1) / 4 + 1;
417  int al = spins_real_index(i - cutoff,0,0) % 4;
418  PackedSpin h;
419  PackedSpin *pspin_i = &m_spins_real[site2][spins_real_index(i - cutoff,0,0) / 4];
420  asm ("movaps %0, %%xmm3"
421  :
422  : "m" (h)
423  : "%xmm3");
424  const PackedSpin *pfield = &array.align[al][0];
425  for(int dk = -cutoff; dk <= cutoff; dk++) {
426  PackedSpin *pspin_k = pspin_i + spins_real_index(-s_L, 0, (k+s_L+dk) % s_L) / 4;
427  for(int dj = -cutoff; dj <= cutoff; dj++) {
428  PackedSpin *pspin = pspin_k + spins_real_index(-s_L, (j+s_L+dj) % s_L, 0) / 4;
429 // assert(pspin == &m_spins_real[site2][spins_real_index(i - cutoff,(j+s_L+dj) % s_L,(k+s_L+dk) % s_L)]);
430  for(int n = 0; n < (cnt + 2) / 4 + 1; n++) {
431  asm ("movaps %0, %%xmm1;"
432  "movaps %1, %%xmm2;"
433  "mulps %%xmm1, %%xmm2;"
434  "addps %%xmm2, %%xmm3"
435  :
436  : "m" (pspin[n]), "m" (pfield[n])
437  : "%xmm1", "%xmm2", "%xmm3");
438 // PackedSpin x(pspin[n]);
439 // x *= pfield[n];
440 // h += x;
441  }
442  pfield+=(cnt + 2) / 4 + 1;
443 // assert(pspin == &m_spins_real[site2][spins_real_index(i + cutoff + 1,(j+s_L+dj) % s_L,(k+s_L+dk) % s_L)]);
444  }
445  }
446  asm ("movaps %%xmm3, %0"
447  : "=m" (h)
448  :
449  : "%xmm3");
450  assert(pfield == &*array.align[al].end());
451  return h.sum();
452 #else
453  Spin h = 0.0;
454  // note that spin images are repeated outside boundary along x.
455  Spin *pspin_i = &m_spins_real[site2][spins_real_index(i - cutoff, 0, 0)];
456  const Spin *pfield = &array[0];
457  for(int dk = -cutoff; dk <= cutoff; dk++) {
458  Spin *pspin_k = pspin_i + spins_real_index(-s_L, 0, (k+s_L+dk) % s_L);
459  for(int dj = -cutoff; dj <= cutoff; dj++) {
460  Spin *pspin = pspin_k + spins_real_index(-s_L, (j+s_L+dj) % s_L, 0);
461 // assert(pspin == &m_spins_real[site2][spins_real_index(i - cutoff,(j+s_L+dj) % s_L,(k+s_L+dk) % s_L)]);
462  for(int n = 0; n < cnt; n++) {
463  h += pfield[n] * pspin[n];
464  }
465  pfield+=cnt;
466 // assert(pspin == &m_spins_real[site2][spins_real_index(i + cutoff + 1,(j+s_L+dj) % s_L,(k+s_L+dk) % s_L)]);
467  }
468  }
469  assert(pfield == &*array.end());
470  return h;
471 #endif
472 }
473 
475 MonteCarlo::iterate_real_generic(const FieldRealArray array[16][3], int i, int j, int k)
476 {
477  int cutoff = s_cutoff_real;
478  int cnt = 2*cutoff + 1;
479  Vector3<double> hall;
480  for(int site2 = 0; site2 < 16; site2++) {
481  hall.x += iterate_real_redirected(cnt, array[site2][0], i, j, k, site2);
482  hall.y += iterate_real_redirected(cnt, array[site2][1], i, j, k, site2);
483  hall.z += iterate_real_redirected(cnt, array[site2][2], i, j, k, site2);
484  }
485  return hall;
486 }
487 
488 double
489 MonteCarlo::iterate_real(int site1, int i, int j, int k, int site2)
490 {
491  int cnt = 2 * s_cutoff_real + 1;
492  // Trick to use constant count. use -funroll-loops. to enhance optimization.
493  switch(cnt) {
494  case 3:
495  return iterate_real_redirected(3, s_fields_real[site1][site2], i, j, k, site2);
496  case 5:
497  return iterate_real_redirected(5, s_fields_real[site1][site2], i, j, k, site2);
498  case 7:
499  return iterate_real_redirected(7, s_fields_real[site1][site2], i, j, k, site2);
500  case 9:
501  return iterate_real_redirected(9, s_fields_real[site1][site2], i, j, k, site2);
502  case 11:
503  return iterate_real_redirected(11, s_fields_real[site1][site2], i, j, k, site2);
504  default:
505  return iterate_real_redirected(cnt, s_fields_real[site1][site2], i, j, k, site2);
506  }
507 }
509 MonteCarlo::iterate_rec_generic(Vector3<double> pos1, int i, int j, int k)
510 {
511  Vector3<double> h;
512  for(int site2 = 0; site2 < 16; site2++) {
513  h += iterate_rec_generic(pos1, i, j, k, site2);
514  }
515  return h;
516 }
518 MonteCarlo::iterate_rec_generic(Vector3<double> pos1, int i, int j, int k, int site2)
519 {
520  int cutoff = s_cutoff_rec;
521  int cnt = 2*cutoff+1;
522  pos1 *= LATTICE_CONST;
523  complex<Spin> *pspin = &m_spins_rec[site2][0];
524  const Vector3<Spin> *pfield = &s_fields_rec_generic[site2][0];
525 
526  double phx = 2*M_PI / (LATTICE_CONST * s_L) * (i * LATTICE_CONST + pos1.x);
527  double phy = 2*M_PI / (LATTICE_CONST * s_L) * (j * LATTICE_CONST + pos1.y);
528  double phz = 2*M_PI / (LATTICE_CONST * s_L) * (k * LATTICE_CONST + pos1.z);
529  complex<Spin> exp_i_rx = exp(complex<Spin>(0.0, phx));
530  complex<Spin> exp_i_ry = exp(complex<Spin>(0.0, phy));
531  complex<Spin> exp_i_rz = exp(complex<Spin>(0.0, phz));
532  Vector3<Spin> h;
533  complex<Spin> exp_ikrz = exp(complex<Spin>(0.0, -cutoff * (phx + phy)));
534  for(int kz = 0; kz <= cutoff; kz++) {
535  complex<Spin> exp_ikryz = exp_ikrz;
536  for(int ky = -cutoff; ky <= cutoff; ky++) {
537  complex<Spin> exp_ikr = exp_ikryz;
538  for(int n = 0; n < cnt; n++) {
539  Vector3<Spin> e(pfield[n]);
540  e *= real(pspin[n] * exp_ikr);
541  h += e;
542  exp_ikr *= exp_i_rx;
543  }
544  pspin+=cnt;
545  pfield+=cnt;
546  exp_ikryz *= exp_i_ry;
547  }
548  exp_ikrz *= exp_i_rz;
549  }
550  return h;
551 }
552 inline double
553 MonteCarlo::iterate_rec_redirected(int cutoff, int site1, int i, int j, int k, int site2)
554 {
555  int cnt = 2*cutoff+1;
556  Spin h = 0.0;
557  complex<Spin> *pspin = &m_spins_rec[site2][0];
558  Spin *pfield = &s_fields_rec[site1][site2][0];
559 
560  if(s_exp_ph[site1].size()) {
561  complex<Spin> *pexp_ph =
562  &s_exp_ph[site1][lattice_index(i,j,k) * m_spins_rec[site2].size()];
563  for(int m = 0; m < (cutoff+1)*(2*cutoff+1); m++) {
564  for(int n = 0; n < cnt; n++) {
565  h += pfield[n] * real(pspin[n] * pexp_ph[n]);
566 // assert(pfield == &s_fields_rec[site1][site2][reciprocal_index(kx,ky,kz)]);
567  }
568  pspin+=cnt;
569  pfield+=cnt;
570  pexp_ph+=cnt;
571  }
572  }
573  else {
574  Vector3<double> pos1(cg_ASitePositions[site1]);
575  pos1 *= 1.0/4.0;
576  h = iterate_rec_generic(pos1, i, j, k, site2)
577  .innerProduct(s_ASiteIsingVector[site1]);
578  }
579  // subtract self-energy.
580  if(site1 == site2) {
581  Spin spin_self = readSpin(site1, spins_real_index(i,j,k));
582  h -= s_fields_rec_sum * spin_self;
583  }
584 // assert(pspin == &*m_spins_rec[site2].end());
585 // assert(pfield == &*s_fields_rec[site1][site2].end());
586  return h;
587 }
588 double
589 MonteCarlo::iterate_rec(int site1, int i, int j, int k, int site2)
590 {
591  int cutoff = s_cutoff_rec;
592  // Trick to use constant count. use -funroll-loops. to enhance optimization.
593  switch(cutoff) {
594  case 2:
595  return iterate_rec_redirected(2, site1, i, j, k, site2);
596  case 3:
597  return iterate_rec_redirected(3, site1, i, j, k, site2);
598  case 4:
599  return iterate_rec_redirected(4, site1, i, j, k, site2);
600  case 5:
601  return iterate_rec_redirected(5, site1, i, j, k, site2);
602  case 6:
603  return iterate_rec_redirected(6, site1, i, j, k, site2);
604  case 7:
605  return iterate_rec_redirected(7, site1, i, j, k, site2);
606  default:
607  return iterate_rec_redirected(cutoff, site1, i, j, k, site2);
608  }
609 }
610 
611 
612 // Thread pool.
613 void
614 MonteCarlo::execute()
615 {
616  for(;;) {
617  int left = m_hint_site2_left;
618  if(left <= 1) {
619  XScopedLock<XCondition> lock(m_thread_pool_cond);
620  if(m_bTerminated)
621  break;
622  m_thread_pool_cond.wait();
623  continue;
624  }
625  if(!m_hint_site2_left.compare_set_strong(left, left - 1))
626  continue;
627 
628  int site2 = m_hint_sec_cache_miss[left - 1];
629  assert(site2 < 16);
630 
631  readBarrier();
632  m_hint_fields[site2] = iterate_interactions(
633  m_hint_spin1_site, m_hint_spin1_lattice_index, site2);
634  memoryBarrier();
635  if(m_hint_site2_not_done.decAndTest()) {
636  XScopedLock<XCondition> lock(m_hint_site2_last_cond);
637  m_hint_site2_last_cond.signal();
638  }
639  }
640 }
641 
642 double
643 MonteCarlo::hinteraction_miscache(int sec_cache_miss_cnt, int site1, int lidx)
644 {
645  double h = 0.0;
646 
647 // threaded summation.
648 
649  m_hint_spin1_site = site1;
650  m_hint_spin1_lattice_index = lidx;
651 // memoryBarrier();
652  m_hint_site2_not_done = sec_cache_miss_cnt;
653  // this is trigger.
654  m_hint_site2_left = sec_cache_miss_cnt;
655  {
656 // XScopedLock<XCondition> lock(m_thread_pool_cond);
657  m_thread_pool_cond.broadcast();
658  }
659 
660  for(;;) {
661  int left = m_hint_site2_left;
662  if(left == 0) {
663  XScopedLock<XCondition> lock(m_hint_site2_last_cond);
664  while(m_hint_site2_not_done > 0) {
665  m_hint_site2_last_cond.wait();
666  }
667  break;
668  }
669  if(!m_hint_site2_left.compare_set_strong(left, left - 1))
670  continue;
671 
672  int site2 = m_hint_sec_cache_miss[left - 1];
673  assert(site2 < 16);
674 
675  m_hint_fields[site2] = iterate_interactions(site1, lidx, site2);
676  if(m_hint_site2_not_done.decAndTest()) {
677  readBarrier();
678  break;
679  }
680  }
681 
682  for(int miss = 0; miss < sec_cache_miss_cnt; miss++) {
683  int site2 = m_hint_sec_cache_miss[miss];
684  h += m_hint_fields[site2];
685  }
686  return h;
687 }
688 

Generated for KAME4 by  doxygen 1.8.3