]> www.hackdaworld.org Git - physik/posic.git/blob - potentials/albe_fast.c
first attempts: parallel, albe_fast. fpu_ctrl. cleaned first j loop.
[physik/posic.git] / potentials / albe_fast.c
1 /*
2  * test: albe_new.c
3  *
4  * author: Frank Zirkelbach <frank.zirkelbach@physik.uni-augsburg.de>
5  *
6  */
7
8 #define _GNU_SOURCE
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <sys/types.h>
13 #include <sys/stat.h>
14 #include <fcntl.h>
15 #include <unistd.h>
16 #include <sys/time.h>
17 #include <time.h>
18 #include <math.h>
19
20 #ifdef PARALLEL
21 #include <omp.h>
22 #endif
23
24 #include "../moldyn.h"
25 #include "../math/math.h"
26 #include "albe.h"
27
28 /*
29  * virial calculation
30  */
31
32 #define albe_v_calc(a,f,d)      a->virial.xx+=f->x*d->x; \
33                                 a->virial.yy+=f->y*d->y; \
34                                 a->virial.zz+=f->z*d->z; \
35                                 a->virial.xy+=f->x*d->y; \
36                                 a->virial.xz+=f->x*d->z; \
37                                 a->virial.yz+=f->y*d->z
38
39 int albe_potential_force_calc(t_moldyn *moldyn) {
40
41         int i,j,k,count;
42         t_atom *itom,*jtom,*ktom;
43         t_virial *virial;
44         t_linkcell *lc;
45 #ifdef STATIC_LISTS
46         int *neighbour_i[27];
47         int p,q;
48         t_atom *atom;
49 #else
50         t_list neighbour_i[27];
51         t_list neighbour_i2[27];
52         t_list *this,*that;
53 #endif
54         u8 bc_ij,bc_ik;
55         int dnlc;
56
57         // needed to work
58         t_atom *ai;
59
60         // optimized
61         t_albe_mult_params *params;
62         t_albe_exchange *exchange;
63         t_3dvec dist_ij;
64         double d_ij2;
65         double d_ij;
66         u8 brand_i;
67         double S2;
68         int kcount;
69         double zeta_ij;
70         double pre_dzeta;
71
72         // more ...
73         double Rk,Sk,Sk2;
74         t_3dvec dist_ik;
75         double d_ik2,d_ik;
76         double cos_theta,h_cos,d2_h_cos2,frac,g,dg,s_r,arg;
77         double f_c_ik,df_c_ik;
78
79         t_3dvec force;
80         double f_a,df_a,b,db,f_c,df_c;
81         double f_r,df_r;
82         double scale;
83         double mu,B;
84         double lambda,A;
85         double r0;
86         double S,R;
87         double energy;
88
89         double dijdik_inv,fcdg,dfcg;
90         t_3dvec dcosdrj,dcosdrk;
91         t_3dvec tmp;
92
93
94         count=moldyn->count;
95         itom=moldyn->atom;
96         lc=&(moldyn->lc);
97 #ifdef STATIC_LISTS
98         atom=moldyn->atom;
99 #endif
100
101         // optimized
102         params=moldyn->pot_params;
103         exchange=&(params->exchange);
104
105
106         /* reset energy */
107         moldyn->energy=0.0;
108
109         /* reset global virial */
110         memset(&(moldyn->gvir),0,sizeof(t_virial));
111
112         /* reset force, site energy and virial of every atom */
113 #ifdef PARALLEL
114         #pragma omp parallel for private(virial)
115 #endif
116         for(i=0;i<count;i++) {
117
118                 /* reset force */
119                 v3_zero(&(itom[i].f));
120
121                 /* reset virial */
122                 virial=(&(itom[i].virial));
123                 virial->xx=0.0;
124                 virial->yy=0.0;
125                 virial->zz=0.0;
126                 virial->xy=0.0;
127                 virial->xz=0.0;
128                 virial->yz=0.0;
129         
130                 /* reset site energy */
131                 itom[i].e=0.0;
132
133         }
134
135         /* get energy, force and virial of every atom */
136
137         /* first (and only) loop over atoms i */
138         for(i=0;i<count;i++) {
139
140                 if(!(itom[i].attr&ATOM_ATTR_3BP))
141                         continue;
142
143                 link_cell_neighbour_index(moldyn,
144                                           (itom[i].r.x+moldyn->dim.x/2)/lc->x,
145                                           (itom[i].r.y+moldyn->dim.y/2)/lc->y,
146                                           (itom[i].r.z+moldyn->dim.z/2)/lc->z,
147                                           neighbour_i);
148
149                 dnlc=lc->dnlc;
150
151                 /* copy the neighbour lists */
152 #ifndef STATIC_LISTS
153                 memcpy(neighbour_i2,neighbour_i,27*sizeof(t_list));
154 #endif
155
156                 ai=&(itom[i]);
157                 brand_i=ai->brand;
158
159                 /* loop over atoms j */
160                 for(j=0;j<27;j++) {
161
162                         bc_ij=(j<dnlc)?0:1;
163 #ifdef STATIC_LISTS
164                         p=0;
165
166                         while(neighbour_i[j][p]!=0) {
167
168                                 jtom=&(atom[neighbour_i[j][p]]);
169                                 p++;
170 #else
171                         this=&(neighbour_i[j]);
172                         list_reset_f(this);
173
174                         if(this->start==NULL)
175                                 continue;
176
177                         do {
178
179                                 jtom=this->current->data;
180 #endif
181
182                                 if(jtom==&(itom[i]))
183                                         continue;
184
185                                 if(!(jtom->attr&ATOM_ATTR_3BP))
186                                         continue;
187
188                                 /* reset 3bp run */
189                                 moldyn->run3bp=1;
190
191
192 /* j1 func here ... */
193 /* albe 3 body potential function (first ij loop) */
194
195         /* reset zeta sum */
196         zeta_ij=0.0;
197
198         /*
199          * set ij depending values
200          */
201
202         if(brand_i==jtom->brand) {
203                 S2=params->S2[brand_i];
204         }
205         else {
206                 S2=params->S2mixed;
207         }
208
209         /* dist_ij, d_ij2 */
210         v3_sub(&dist_ij,&(jtom->r),&(ai->r));
211         if(bc_ij) check_per_bound(moldyn,&dist_ij);
212         d_ij2=v3_absolute_square(&dist_ij);
213
214         /* if d_ij2 > S2 => no force & potential energy contribution */
215         if(d_ij2>S2)
216                 continue;
217
218         /* d_ij */
219         d_ij=sqrt(d_ij2);
220
221         /* reset k counter for first k loop */
222         kcount=0;
223
224                                 /* first loop over atoms k */
225                                 for(k=0;k<27;k++) {
226
227                                         bc_ik=(k<dnlc)?0:1;
228 #ifdef STATIC_LISTS
229                                         q=0;
230
231                                         while(neighbour_i[j][q]!=0) {
232
233                                                 ktom=&(atom[neighbour_i[k][q]]);
234                                                 q++;
235 #else
236                                         that=&(neighbour_i2[k]);
237                                         list_reset_f(that);
238                                         
239                                         if(that->start==NULL)
240                                                 continue;
241
242                                         do {
243                                                 ktom=that->current->data;
244 #endif
245
246                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
247                                                         continue;
248
249                                                 if(ktom==jtom)
250                                                         continue;
251
252                                                 if(ktom==&(itom[i]))
253                                                         continue;
254
255
256 /* k1 func here ... */
257 /* albe 3 body potential function (first k loop) */
258
259         if(kcount>ALBE_MAXN) {
260                 printf("FATAL: neighbours = %d\n",kcount);
261                 printf("  -> %d %d %d\n",ai->tag,jtom->tag,ktom->tag);
262         }
263
264         /* ik constants */
265         if(brand_i==ktom->brand) {
266                 Rk=params->R[brand_i];
267                 Sk=params->S[brand_i];
268                 Sk2=params->S2[brand_i];
269                 /* albe needs i,k depending c,d,h and gamma values */
270                 exchange->gamma_i=&(params->gamma[brand_i]);
271                 exchange->c_i=&(params->c[brand_i]);
272                 exchange->d_i=&(params->d[brand_i]);
273                 exchange->h_i=&(params->h[brand_i]);
274         }
275         else {
276                 Rk=params->Rmixed;
277                 Sk=params->Smixed;
278                 Sk2=params->S2mixed;
279                 /* albe needs i,k depending c,d,h and gamma values */
280                 exchange->gamma_i=&(params->gamma_m);
281                 exchange->c_i=&(params->c_mixed);
282                 exchange->d_i=&(params->d_mixed);
283                 exchange->h_i=&(params->h_mixed);
284         }
285         exchange->ci2=*(exchange->c_i)**(exchange->c_i);
286         exchange->di2=*(exchange->d_i)**(exchange->d_i);
287         exchange->ci2di2=exchange->ci2/exchange->di2;
288
289         /* dist_ik, d_ik2 */
290         v3_sub(&dist_ik,&(ktom->r),&(ai->r));
291         if(bc_ik) check_per_bound(moldyn,&dist_ik);
292         d_ik2=v3_absolute_square(&dist_ik);
293
294         /* store data for second k loop */
295         exchange->dist_ik[kcount]=dist_ik;
296         exchange->d_ik2[kcount]=d_ik2;
297
298         /* return if not within cutoff */
299         if(d_ik2>Sk2) {
300                 kcount++;
301                 continue;
302         }
303
304         /* d_ik */
305         d_ik=sqrt(d_ik2);
306
307         /* cos theta */
308         cos_theta=v3_scalar_product(&dist_ij,&dist_ik)/(d_ij*d_ik);
309
310         /* g_ijk */
311         h_cos=*(exchange->h_i)+cos_theta; // + in albe formalism
312         d2_h_cos2=exchange->di2+(h_cos*h_cos);
313         frac=exchange->ci2/d2_h_cos2;
314         g=*(exchange->gamma_i)*(1.0+exchange->ci2di2-frac);
315         dg=2.0*frac**(exchange->gamma_i)*h_cos/d2_h_cos2; // + in albe f..
316
317         /* zeta sum += f_c_ik * g_ijk */
318         if(d_ik<=Rk) {
319                 zeta_ij+=g;
320                 f_c_ik=1.0;
321                 df_c_ik=0.0;
322         }
323         else {
324                 s_r=Sk-Rk;
325                 arg=M_PI*(d_ik-Rk)/s_r;
326                 f_c_ik=0.5+0.5*cos(arg);
327                 df_c_ik=0.5*sin(arg)*(M_PI/(s_r*d_ik));
328                 zeta_ij+=f_c_ik*g;
329         }
330
331         /* store even more data for second k loop */
332         exchange->g[kcount]=g;
333         exchange->dg[kcount]=dg;
334         exchange->d_ik[kcount]=d_ik;
335         exchange->cos_theta[kcount]=cos_theta;
336         exchange->f_c_ik[kcount]=f_c_ik;
337         exchange->df_c_ik[kcount]=df_c_ik;
338
339         /* increase k counter */
340         kcount++;
341
342 #ifdef STATIC_LISTS
343                                         }
344 #else
345                                         } while(list_next_f(that)!=\
346                                                 L_NO_NEXT_ELEMENT);
347 #endif
348
349                                 }
350
351
352 /* j2 func here ... */
353
354
355         if(brand_i==jtom->brand) {
356                 S=params->S[brand_i];
357                 R=params->R[brand_i];
358                 B=params->B[brand_i];
359                 A=params->A[brand_i];
360                 r0=params->r0[brand_i];
361                 mu=params->mu[brand_i];
362                 lambda=params->lambda[brand_i];
363         }
364         else {
365                 S=params->Smixed;
366                 R=params->Rmixed;
367                 B=params->Bmixed;
368                 A=params->Amixed;
369                 r0=params->r0_mixed;
370                 mu=params->mu_m;
371                 lambda=params->lambda_m;
372         }
373
374         /* f_c, df_c */
375         if(d_ij<R) {
376                 f_c=1.0;
377                 df_c=0.0;
378         }
379         else {
380                 s_r=S-R;
381                 arg=M_PI*(d_ij-R)/s_r;
382                 f_c=0.5+0.5*cos(arg);
383                 df_c=0.5*sin(arg)*(M_PI/(s_r*d_ij));
384         }
385
386         /* f_a, df_a */
387         f_a=-B*exp(-mu*(d_ij-r0));
388         df_a=mu*f_a/d_ij;
389
390         /* f_r, df_r */
391         f_r=A*exp(-lambda*(d_ij-r0));
392         df_r=lambda*f_r/d_ij;
393
394         /* b, db */
395         if(zeta_ij==0.0) {
396                 b=1.0;
397                 db=0.0;
398         }
399         else {
400                 b=1.0/sqrt(1.0+zeta_ij);
401                 db=-0.5*b/(1.0+zeta_ij);
402         }
403
404         /* force contribution for atom i */
405         scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
406         v3_scale(&force,&(dist_ij),scale);
407         v3_add(&(ai->f),&(ai->f),&force);
408
409         /* force contribution for atom j */
410         v3_scale(&force,&force,-1.0); // dri rij = - drj rij
411         v3_add(&(jtom->f),&(jtom->f),&force);
412
413         /* virial */
414         virial_calc(ai,&force,&(dist_ij));
415
416 #ifdef DEBUG
417 if(moldyn->time>DSTART&&moldyn->time<DEND) {
418         if((ai==&(moldyn->atom[DATOM]))|(jtom==&(moldyn->atom[DATOM]))) {
419                 printf("force 3bp (j2): [%d %d sum]\n",ai->tag,jtom->tag);
420                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
421                 if(ai==&(moldyn->atom[0]))
422                         printf("  total i: %f %f %f\n",ai->f.x,ai->f.y,ai->f.z);
423                 if(jtom==&(moldyn->atom[0]))
424                         printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
425                 printf("  energy: %f = %f %f %f %f\n",0.5*f_c*(b*f_a+f_r),
426                                                     f_c,b,f_a,f_r);
427                 printf("          %f %f %f\n",zeta_ij,.0,.0);
428         }
429 }
430 #endif
431
432         /* dzeta prefactor = - f_c f_a db, (* -0.5 due to force calc) */
433         pre_dzeta=0.5*f_a*f_c*db;
434
435         /* energy contribution */
436         energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
437         moldyn->energy+=energy;
438         ai->e+=energy;
439
440         /* reset k counter for second k loop */
441         kcount=0;
442                 
443
444                                 /* second loop over atoms k */
445                                 for(k=0;k<27;k++) {
446
447                                         bc_ik=(k<dnlc)?0:1;
448 #ifdef STATIC_LISTS
449                                         q=0;
450
451                                         while(neighbour_i[j][q]!=0) {
452
453                                                 ktom=&(atom[neighbour_i[k][q]]);
454                                                 q++;
455 #else
456                                         that=&(neighbour_i2[k]);
457                                         list_reset_f(that);
458                                         
459                                         if(that->start==NULL)
460                                                 continue;
461
462                                         do {
463                                                 ktom=that->current->data;
464 #endif
465
466                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
467                                                         continue;
468
469                                                 if(ktom==jtom)
470                                                         continue;
471
472                                                 if(ktom==&(itom[i]))
473                                                         continue;
474
475
476 /* k2 func here ... */
477 /* albe 3 body potential function (second k loop) */
478
479         if(kcount>ALBE_MAXN)
480                 printf("FATAL: neighbours!\n");
481
482         /* d_ik2 */
483         d_ik2=exchange->d_ik2[kcount];
484
485         if(brand_i==ktom->brand)
486                 Sk2=params->S2[brand_i];
487         else
488                 Sk2=params->S2mixed;
489
490         /* return if d_ik > S */
491         if(d_ik2>Sk2) {
492                 kcount++;
493                 continue;
494         }
495
496         /* dist_ik, d_ik */
497         dist_ik=exchange->dist_ik[kcount];
498         d_ik=exchange->d_ik[kcount];
499
500         /* f_c_ik, df_c_ik */
501         f_c_ik=exchange->f_c_ik[kcount];
502         df_c_ik=exchange->df_c_ik[kcount];
503
504         /* g, dg, cos_theta */
505         g=exchange->g[kcount];
506         dg=exchange->dg[kcount];
507         cos_theta=exchange->cos_theta[kcount];
508
509         /* cos_theta derivatives wrt j,k */
510         dijdik_inv=1.0/(d_ij*d_ik);
511         v3_scale(&dcosdrj,&dist_ik,dijdik_inv);         // j
512         v3_scale(&tmp,&dist_ij,-cos_theta/d_ij2);
513         v3_add(&dcosdrj,&dcosdrj,&tmp);
514         v3_scale(&dcosdrk,&dist_ij,dijdik_inv);         // k
515         v3_scale(&tmp,&dist_ik,-cos_theta/d_ik2);
516         v3_add(&dcosdrk,&dcosdrk,&tmp);
517
518         /* f_c_ik * dg, df_c_ik * g */
519         fcdg=f_c_ik*dg;
520         dfcg=df_c_ik*g;
521
522         /* derivative wrt j */
523         v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
524
525         /* force contribution */
526         v3_add(&(jtom->f),&(jtom->f),&force);
527
528 #ifdef DEBUG
529 if(moldyn->time>DSTART&&moldyn->time<DEND) {
530         if(jtom==&(moldyn->atom[DATOM])) {
531                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
532                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
533                 printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
534                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
535                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
536         }
537 }
538 #endif
539
540         /* virial */
541         virial_calc(ai,&force,&dist_ij);
542
543         /* force contribution to atom i */
544         v3_scale(&force,&force,-1.0);
545         v3_add(&(ai->f),&(ai->f),&force);
546
547         /* derivative wrt k */
548         v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
549         v3_scale(&tmp,&dcosdrk,fcdg);
550         v3_add(&force,&force,&tmp);
551         v3_scale(&force,&force,pre_dzeta);
552
553         /* force contribution */
554         v3_add(&(ktom->f),&(ktom->f),&force);
555
556 #ifdef DEBUG
557 if(moldyn->time>DSTART&&moldyn->time<DEND) {
558         if(ktom==&(moldyn->atom[DATOM])) {
559                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
560                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
561                 printf("  total k: %f %f %f\n",ktom->f.x,ktom->f.y,ktom->f.z);
562                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
563                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
564         }
565 }
566 #endif
567
568         /* virial */
569         virial_calc(ai,&force,&dist_ik);
570         
571         /* force contribution to atom i */
572         v3_scale(&force,&force,-1.0);
573         v3_add(&(ai->f),&(ai->f),&force);
574
575         /* increase k counter */
576         kcount++;       
577
578
579
580 #ifdef STATIC_LISTS
581                                         }
582 #else
583                                         } while(list_next_f(that)!=\
584                                                 L_NO_NEXT_ELEMENT);
585 #endif
586
587                                 }
588                                 
589 #ifdef STATIC_LISTS
590                         }
591 #else
592                         } while(list_next_f(this)!=L_NO_NEXT_ELEMENT);
593 #endif
594                 
595                 }
596                 
597 #ifdef DEBUG
598         //printf("\n\n");
599 #endif
600 #ifdef VDEBUG
601         printf("\n\n");
602 #endif
603
604         }
605
606 #ifdef DEBUG
607         //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z);
608         if(moldyn->time>DSTART&&moldyn->time<DEND) {
609                 printf("force:\n");
610                 printf("  x: %0.40f\n",moldyn->atom[DATOM].f.x);
611                 printf("  y: %0.40f\n",moldyn->atom[DATOM].f.y);
612                 printf("  z: %0.40f\n",moldyn->atom[DATOM].f.z);
613         }
614 #endif
615
616         /* some postprocessing */
617 #ifdef PARALLEL
618         #pragma omp parallel for
619 #endif
620         for(i=0;i<count;i++) {
621                 /* calculate global virial */
622                 moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
623                 moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
624                 moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
625                 moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
626                 moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
627                 moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
628
629                 /* check forces regarding the given timestep */
630                 if(v3_norm(&(itom[i].f))>\
631                    0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
632                         printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
633                                i);
634         }
635
636         return 0;
637 }
638