]> www.hackdaworld.org Git - physik/posic.git/blob - potentials/albe_fast.c
952f1fe41d4dee5e69c816bee96f9906d44228de
[physik/posic.git] / potentials / albe_fast.c
1 /*
2  * test: albe_new.c
3  *
4  * author: Frank Zirkelbach <frank.zirkelbach@physik.uni-augsburg.de>
5  *
6  */
7
8 #define _GNU_SOURCE
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <sys/types.h>
13 #include <sys/stat.h>
14 #include <fcntl.h>
15 #include <unistd.h>
16 #include <sys/time.h>
17 #include <time.h>
18 #include <math.h>
19
20 #ifdef PARALLEL
21 #include <omp.h>
22 #endif
23
24 #ifdef PTHREADS
25 #include <pthread.h>
26 #define MAX_THREADS 4
27 #endif
28
29 #include "../moldyn.h"
30 #include "../math/math.h"
31 #include "albe.h"
32
33 #ifdef PTHREADS
34 extern pthread_mutex_t *amutex;
35 extern pthread_mutex_t emutex;
36 #endif
37
38 /*
39  * virial calculation
40  */
41
42 #define albe_v_calc(a,f,d)      a->virial.xx+=f->x*d->x; \
43                                 a->virial.yy+=f->y*d->y; \
44                                 a->virial.zz+=f->z*d->z; \
45                                 a->virial.xy+=f->x*d->y; \
46                                 a->virial.xz+=f->x*d->z; \
47                                 a->virial.yz+=f->y*d->z
48
49 #ifndef PTHREADS
50
51 int albe_potential_force_calc(t_moldyn *moldyn) {
52
53         int i,j,k,count;
54         t_atom *itom,*jtom,*ktom;
55         t_virial *virial;
56         t_linkcell *lc;
57 #ifdef STATIC_LISTS
58         int *neighbour_i[27];
59         int p,q;
60 #elif LOWMEM_LISTS
61         int neighbour_i[27];
62         int p,q;
63 #else
64         t_list neighbour_i[27];
65         t_list neighbour_i2[27];
66         t_list *this,*that;
67 #endif
68         u8 bc_ij,bc_ik;
69         int dnlc;
70 #ifdef PTHREADS
71         int ret;
72         t_kdata kdata[27];
73         pthread_t kthread[27];
74 #endif
75
76         // needed to work
77         t_atom *ai;
78
79         // optimized
80         t_albe_mult_params *params;
81         t_albe_exchange *exchange;
82         t_3dvec dist_ij;
83         double d_ij2;
84         double d_ij;
85         u8 brand_i;
86         double S2;
87         int kcount;
88         double zeta_ij;
89         double pre_dzeta;
90
91         // more ...
92         double Rk,Sk,Sk2;
93         t_3dvec dist_ik;
94         double d_ik2,d_ik;
95         double cos_theta,h_cos,d2_h_cos2,frac,g,dg,s_r,arg;
96         double f_c_ik,df_c_ik;
97
98         t_3dvec force;
99         double f_a,df_a,b,db,f_c,df_c;
100         double f_r,df_r;
101         double scale;
102         double mu,B;
103         double lambda,A;
104         double r0;
105         double S,R;
106         double energy;
107
108         double dijdik_inv,fcdg,dfcg;
109         t_3dvec dcosdrj,dcosdrk;
110         t_3dvec tmp;
111
112         // even more ...
113         double gamma_i;
114         double c_i;
115         double d_i;
116         double h_i;
117         double ci2;
118         double di2;
119         double ci2di2;
120
121         count=moldyn->count;
122         itom=moldyn->atom;
123         lc=&(moldyn->lc);
124
125         // optimized
126         params=moldyn->pot_params;
127         exchange=&(params->exchange);
128
129
130         /* reset energy */
131         moldyn->energy=0.0;
132
133         /* reset global virial */
134         memset(&(moldyn->gvir),0,sizeof(t_virial));
135
136         /* reset force, site energy and virial of every atom */
137 #ifdef PARALLEL
138         #pragma omp parallel for private(virial)
139 #endif
140         for(i=0;i<count;i++) {
141
142                 /* reset force */
143                 v3_zero(&(itom[i].f));
144
145                 /* reset virial */
146                 virial=(&(itom[i].virial));
147                 virial->xx=0.0;
148                 virial->yy=0.0;
149                 virial->zz=0.0;
150                 virial->xy=0.0;
151                 virial->xz=0.0;
152                 virial->yz=0.0;
153         
154                 /* reset site energy */
155                 itom[i].e=0.0;
156
157         }
158
159         /* get energy, force and virial of every atom */
160
161         /* first (and only) loop over atoms i */
162         for(i=0;i<count;i++) {
163
164                 if(!(itom[i].attr&ATOM_ATTR_3BP))
165                         continue;
166
167                 link_cell_neighbour_index(moldyn,
168                                           (itom[i].r.x+moldyn->dim.x/2)/lc->x,
169                                           (itom[i].r.y+moldyn->dim.y/2)/lc->y,
170                                           (itom[i].r.z+moldyn->dim.z/2)/lc->z,
171                                           neighbour_i);
172
173                 dnlc=lc->dnlc;
174
175                 /* copy the neighbour lists */
176 #ifdef STATIC_LISTS
177 #elif LOWMEM_LISTS
178 #else
179                 memcpy(neighbour_i2,neighbour_i,27*sizeof(t_list));
180 #endif
181
182                 ai=&(itom[i]);
183                 brand_i=ai->brand;
184
185                 /* loop over atoms j */
186                 for(j=0;j<27;j++) {
187
188                         bc_ij=(j<dnlc)?0:1;
189 #ifdef STATIC_LISTS
190                         p=0;
191
192                         while(neighbour_i[j][p]!=-1) {
193
194                                 jtom=&(itom[neighbour_i[j][p]]);
195                                 p++;
196 #elif LOWMEM_LISTS
197                         p=neighbour_i[j];
198
199                         while(p!=-1) {
200
201                                 jtom=&(itom[p]);
202                                 p=lc->subcell->list[p];
203 #else
204                         this=&(neighbour_i[j]);
205                         list_reset_f(this);
206
207                         if(this->start==NULL)
208                                 continue;
209
210                         do {
211
212                                 jtom=this->current->data;
213 #endif
214
215                                 if(jtom==&(itom[i]))
216                                         continue;
217
218                                 if(!(jtom->attr&ATOM_ATTR_3BP))
219                                         continue;
220
221                                 /* reset 3bp run */
222                                 moldyn->run3bp=1;
223
224
225 /* j1 func here ... */
226 /* albe 3 body potential function (first ij loop) */
227
228         /* reset zeta sum */
229         zeta_ij=0.0;
230
231         /*
232          * set ij depending values
233          */
234
235         if(brand_i==jtom->brand) {
236                 S2=params->S2[brand_i];
237         }
238         else {
239                 S2=params->S2mixed;
240         }
241
242         /* dist_ij, d_ij2 */
243         v3_sub(&dist_ij,&(jtom->r),&(ai->r));
244         if(bc_ij) check_per_bound(moldyn,&dist_ij);
245         d_ij2=v3_absolute_square(&dist_ij);
246
247         /* if d_ij2 > S2 => no force & potential energy contribution */
248         if(d_ij2>S2)
249                 continue;
250
251         /* d_ij */
252         d_ij=sqrt(d_ij2);
253
254         /* reset k counter for first k loop */
255         kcount=0;
256
257                                 /* first loop over atoms k */
258                                 for(k=0;k<27;k++) {
259
260                                         bc_ik=(k<dnlc)?0:1;
261 #ifdef STATIC_LISTS
262                                         q=0;
263
264                                         while(neighbour_i[k][q]!=-1) {
265
266                                                 ktom=&(itom[neighbour_i[k][q]]);
267                                                 q++;
268 #elif LOWMEM_LISTS
269                                         q=neighbour_i[k];
270
271                                         while(q!=-1) {
272
273                                                 ktom=&(itom[q]);
274                                                 q=lc->subcell->list[q];
275 #else
276                                         that=&(neighbour_i2[k]);
277                                         list_reset_f(that);
278                                         
279                                         if(that->start==NULL)
280                                                 continue;
281
282                                         do {
283                                                 ktom=that->current->data;
284 #endif
285
286                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
287                                                         continue;
288
289                                                 if(ktom==jtom)
290                                                         continue;
291
292                                                 if(ktom==&(itom[i]))
293                                                         continue;
294
295 /* k1 func here ... */
296 /* albe 3 body potential function (first k loop) */
297
298         if(kcount>ALBE_MAXN) {
299                 printf("FATAL: neighbours = %d\n",kcount);
300                 printf("  -> %d %d %d\n",ai->tag,jtom->tag,ktom->tag);
301         }
302
303         /* ik constants */
304         if(brand_i==ktom->brand) {
305                 Rk=params->R[brand_i];
306                 Sk=params->S[brand_i];
307                 Sk2=params->S2[brand_i];
308                 /* albe needs i,k depending c,d,h and gamma values */
309                 gamma_i=params->gamma[brand_i];
310                 c_i=params->c[brand_i];
311                 d_i=params->d[brand_i];
312                 h_i=params->h[brand_i];
313                 ci2=params->c2[brand_i];
314                 di2=params->d2[brand_i];
315                 ci2di2=params->c2d2[brand_i];
316         }
317         else {
318                 Rk=params->Rmixed;
319                 Sk=params->Smixed;
320                 Sk2=params->S2mixed;
321                 /* albe needs i,k depending c,d,h and gamma values */
322                 gamma_i=params->gamma_m;
323                 c_i=params->c_mixed;
324                 d_i=params->d_mixed;
325                 h_i=params->h_mixed;
326                 ci2=params->c2_mixed;
327                 di2=params->d2_mixed;
328                 ci2di2=params->c2d2_m;
329         }
330
331         /* dist_ik, d_ik2 */
332         v3_sub(&dist_ik,&(ktom->r),&(ai->r));
333         if(bc_ik) check_per_bound(moldyn,&dist_ik);
334         d_ik2=v3_absolute_square(&dist_ik);
335
336         /* store data for second k loop */
337         exchange->dist_ik[kcount]=dist_ik;
338         exchange->d_ik2[kcount]=d_ik2;
339
340         /* return if not within cutoff */
341         if(d_ik2>Sk2) {
342                 kcount++;
343                 continue;
344         }
345
346         /* d_ik */
347         d_ik=sqrt(d_ik2);
348
349         /* cos theta */
350         cos_theta=v3_scalar_product(&dist_ij,&dist_ik)/(d_ij*d_ik);
351
352         /* g_ijk 
353         h_cos=*(exchange->h_i)+cos_theta; // + in albe formalism
354         d2_h_cos2=exchange->di2+(h_cos*h_cos);
355         frac=exchange->ci2/d2_h_cos2;
356         g=*(exchange->gamma_i)*(1.0+exchange->ci2di2-frac);
357         dg=2.0*frac**(exchange->gamma_i)*h_cos/d2_h_cos2; // + in albe f..
358         */
359
360         h_cos=h_i+cos_theta; // + in albe formalism
361         d2_h_cos2=di2+(h_cos*h_cos);
362         frac=ci2/d2_h_cos2;
363         g=gamma_i*(1.0+ci2di2-frac);
364         dg=2.0*frac*gamma_i*h_cos/d2_h_cos2; // + in albe f..
365
366         /* zeta sum += f_c_ik * g_ijk */
367         if(d_ik<=Rk) {
368                 zeta_ij+=g;
369                 f_c_ik=1.0;
370                 df_c_ik=0.0;
371         }
372         else {
373                 s_r=Sk-Rk;
374                 arg=M_PI*(d_ik-Rk)/s_r;
375                 f_c_ik=0.5+0.5*cos(arg);
376                 df_c_ik=0.5*sin(arg)*(M_PI/(s_r*d_ik));
377                 zeta_ij+=f_c_ik*g;
378         }
379
380         /* store even more data for second k loop */
381         exchange->g[kcount]=g;
382         exchange->dg[kcount]=dg;
383         exchange->d_ik[kcount]=d_ik;
384         exchange->cos_theta[kcount]=cos_theta;
385         exchange->f_c_ik[kcount]=f_c_ik;
386         exchange->df_c_ik[kcount]=df_c_ik;
387
388         /* increase k counter */
389         kcount++;
390
391 #ifdef STATIC_LISTS
392                                         }
393 #elif LOWMEM_LISTS
394                                         }
395 #else
396                                         } while(list_next_f(that)!=\
397                                                 L_NO_NEXT_ELEMENT);
398 #endif
399
400                                 }
401
402 /* j2 func here ... */
403
404
405         if(brand_i==jtom->brand) {
406                 S=params->S[brand_i];
407                 R=params->R[brand_i];
408                 B=params->B[brand_i];
409                 A=params->A[brand_i];
410                 r0=params->r0[brand_i];
411                 mu=params->mu[brand_i];
412                 lambda=params->lambda[brand_i];
413         }
414         else {
415                 S=params->Smixed;
416                 R=params->Rmixed;
417                 B=params->Bmixed;
418                 A=params->Amixed;
419                 r0=params->r0_mixed;
420                 mu=params->mu_m;
421                 lambda=params->lambda_m;
422         }
423
424         /* f_c, df_c */
425         if(d_ij<R) {
426                 f_c=1.0;
427                 df_c=0.0;
428         }
429         else {
430                 s_r=S-R;
431                 arg=M_PI*(d_ij-R)/s_r;
432                 f_c=0.5+0.5*cos(arg);
433                 df_c=0.5*sin(arg)*(M_PI/(s_r*d_ij));
434         }
435
436         /* f_a, df_a */
437         f_a=-B*exp(-mu*(d_ij-r0));
438         df_a=mu*f_a/d_ij;
439
440         /* f_r, df_r */
441         f_r=A*exp(-lambda*(d_ij-r0));
442         df_r=lambda*f_r/d_ij;
443
444         /* b, db */
445         if(zeta_ij==0.0) {
446                 b=1.0;
447                 db=0.0;
448         }
449         else {
450                 b=1.0/sqrt(1.0+zeta_ij);
451                 db=-0.5*b/(1.0+zeta_ij);
452         }
453
454         /* force contribution for atom i */
455         scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
456         v3_scale(&force,&(dist_ij),scale);
457         v3_add(&(ai->f),&(ai->f),&force);
458
459         /* force contribution for atom j */
460         v3_scale(&force,&force,-1.0); // dri rij = - drj rij
461         v3_add(&(jtom->f),&(jtom->f),&force);
462
463         /* virial */
464         virial_calc(ai,&force,&(dist_ij));
465
466 #ifdef DEBUG
467 if(moldyn->time>DSTART&&moldyn->time<DEND) {
468         if((ai==&(moldyn->atom[DATOM]))|(jtom==&(moldyn->atom[DATOM]))) {
469                 printf("force 3bp (j2): [%d %d sum]\n",ai->tag,jtom->tag);
470                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
471                 if(ai==&(moldyn->atom[0]))
472                         printf("  total i: %f %f %f\n",ai->f.x,ai->f.y,ai->f.z);
473                 if(jtom==&(moldyn->atom[0]))
474                         printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
475                 printf("  energy: %f = %f %f %f %f\n",0.5*f_c*(b*f_a+f_r),
476                                                     f_c,b,f_a,f_r);
477                 printf("          %f %f %f\n",zeta_ij,.0,.0);
478         }
479 }
480 #endif
481
482         /* dzeta prefactor = - f_c f_a db, (* -0.5 due to force calc) */
483         pre_dzeta=0.5*f_a*f_c*db;
484
485         /* energy contribution */
486         energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
487         moldyn->energy+=energy;
488         ai->e+=energy;
489
490         /* reset k counter for second k loop */
491         kcount=0;
492                 
493
494                                 /* second loop over atoms k */
495                                 for(k=0;k<27;k++) {
496
497                                         bc_ik=(k<dnlc)?0:1;
498 #ifdef STATIC_LISTS
499                                         q=0;
500
501                                         while(neighbour_i[k][q]!=-1) {
502
503                                                 ktom=&(itom[neighbour_i[k][q]]);
504                                                 q++;
505 #elif LOWMEM_LISTS
506                                         q=neighbour_i[k];
507
508                                         while(q!=-1) {
509
510                                                 ktom=&(itom[q]);
511                                                 q=lc->subcell->list[q];
512 #else
513                                         that=&(neighbour_i2[k]);
514                                         list_reset_f(that);
515                                         
516                                         if(that->start==NULL)
517                                                 continue;
518
519                                         do {
520                                                 ktom=that->current->data;
521 #endif
522
523                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
524                                                         continue;
525
526                                                 if(ktom==jtom)
527                                                         continue;
528
529                                                 if(ktom==&(itom[i]))
530                                                         continue;
531
532
533 /* k2 func here ... */
534 /* albe 3 body potential function (second k loop) */
535
536         if(kcount>ALBE_MAXN)
537                 printf("FATAL: neighbours!\n");
538
539         /* d_ik2 */
540         d_ik2=exchange->d_ik2[kcount];
541
542         if(brand_i==ktom->brand)
543                 Sk2=params->S2[brand_i];
544         else
545                 Sk2=params->S2mixed;
546
547         /* return if d_ik > S */
548         if(d_ik2>Sk2) {
549                 kcount++;
550                 continue;
551         }
552
553         /* dist_ik, d_ik */
554         dist_ik=exchange->dist_ik[kcount];
555         d_ik=exchange->d_ik[kcount];
556
557         /* f_c_ik, df_c_ik */
558         f_c_ik=exchange->f_c_ik[kcount];
559         df_c_ik=exchange->df_c_ik[kcount];
560
561         /* g, dg, cos_theta */
562         g=exchange->g[kcount];
563         dg=exchange->dg[kcount];
564         cos_theta=exchange->cos_theta[kcount];
565
566         /* cos_theta derivatives wrt j,k */
567         dijdik_inv=1.0/(d_ij*d_ik);
568         v3_scale(&dcosdrj,&dist_ik,dijdik_inv);         // j
569         v3_scale(&tmp,&dist_ij,-cos_theta/d_ij2);
570         v3_add(&dcosdrj,&dcosdrj,&tmp);
571         v3_scale(&dcosdrk,&dist_ij,dijdik_inv);         // k
572         v3_scale(&tmp,&dist_ik,-cos_theta/d_ik2);
573         v3_add(&dcosdrk,&dcosdrk,&tmp);
574
575         /* f_c_ik * dg, df_c_ik * g */
576         fcdg=f_c_ik*dg;
577         dfcg=df_c_ik*g;
578
579         /* derivative wrt j */
580         v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
581
582         /* force contribution */
583         v3_add(&(jtom->f),&(jtom->f),&force);
584
585 #ifdef DEBUG
586 if(moldyn->time>DSTART&&moldyn->time<DEND) {
587         if(jtom==&(moldyn->atom[DATOM])) {
588                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
589                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
590                 printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
591                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
592                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
593         }
594 }
595 #endif
596
597         /* virial */
598         virial_calc(ai,&force,&dist_ij);
599
600         /* force contribution to atom i */
601         v3_scale(&force,&force,-1.0);
602         v3_add(&(ai->f),&(ai->f),&force);
603
604         /* derivative wrt k */
605         v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
606         v3_scale(&tmp,&dcosdrk,fcdg);
607         v3_add(&force,&force,&tmp);
608         v3_scale(&force,&force,pre_dzeta);
609
610         /* force contribution */
611         v3_add(&(ktom->f),&(ktom->f),&force);
612
613 #ifdef DEBUG
614 if(moldyn->time>DSTART&&moldyn->time<DEND) {
615         if(ktom==&(moldyn->atom[DATOM])) {
616                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
617                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
618                 printf("  total k: %f %f %f\n",ktom->f.x,ktom->f.y,ktom->f.z);
619                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
620                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
621         }
622 }
623 #endif
624
625         /* virial */
626         virial_calc(ai,&force,&dist_ik);
627         
628         /* force contribution to atom i */
629         v3_scale(&force,&force,-1.0);
630         v3_add(&(ai->f),&(ai->f),&force);
631
632         /* increase k counter */
633         kcount++;       
634
635
636
637 #ifdef STATIC_LISTS
638                                         }
639 #elif LOWMEM_LISTS
640                                         }
641 #else
642                                         } while(list_next_f(that)!=\
643                                                 L_NO_NEXT_ELEMENT);
644 #endif
645
646                                 }
647                                 
648 #ifdef STATIC_LISTS
649                         }
650 #elif LOWMEM_LISTS
651                         }
652 #else
653                         } while(list_next_f(this)!=L_NO_NEXT_ELEMENT);
654 #endif
655                 
656                 }
657                 
658 #ifdef DEBUG
659         //printf("\n\n");
660 #endif
661 #ifdef VDEBUG
662         printf("\n\n");
663 #endif
664
665         }
666
667 #ifdef DEBUG
668         //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z);
669         if(moldyn->time>DSTART&&moldyn->time<DEND) {
670                 printf("force:\n");
671                 printf("  x: %0.40f\n",moldyn->atom[DATOM].f.x);
672                 printf("  y: %0.40f\n",moldyn->atom[DATOM].f.y);
673                 printf("  z: %0.40f\n",moldyn->atom[DATOM].f.z);
674         }
675 #endif
676
677         /* some postprocessing */
678 #ifdef PARALLEL
679         #pragma omp parallel for
680 #endif
681         for(i=0;i<count;i++) {
682                 /* calculate global virial */
683                 moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
684                 moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
685                 moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
686                 moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
687                 moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
688                 moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
689
690                 /* check forces regarding the given timestep */
691                 if(v3_norm(&(itom[i].f))>\
692                    0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
693                         printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
694                                i);
695         }
696
697         return 0;
698 }
699
700
701 #else // PTHREADS
702
703
704 typedef struct s_pft_data {
705         t_moldyn *moldyn;
706         int i;
707 } t_pft_data;
708
709 void *potential_force_thread(void *ptr) {
710
711         t_pft_data *pft_data;
712         t_moldyn *moldyn;
713         t_albe_exchange ec;
714
715         int i,j,k,count;
716         t_atom *itom,*jtom,*ktom;
717         t_linkcell *lc;
718 #ifdef STATIC_LISTS
719         int *neighbour_i[27];
720         int p,q;
721 #elif LOWMEM_LISTS
722         int neighbour_i[27];
723         int p,q;
724 #else
725         t_list neighbour_i[27];
726         t_list neighbour_i2[27];
727         t_list *this,*that;
728 #endif
729         u8 bc_ij,bc_ik;
730         int dnlc;
731
732         // needed to work
733         t_atom *ai;
734
735         // optimized
736         t_albe_mult_params *params;
737         t_albe_exchange *exchange;
738         t_3dvec dist_ij;
739         double d_ij2;
740         double d_ij;
741         u8 brand_i;
742         double S2;
743         int kcount;
744         double zeta_ij;
745         double pre_dzeta;
746
747         // more ...
748         double Rk,Sk,Sk2;
749         t_3dvec dist_ik;
750         double d_ik2,d_ik;
751         double cos_theta,h_cos,d2_h_cos2,frac,g,dg,s_r,arg;
752         double f_c_ik,df_c_ik;
753
754         t_3dvec force;
755         double f_a,df_a,b,db,f_c,df_c;
756         double f_r,df_r;
757         double scale;
758         double mu,B;
759         double lambda,A;
760         double r0;
761         double S,R;
762         double energy;
763
764         double dijdik_inv,fcdg,dfcg;
765         t_3dvec dcosdrj,dcosdrk;
766         t_3dvec tmp;
767
768         // even more ...
769         double gamma_i;
770         double c_i;
771         double d_i;
772         double h_i;
773         double ci2;
774         double di2;
775         double ci2di2;
776
777         pft_data=ptr;
778         moldyn=pft_data->moldyn;
779         exchange=&ec;
780
781         count=moldyn->count;
782         itom=moldyn->atom;
783         lc=&(moldyn->lc);
784
785         // optimized
786         params=moldyn->pot_params;
787
788
789         /* get energy, force and virial of every atom */
790
791         /* first (and only) loop over atoms i */
792         for(i=0;i<count;i++) {
793
794                 if(!(itom[i].attr&ATOM_ATTR_3BP))
795                         continue;
796
797                 link_cell_neighbour_index(moldyn,
798                                           (itom[i].r.x+moldyn->dim.x/2)/lc->x,
799                                           (itom[i].r.y+moldyn->dim.y/2)/lc->y,
800                                           (itom[i].r.z+moldyn->dim.z/2)/lc->z,
801                                           neighbour_i);
802
803                 dnlc=lc->dnlc;
804
805                 /* copy the neighbour lists */
806 #ifdef STATIC_LISTS
807 #elif LOWMEM_LISTS
808 #else
809                 memcpy(neighbour_i2,neighbour_i,27*sizeof(t_list));
810 #endif
811
812                 ai=&(itom[i]);
813                 brand_i=ai->brand;
814
815                 /* loop over atoms j */
816                 for(j=0;j<27;j++) {
817
818                         bc_ij=(j<dnlc)?0:1;
819 #ifdef STATIC_LISTS
820                         p=0;
821
822                         while(neighbour_i[j][p]!=-1) {
823
824                                 jtom=&(itom[neighbour_i[j][p]]);
825                                 p++;
826 #elif LOWMEM_LISTS
827                         p=neighbour_i[j];
828
829                         while(p!=-1) {
830
831                                 jtom=&(itom[p]);
832                                 p=lc->subcell->list[p];
833 #else
834                         this=&(neighbour_i[j]);
835                         list_reset_f(this);
836
837                         if(this->start==NULL)
838                                 continue;
839
840                         do {
841
842                                 jtom=this->current->data;
843 #endif
844
845                                 if(jtom==&(itom[i]))
846                                         continue;
847
848                                 if(!(jtom->attr&ATOM_ATTR_3BP))
849                                         continue;
850
851                                 /* reset 3bp run */
852                                 moldyn->run3bp=1;
853
854
855 /* j1 func here ... */
856 /* albe 3 body potential function (first ij loop) */
857
858         /* reset zeta sum */
859         zeta_ij=0.0;
860
861         /*
862          * set ij depending values
863          */
864
865         if(brand_i==jtom->brand) {
866                 S2=params->S2[brand_i];
867         }
868         else {
869                 S2=params->S2mixed;
870         }
871
872         /* dist_ij, d_ij2 */
873         v3_sub(&dist_ij,&(jtom->r),&(ai->r));
874         if(bc_ij) check_per_bound(moldyn,&dist_ij);
875         d_ij2=v3_absolute_square(&dist_ij);
876
877         /* if d_ij2 > S2 => no force & potential energy contribution */
878         if(d_ij2>S2)
879                 continue;
880
881         /* d_ij */
882         d_ij=sqrt(d_ij2);
883
884         /* reset k counter for first k loop */
885         kcount=0;
886
887                                 /* first loop over atoms k */
888                                 for(k=0;k<27;k++) {
889
890                                         bc_ik=(k<dnlc)?0:1;
891 #ifdef STATIC_LISTS
892                                         q=0;
893
894                                         while(neighbour_i[k][q]!=-1) {
895
896                                                 ktom=&(itom[neighbour_i[k][q]]);
897                                                 q++;
898 #elif LOWMEM_LISTS
899                                         q=neighbour_i[k];
900
901                                         while(q!=-1) {
902
903                                                 ktom=&(itom[q]);
904                                                 q=lc->subcell->list[q];
905 #else
906                                         that=&(neighbour_i2[k]);
907                                         list_reset_f(that);
908                                         
909                                         if(that->start==NULL)
910                                                 continue;
911
912                                         do {
913                                                 ktom=that->current->data;
914 #endif
915
916                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
917                                                         continue;
918
919                                                 if(ktom==jtom)
920                                                         continue;
921
922                                                 if(ktom==&(itom[i]))
923                                                         continue;
924
925 /* k1 func here ... */
926 /* albe 3 body potential function (first k loop) */
927
928         if(kcount>ALBE_MAXN) {
929                 printf("FATAL: neighbours = %d\n",kcount);
930                 printf("  -> %d %d %d\n",ai->tag,jtom->tag,ktom->tag);
931         }
932
933         /* ik constants */
934         if(brand_i==ktom->brand) {
935                 Rk=params->R[brand_i];
936                 Sk=params->S[brand_i];
937                 Sk2=params->S2[brand_i];
938                 /* albe needs i,k depending c,d,h and gamma values */
939                 gamma_i=params->gamma[brand_i];
940                 c_i=params->c[brand_i];
941                 d_i=params->d[brand_i];
942                 h_i=params->h[brand_i];
943                 ci2=params->c2[brand_i];
944                 di2=params->d2[brand_i];
945                 ci2di2=params->c2d2[brand_i];
946         }
947         else {
948                 Rk=params->Rmixed;
949                 Sk=params->Smixed;
950                 Sk2=params->S2mixed;
951                 /* albe needs i,k depending c,d,h and gamma values */
952                 gamma_i=params->gamma_m;
953                 c_i=params->c_mixed;
954                 d_i=params->d_mixed;
955                 h_i=params->h_mixed;
956                 ci2=params->c2_mixed;
957                 di2=params->d2_mixed;
958                 ci2di2=params->c2d2_m;
959         }
960
961         /* dist_ik, d_ik2 */
962         v3_sub(&dist_ik,&(ktom->r),&(ai->r));
963         if(bc_ik) check_per_bound(moldyn,&dist_ik);
964         d_ik2=v3_absolute_square(&dist_ik);
965
966         /* store data for second k loop */
967         exchange->dist_ik[kcount]=dist_ik;
968         exchange->d_ik2[kcount]=d_ik2;
969
970         /* return if not within cutoff */
971         if(d_ik2>Sk2) {
972                 kcount++;
973                 continue;
974         }
975
976         /* d_ik */
977         d_ik=sqrt(d_ik2);
978
979         /* cos theta */
980         cos_theta=v3_scalar_product(&dist_ij,&dist_ik)/(d_ij*d_ik);
981
982         /* g_ijk 
983         h_cos=*(exchange->h_i)+cos_theta; // + in albe formalism
984         d2_h_cos2=exchange->di2+(h_cos*h_cos);
985         frac=exchange->ci2/d2_h_cos2;
986         g=*(exchange->gamma_i)*(1.0+exchange->ci2di2-frac);
987         dg=2.0*frac**(exchange->gamma_i)*h_cos/d2_h_cos2; // + in albe f..
988         */
989
990         h_cos=h_i+cos_theta; // + in albe formalism
991         d2_h_cos2=di2+(h_cos*h_cos);
992         frac=ci2/d2_h_cos2;
993         g=gamma_i*(1.0+ci2di2-frac);
994         dg=2.0*frac*gamma_i*h_cos/d2_h_cos2; // + in albe f..
995
996         /* zeta sum += f_c_ik * g_ijk */
997         if(d_ik<=Rk) {
998                 zeta_ij+=g;
999                 f_c_ik=1.0;
1000                 df_c_ik=0.0;
1001         }
1002         else {
1003                 s_r=Sk-Rk;
1004                 arg=M_PI*(d_ik-Rk)/s_r;
1005                 f_c_ik=0.5+0.5*cos(arg);
1006                 df_c_ik=0.5*sin(arg)*(M_PI/(s_r*d_ik));
1007                 zeta_ij+=f_c_ik*g;
1008         }
1009
1010         /* store even more data for second k loop */
1011         exchange->g[kcount]=g;
1012         exchange->dg[kcount]=dg;
1013         exchange->d_ik[kcount]=d_ik;
1014         exchange->cos_theta[kcount]=cos_theta;
1015         exchange->f_c_ik[kcount]=f_c_ik;
1016         exchange->df_c_ik[kcount]=df_c_ik;
1017
1018         /* increase k counter */
1019         kcount++;
1020
1021 #ifdef STATIC_LISTS
1022                                         }
1023 #elif LOWMEM_LISTS
1024                                         }
1025 #else
1026                                         } while(list_next_f(that)!=\
1027                                                 L_NO_NEXT_ELEMENT);
1028 #endif
1029
1030                                 }
1031
1032 /* j2 func here ... */
1033
1034
1035         if(brand_i==jtom->brand) {
1036                 S=params->S[brand_i];
1037                 R=params->R[brand_i];
1038                 B=params->B[brand_i];
1039                 A=params->A[brand_i];
1040                 r0=params->r0[brand_i];
1041                 mu=params->mu[brand_i];
1042                 lambda=params->lambda[brand_i];
1043         }
1044         else {
1045                 S=params->Smixed;
1046                 R=params->Rmixed;
1047                 B=params->Bmixed;
1048                 A=params->Amixed;
1049                 r0=params->r0_mixed;
1050                 mu=params->mu_m;
1051                 lambda=params->lambda_m;
1052         }
1053
1054         /* f_c, df_c */
1055         if(d_ij<R) {
1056                 f_c=1.0;
1057                 df_c=0.0;
1058         }
1059         else {
1060                 s_r=S-R;
1061                 arg=M_PI*(d_ij-R)/s_r;
1062                 f_c=0.5+0.5*cos(arg);
1063                 df_c=0.5*sin(arg)*(M_PI/(s_r*d_ij));
1064         }
1065
1066         /* f_a, df_a */
1067         f_a=-B*exp(-mu*(d_ij-r0));
1068         df_a=mu*f_a/d_ij;
1069
1070         /* f_r, df_r */
1071         f_r=A*exp(-lambda*(d_ij-r0));
1072         df_r=lambda*f_r/d_ij;
1073
1074         /* b, db */
1075         if(zeta_ij==0.0) {
1076                 b=1.0;
1077                 db=0.0;
1078         }
1079         else {
1080                 b=1.0/sqrt(1.0+zeta_ij);
1081                 db=-0.5*b/(1.0+zeta_ij);
1082         }
1083
1084         /* force contribution for atom i */
1085         scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
1086         v3_scale(&force,&(dist_ij),scale);
1087         pthread_mutex_lock(&(amutex[ai->tag])); 
1088         v3_add(&(ai->f),&(ai->f),&force);
1089         pthread_mutex_unlock(&(amutex[ai->tag]));       
1090
1091         /* force contribution for atom j */
1092         v3_scale(&force,&force,-1.0); // dri rij = - drj rij
1093         pthread_mutex_lock(&(amutex[jtom->tag]));       
1094         v3_add(&(jtom->f),&(jtom->f),&force);
1095         pthread_mutex_unlock(&(amutex[jtom->tag]));     
1096
1097         /* virial */
1098         pthread_mutex_lock(&(amutex[ai->tag])); 
1099         virial_calc(ai,&force,&(dist_ij));
1100         pthread_mutex_unlock(&(amutex[ai->tag]));       
1101
1102 #ifdef DEBUG
1103 if(moldyn->time>DSTART&&moldyn->time<DEND) {
1104         if((ai==&(moldyn->atom[DATOM]))|(jtom==&(moldyn->atom[DATOM]))) {
1105                 printf("force 3bp (j2): [%d %d sum]\n",ai->tag,jtom->tag);
1106                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
1107                 if(ai==&(moldyn->atom[0]))
1108                         printf("  total i: %f %f %f\n",ai->f.x,ai->f.y,ai->f.z);
1109                 if(jtom==&(moldyn->atom[0]))
1110                         printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
1111                 printf("  energy: %f = %f %f %f %f\n",0.5*f_c*(b*f_a+f_r),
1112                                                     f_c,b,f_a,f_r);
1113                 printf("          %f %f %f\n",zeta_ij,.0,.0);
1114         }
1115 }
1116 #endif
1117
1118         /* dzeta prefactor = - f_c f_a db, (* -0.5 due to force calc) */
1119         pre_dzeta=0.5*f_a*f_c*db;
1120
1121         /* energy contribution */
1122         energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
1123         pthread_mutex_lock(&emutex);
1124         moldyn->energy+=energy;
1125         pthread_mutex_unlock(&emutex);
1126         pthread_mutex_lock(&(amutex[ai->tag])); 
1127         ai->e+=energy;
1128         pthread_mutex_unlock(&(amutex[ai->tag]));       
1129
1130         /* reset k counter for second k loop */
1131         kcount=0;
1132                 
1133
1134                                 /* second loop over atoms k */
1135                                 for(k=0;k<27;k++) {
1136
1137                                         bc_ik=(k<dnlc)?0:1;
1138 #ifdef STATIC_LISTS
1139                                         q=0;
1140
1141                                         while(neighbour_i[k][q]!=-1) {
1142
1143                                                 ktom=&(itom[neighbour_i[k][q]]);
1144                                                 q++;
1145 #elif LOWMEM_LISTS
1146                                         q=neighbour_i[k];
1147
1148                                         while(q!=-1) {
1149
1150                                                 ktom=&(itom[q]);
1151                                                 q=lc->subcell->list[q];
1152 #else
1153                                         that=&(neighbour_i2[k]);
1154                                         list_reset_f(that);
1155                                         
1156                                         if(that->start==NULL)
1157                                                 continue;
1158
1159                                         do {
1160                                                 ktom=that->current->data;
1161 #endif
1162
1163                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
1164                                                         continue;
1165
1166                                                 if(ktom==jtom)
1167                                                         continue;
1168
1169                                                 if(ktom==&(itom[i]))
1170                                                         continue;
1171
1172
1173 /* k2 func here ... */
1174 /* albe 3 body potential function (second k loop) */
1175
1176         if(kcount>ALBE_MAXN)
1177                 printf("FATAL: neighbours!\n");
1178
1179         /* d_ik2 */
1180         d_ik2=exchange->d_ik2[kcount];
1181
1182         if(brand_i==ktom->brand)
1183                 Sk2=params->S2[brand_i];
1184         else
1185                 Sk2=params->S2mixed;
1186
1187         /* return if d_ik > S */
1188         if(d_ik2>Sk2) {
1189                 kcount++;
1190                 continue;
1191         }
1192
1193         /* dist_ik, d_ik */
1194         dist_ik=exchange->dist_ik[kcount];
1195         d_ik=exchange->d_ik[kcount];
1196
1197         /* f_c_ik, df_c_ik */
1198         f_c_ik=exchange->f_c_ik[kcount];
1199         df_c_ik=exchange->df_c_ik[kcount];
1200
1201         /* g, dg, cos_theta */
1202         g=exchange->g[kcount];
1203         dg=exchange->dg[kcount];
1204         cos_theta=exchange->cos_theta[kcount];
1205
1206         /* cos_theta derivatives wrt j,k */
1207         dijdik_inv=1.0/(d_ij*d_ik);
1208         v3_scale(&dcosdrj,&dist_ik,dijdik_inv);         // j
1209         v3_scale(&tmp,&dist_ij,-cos_theta/d_ij2);
1210         v3_add(&dcosdrj,&dcosdrj,&tmp);
1211         v3_scale(&dcosdrk,&dist_ij,dijdik_inv);         // k
1212         v3_scale(&tmp,&dist_ik,-cos_theta/d_ik2);
1213         v3_add(&dcosdrk,&dcosdrk,&tmp);
1214
1215         /* f_c_ik * dg, df_c_ik * g */
1216         fcdg=f_c_ik*dg;
1217         dfcg=df_c_ik*g;
1218
1219         /* derivative wrt j */
1220         v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
1221
1222         /* force contribution */
1223         pthread_mutex_lock(&(amutex[jtom->tag]));       
1224         v3_add(&(jtom->f),&(jtom->f),&force);
1225         pthread_mutex_unlock(&(amutex[jtom->tag]));     
1226
1227 #ifdef DEBUG
1228 if(moldyn->time>DSTART&&moldyn->time<DEND) {
1229         if(jtom==&(moldyn->atom[DATOM])) {
1230                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
1231                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
1232                 printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
1233                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
1234                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
1235         }
1236 }
1237 #endif
1238
1239         /* virial */
1240         pthread_mutex_lock(&(amutex[ai->tag])); 
1241         virial_calc(ai,&force,&dist_ij);
1242
1243         /* force contribution to atom i */
1244         v3_scale(&force,&force,-1.0);
1245         v3_add(&(ai->f),&(ai->f),&force);
1246         pthread_mutex_unlock(&(amutex[ai->tag]));       
1247
1248         /* derivative wrt k */
1249         v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
1250         v3_scale(&tmp,&dcosdrk,fcdg);
1251         v3_add(&force,&force,&tmp);
1252         v3_scale(&force,&force,pre_dzeta);
1253
1254         /* force contribution */
1255         pthread_mutex_lock(&(amutex[ktom->tag]));       
1256         v3_add(&(ktom->f),&(ktom->f),&force);
1257         pthread_mutex_unlock(&(amutex[ktom->tag]));     
1258
1259 #ifdef DEBUG
1260 if(moldyn->time>DSTART&&moldyn->time<DEND) {
1261         if(ktom==&(moldyn->atom[DATOM])) {
1262                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
1263                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
1264                 printf("  total k: %f %f %f\n",ktom->f.x,ktom->f.y,ktom->f.z);
1265                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
1266                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
1267         }
1268 }
1269 #endif
1270
1271         /* virial */
1272         pthread_mutex_lock(&(amutex[ai->tag])); 
1273         virial_calc(ai,&force,&dist_ik);
1274         
1275         /* force contribution to atom i */
1276         v3_scale(&force,&force,-1.0);
1277         v3_add(&(ai->f),&(ai->f),&force);
1278         pthread_mutex_unlock(&(amutex[ai->tag]));       
1279
1280         /* increase k counter */
1281         kcount++;       
1282
1283
1284
1285 #ifdef STATIC_LISTS
1286                                         }
1287 #elif LOWMEM_LISTS
1288                                         }
1289 #else
1290                                         } while(list_next_f(that)!=\
1291                                                 L_NO_NEXT_ELEMENT);
1292 #endif
1293
1294                                 }
1295                                 
1296 #ifdef STATIC_LISTS
1297                         }
1298 #elif LOWMEM_LISTS
1299                         }
1300 #else
1301                         } while(list_next_f(this)!=L_NO_NEXT_ELEMENT);
1302 #endif
1303                 
1304                 }
1305                 
1306 #ifdef DEBUG
1307         //printf("\n\n");
1308 #endif
1309 #ifdef VDEBUG
1310         printf("\n\n");
1311 #endif
1312
1313         }
1314
1315 #ifdef DEBUG
1316         //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z);
1317         if(moldyn->time>DSTART&&moldyn->time<DEND) {
1318                 printf("force:\n");
1319                 printf("  x: %0.40f\n",moldyn->atom[DATOM].f.x);
1320                 printf("  y: %0.40f\n",moldyn->atom[DATOM].f.y);
1321                 printf("  z: %0.40f\n",moldyn->atom[DATOM].f.z);
1322         }
1323 #endif
1324
1325         pthread_exit(NULL);
1326
1327         return 0;
1328 }
1329
1330 int albe_potential_force_calc(t_moldyn *moldyn) {
1331
1332         int i,j,ret;
1333         t_pft_data pft_data[MAX_THREADS];
1334         int count;
1335         pthread_t pft_thread[MAX_THREADS];
1336         t_atom *itom;
1337         t_virial *virial;
1338
1339         count=moldyn->count;
1340         itom=moldyn->atom;
1341
1342         /* reset energy */
1343         moldyn->energy=0.0;
1344
1345         /* reset global virial */
1346         memset(&(moldyn->gvir),0,sizeof(t_virial));
1347
1348         /* reset force, site energy and virial of every atom */
1349         for(i=0;i<count;i++) {
1350
1351                 /* reset force */
1352                 v3_zero(&(itom[i].f));
1353
1354                 /* reset virial */
1355                 virial=&(itom[i].virial);
1356                 virial->xx=0.0;
1357                 virial->yy=0.0;
1358                 virial->zz=0.0;
1359                 virial->xy=0.0;
1360                 virial->xz=0.0;
1361                 virial->yz=0.0;
1362         
1363                 /* reset site energy */
1364                 itom[i].e=0.0;
1365
1366         }
1367
1368         i=0;
1369         while(i<count) {
1370
1371                 /* start threads */
1372                 for(j=0;j<MAX_THREADS;j++) {
1373
1374                         /* break if all atoms are processed */
1375                         if(j+i==count)
1376                                 break;
1377
1378                         /* prepare thread data */
1379                         pft_data[j].moldyn=moldyn;
1380                         pft_data[j].i=j+i;
1381
1382                         ret=pthread_create(&(pft_thread[j]),NULL,
1383                                            potential_force_thread,
1384                                            &(pft_data[j]));
1385                         if(ret)  {
1386                                 perror("[albe fast] pf thread create");
1387                                 return ret;
1388                         }
1389                 }
1390
1391                 //printf("threads created! %d\n",j);
1392
1393                 /* join threads */
1394                 for(j=0;j<MAX_THREADS;j++) {
1395
1396                         if(j+i==count)
1397                                 break;
1398
1399                         ret=pthread_join(pft_thread[j],NULL);
1400                         if(ret) {
1401                                 perror("[albe fast] pf thread join");
1402                                 return ret;
1403                         }
1404                 }
1405
1406                 /* increment counter */
1407                 i+=MAX_THREADS;
1408
1409                 //printf("threads joined! -> %d\n",i);
1410
1411         }
1412
1413         /* some postprocessing */
1414         for(i=0;i<count;i++) {
1415                 /* calculate global virial */
1416                 moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
1417                 moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
1418                 moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
1419                 moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
1420                 moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
1421                 moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
1422
1423                 /* check forces regarding the given timestep */
1424                 if(v3_norm(&(itom[i].f))>\
1425                    0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
1426                         printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
1427                                i);
1428         }
1429
1430         pthread_exit(NULL);
1431
1432         return 0;
1433 }
1434
1435 #endif // PTHREADS