Merge branch 'leadoff'
[physik/posic.git] / potentials / albe_fast.c
index 29c303b..79162b1 100644 (file)
 #include <omp.h>
 #endif
 
-#ifdef PTHREAD
+#ifdef PTHREADS
 #include <pthread.h>
+#define MAX_THREADS 2
 #endif
 
 #include "../moldyn.h"
 #include "../math/math.h"
 #include "albe.h"
 
+#ifdef PTHREADS
+extern pthread_mutex_t *amutex;
+extern pthread_mutex_t emutex;
+#endif
+
 /*
  * virial calculation
  */
 
-#define albe_v_calc(a,f,d)     a->virial.xx+=f->x*d->x; \
-                               a->virial.yy+=f->y*d->y; \
-                               a->virial.zz+=f->z*d->z; \
-                               a->virial.xy+=f->x*d->y; \
-                               a->virial.xz+=f->x*d->z; \
-                               a->virial.yz+=f->y*d->z
+#define albe_v_calc(a,f,d)     (a)->virial.xx+=(f)->x*(d)->x; \
+                               (a)->virial.yy+=(f)->y*(d)->y; \
+                               (a)->virial.zz+=(f)->z*(d)->z; \
+                               (a)->virial.xy+=(f)->x*(d)->y; \
+                               (a)->virial.xz+=(f)->x*(d)->z; \
+                               (a)->virial.yz+=(f)->y*(d)->z
 
 #ifndef PTHREADS
 
@@ -446,7 +452,11 @@ int albe_potential_force_calc(t_moldyn *moldyn) {
        }
 
        /* force contribution for atom i */
+#ifdef MATTONI
+       scale=-0.5*(f_c*(df_r-b*df_a)); // - in albe formalism
+#else
        scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
+#endif
        v3_scale(&force,&(dist_ij),scale);
        v3_add(&(ai->f),&(ai->f),&force);
 
@@ -455,7 +465,8 @@ int albe_potential_force_calc(t_moldyn *moldyn) {
        v3_add(&(jtom->f),&(jtom->f),&force);
 
        /* virial */
-       virial_calc(ai,&force,&(dist_ij));
+       albe_v_calc(ai,&force,&(dist_ij));
+       //virial_calc(ai,&force,&(dist_ij));
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -589,17 +600,23 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
 
        /* virial */
-       virial_calc(ai,&force,&dist_ij);
+       albe_v_calc(ai,&force,&dist_ij);
+       //virial_calc(ai,&force,&dist_ij);
 
        /* force contribution to atom i */
        v3_scale(&force,&force,-1.0);
        v3_add(&(ai->f),&(ai->f),&force);
 
        /* derivative wrt k */
+#ifdef MATTONI
+       v3_scale(&tmp,&dcosdrk,fcdg);
+       v3_scale(&force,&tmp,pre_dzeta);
+#else
        v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
        v3_scale(&tmp,&dcosdrk,fcdg);
        v3_add(&force,&force,&tmp);
        v3_scale(&force,&force,pre_dzeta);
+#endif
 
        /* force contribution */
        v3_add(&(ktom->f),&(ktom->f),&force);
@@ -617,7 +634,8 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
 
        /* virial */
-       virial_calc(ai,&force,&dist_ik);
+       albe_v_calc(ai,&force,&dist_ik);
+       //virial_calc(ai,&force,&dist_ik);
        
        /* force contribution to atom i */
        v3_scale(&force,&force,-1.0);
@@ -697,7 +715,7 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 
 typedef struct s_pft_data {
        t_moldyn *moldyn;
-       int i;
+       int start,end;
 } t_pft_data;
 
 void *potential_force_thread(void *ptr) {
@@ -779,23 +797,20 @@ void *potential_force_thread(void *ptr) {
        // optimized
        params=moldyn->pot_params;
 
+       /* get energy, force and virial for atoms */
 
-       /* get energy, force and virial of every atom */
-
-       /* first (and only) loop over atoms i */
-       for(i=0;i<count;i++) {
+       for(i=pft_data->start;i<pft_data->end;i++) {
 
                if(!(itom[i].attr&ATOM_ATTR_3BP))
-                       continue;
+                       return 0;
 
-               link_cell_neighbour_index(moldyn,
+               // thread safe this way!
+               dnlc=link_cell_neighbour_index(moldyn,
                                          (itom[i].r.x+moldyn->dim.x/2)/lc->x,
                                          (itom[i].r.y+moldyn->dim.y/2)/lc->y,
                                          (itom[i].r.z+moldyn->dim.z/2)/lc->z,
                                          neighbour_i);
 
-               dnlc=lc->dnlc;
-
                /* copy the neighbour lists */
 #ifdef STATIC_LISTS
 #elif LOWMEM_LISTS
@@ -823,7 +838,7 @@ void *potential_force_thread(void *ptr) {
                        while(p!=-1) {
 
                                jtom=&(itom[p]);
-                               p=lc->subcell->list[p];
+                               p=lc->subcell->list[p]; // thread safe!
 #else
                        this=&(neighbour_i[j]);
                        list_reset_f(this);
@@ -1076,16 +1091,33 @@ void *potential_force_thread(void *ptr) {
        }
 
        /* force contribution for atom i */
+#ifdef MATTONI
+       scale=-0.5*(f_c*(df_r-b*df_a)); // - in albe formalism
+#else
        scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
+#endif
        v3_scale(&force,&(dist_ij),scale);
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (1)\n");
        v3_add(&(ai->f),&(ai->f),&force);
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (1)\n");
 
        /* force contribution for atom j */
        v3_scale(&force,&force,-1.0); // dri rij = - drj rij
+       if(pthread_mutex_lock(&(amutex[jtom->tag])))
+               perror("[albe fast] mutex lock (2)\n");
        v3_add(&(jtom->f),&(jtom->f),&force);
+       if(pthread_mutex_unlock(&(amutex[jtom->tag])))
+               perror("[albe fast] mutex unlock (2)\n");
 
        /* virial */
-       virial_calc(ai,&force,&(dist_ij));
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (3)\n");
+       albe_v_calc(ai,&force,&(dist_ij));
+       //virial_calc(ai,&force,&(dist_ij));
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (3)\n");
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1108,8 +1140,16 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 
        /* energy contribution */
        energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
+       if(pthread_mutex_lock(&emutex))
+               perror("[albe fast] mutex lock (energy)\n");
        moldyn->energy+=energy;
+       if(pthread_mutex_unlock(&emutex))
+               perror("[albe fast] mutex unlock (energy)\n");
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (4)\n");
        ai->e+=energy;
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (4)\n");
 
        /* reset k counter for second k loop */
        kcount=0;
@@ -1204,7 +1244,11 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
        v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
 
        /* force contribution */
+       if(pthread_mutex_lock(&(amutex[jtom->tag])))
+               perror("[albe fast] mutex lock (5)\n");
        v3_add(&(jtom->f),&(jtom->f),&force);
+       if(pthread_mutex_unlock(&(amutex[jtom->tag])))
+               perror("[albe fast] mutex unlock (5)\n");
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1219,20 +1263,34 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
 
        /* virial */
-       virial_calc(ai,&force,&dist_ij);
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (6)\n");
+       albe_v_calc(ai,&force,&dist_ij);
+       //virial_calc(ai,&force,&dist_ij);
 
        /* force contribution to atom i */
        v3_scale(&force,&force,-1.0);
        v3_add(&(ai->f),&(ai->f),&force);
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (6)\n");
 
        /* derivative wrt k */
+#ifdef MATTONI
+       v3_scale(&tmp,&dcosdrk,fcdg);
+       v3_scale(&force,&tmp,pre_dzeta);
+#else
        v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
        v3_scale(&tmp,&dcosdrk,fcdg);
        v3_add(&force,&force,&tmp);
        v3_scale(&force,&force,pre_dzeta);
+#endif
 
        /* force contribution */
+       if(pthread_mutex_lock(&(amutex[ktom->tag])))
+               perror("[albe fast] mutex lock (7)\n");
        v3_add(&(ktom->f),&(ktom->f),&force);
+       if(pthread_mutex_unlock(&(amutex[ktom->tag])))
+               perror("[albe fast] mutex unlock (7)\n");
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1247,11 +1305,16 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
 
        /* virial */
-       virial_calc(ai,&force,&dist_ik);
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (8)\n");
+       albe_v_calc(ai,&force,&dist_ik);
+       //virial_calc(ai,&force,&dist_ik);
        
        /* force contribution to atom i */
        v3_scale(&force,&force,-1.0);
        v3_add(&(ai->f),&(ai->f),&force);
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (8)\n");
 
        /* increase k counter */
        kcount++;       
@@ -1278,6 +1341,8 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
                
                }
+
+       } // i loop
                
 #ifdef DEBUG
        //printf("\n\n");
@@ -1286,8 +1351,6 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
        printf("\n\n");
 #endif
 
-       }
-
 #ifdef DEBUG
        //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z);
        if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1298,35 +1361,17 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
        }
 #endif
 
-       /* some postprocessing */
-#ifdef PARALLEL
-       #pragma omp parallel for
-#endif
-       for(i=0;i<count;i++) {
-               /* calculate global virial */
-               moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
-               moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
-               moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
-               moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
-               moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
-               moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
-
-               /* check forces regarding the given timestep */
-               if(v3_norm(&(itom[i].f))>\
-                   0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
-                       printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
-                              i);
-       }
+       pthread_exit(NULL);
 
        return 0;
 }
 
 int albe_potential_force_calc(t_moldyn *moldyn) {
 
-       int i,ret;
-       t_pft_data *pft_data;
+       int i,j,ret;
+       t_pft_data pft_data[MAX_THREADS];
        int count;
-       pthread_t *pft_thread;
+       pthread_t pft_thread[MAX_THREADS];
        t_atom *itom;
        t_virial *virial;
 
@@ -1359,27 +1404,23 @@ int albe_potential_force_calc(t_moldyn *moldyn) {
 
        }
 
-       /* alloc thread memory */
-       pft_thread=malloc(count*sizeof(pthread_t));
-       if(pft_thread==NULL) {
-               perror("[albe fast] alloc thread mem");
-               return -1;
-       }
-       pft_data=malloc(count*sizeof(t_pft_data));
-       if(pft_data==NULL) {
-               perror("[albe fast] alloc thread mem");
-               return -1;
-       }
-
        /* start threads */
-       for(i=0;i<count;i++) {
+       for(j=0;j<MAX_THREADS;j++) {
 
                /* prepare thread data */
-               pft_data[i].moldyn=moldyn;
-               pft_data[i].i=i;
+               pft_data[j].moldyn=moldyn;
+               pft_data[j].start=j*(count/MAX_THREADS);
+               if(j==MAX_THREADS-1) {
+                       pft_data[j].end=count;
+               }
+               else {
+                       pft_data[j].end=pft_data[j].start;
+                       pft_data[j].end+=count/MAX_THREADS;
+               }
 
-               ret=pthread_create(&(pft_thread[i]),NULL,
-                                   potential_force_thread,&(pft_data[i]));
+               ret=pthread_create(&(pft_thread[j]),NULL,
+                                   potential_force_thread,
+                                   &(pft_data[j]));
                if(ret)  {
                        perror("[albe fast] pf thread create");
                        return ret;
@@ -1387,14 +1428,32 @@ int albe_potential_force_calc(t_moldyn *moldyn) {
        }
 
        /* join threads */
-       for(i=0;i<count;i++) {
-               ret=pthread_join(pft_thread[i],NULL);
+       for(j=0;j<MAX_THREADS;j++) {
+
+               ret=pthread_join(pft_thread[j],NULL);
                if(ret) {
                        perror("[albe fast] pf thread join");
                        return ret;
                }
        }
 
+       /* some postprocessing */
+       for(i=0;i<count;i++) {
+               /* calculate global virial */
+               moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
+               moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
+               moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
+               moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
+               moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
+               moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
+
+               /* check forces regarding the given timestep */
+               if(v3_norm(&(itom[i].f))>\
+                   0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
+                       printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
+                              i);
+       }
+
        return 0;
 }