pthreads implemented (fucking slow!)
authorhackbard <hackbard@sage.physik.uni-augsburg.de>
Wed, 8 Oct 2008 15:12:05 +0000 (17:12 +0200)
committerhackbard <hackbard@sage.physik.uni-augsburg.de>
Wed, 8 Oct 2008 15:12:05 +0000 (17:12 +0200)
moldyn.c
potentials/albe_fast.c

index d7d89bc..c27fc62 100644 (file)
--- a/moldyn.c
+++ b/moldyn.c
 #undef PSE_NAME
 #undef PSE_COL
 
+#ifdef PTHREADS
+/* global mutexes */
+pthread_mutex_t *amutex;
+pthread_mutex_t emutex;
+#endif
+
 /*
  * the moldyn functions
  */
@@ -66,13 +72,27 @@ int moldyn_init(t_moldyn *moldyn,int argc,char **argv) {
        rand_init(&(moldyn->random),NULL,1);
        moldyn->random.status|=RAND_STAT_VERBOSE;
 
+#ifdef PTHREADS
+       pthread_mutex_init(&emutex,NULL);
+#endif
+
        return 0;
 }
 
 int moldyn_shutdown(t_moldyn *moldyn) {
 
+#ifdef PTHREADS
+       int i;
+#endif
+
        printf("[moldyn] shutdown\n");
 
+#ifdef PTHREADS
+       for(i=0;i<moldyn->count;i++)
+               pthread_mutex_destroy(&(amutex[i]));
+       pthread_mutex_destroy(&emutex);
+#endif
+
        moldyn_log_shutdown(moldyn);
        link_cell_shutdown(moldyn);
        rand_close(&(moldyn->random));
@@ -500,6 +520,9 @@ int create_lattice(t_moldyn *moldyn,u8 type,double lc,int element,double mass,
        void *ptr;
        t_atom *atom;
        char name[16];
+#ifdef PTHREADS
+       pthread_mutex_t *mutex;
+#endif
 
        new=a*b*c;
        count=moldyn->count;
@@ -522,6 +545,16 @@ int create_lattice(t_moldyn *moldyn,u8 type,double lc,int element,double mass,
        moldyn->atom=ptr;
        atom=&(moldyn->atom[count]);
 
+#ifdef PTHREADS
+       ptr=realloc(amutex,(count+new)*sizeof(pthread_mutex_t));
+       if(!ptr) {
+               perror("[moldyn] mutex realloc (add atom)");
+               return -1;
+       }
+       amutex=ptr;
+       mutex=&(amutex[count]);
+#endif
+
        /* no atoms on the boundaries (only reason: it looks better!) */
        if(!origin) {
                orig.x=0.5*lc;
@@ -579,6 +612,9 @@ int create_lattice(t_moldyn *moldyn,u8 type,double lc,int element,double mass,
                atom[ret].tag=count+ret;
                check_per_bound(moldyn,&(atom[ret].r));
                atom[ret].r_0=atom[ret].r;
+#ifdef PTHREADS
+               pthread_mutex_init(&(mutex[ret]),NULL);
+#endif
        }
 
        /* update total system mass */
@@ -613,6 +649,16 @@ int add_atom(t_moldyn *moldyn,int element,double mass,u8 brand,u8 attr,
        moldyn->lc.subcell->list=ptr;
 #endif
 
+#ifdef PTHREADS
+       ptr=realloc(amutex,(count+1)*sizeof(pthread_mutex_t));
+       if(!ptr) {
+               perror("[moldyn] mutex realloc (add atom)");
+               return -1;
+       }
+       amutex=ptr;
+       pthread_mutex_init(&(amutex[count]),NULL);
+#endif
+
        atom=moldyn->atom;
 
        /* initialize new atom */
index 29c303b..952f1fe 100644 (file)
 #include <omp.h>
 #endif
 
-#ifdef PTHREAD
+#ifdef PTHREADS
 #include <pthread.h>
+#define MAX_THREADS 4
 #endif
 
 #include "../moldyn.h"
 #include "../math/math.h"
 #include "albe.h"
 
+#ifdef PTHREADS
+extern pthread_mutex_t *amutex;
+extern pthread_mutex_t emutex;
+#endif
+
 /*
  * virial calculation
  */
@@ -1078,14 +1084,20 @@ void *potential_force_thread(void *ptr) {
        /* force contribution for atom i */
        scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
        v3_scale(&force,&(dist_ij),scale);
+       pthread_mutex_lock(&(amutex[ai->tag])); 
        v3_add(&(ai->f),&(ai->f),&force);
+       pthread_mutex_unlock(&(amutex[ai->tag]));       
 
        /* force contribution for atom j */
        v3_scale(&force,&force,-1.0); // dri rij = - drj rij
+       pthread_mutex_lock(&(amutex[jtom->tag]));       
        v3_add(&(jtom->f),&(jtom->f),&force);
+       pthread_mutex_unlock(&(amutex[jtom->tag]));     
 
        /* virial */
+       pthread_mutex_lock(&(amutex[ai->tag])); 
        virial_calc(ai,&force,&(dist_ij));
+       pthread_mutex_unlock(&(amutex[ai->tag]));       
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1108,8 +1120,12 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 
        /* energy contribution */
        energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
+       pthread_mutex_lock(&emutex);
        moldyn->energy+=energy;
+       pthread_mutex_unlock(&emutex);
+       pthread_mutex_lock(&(amutex[ai->tag])); 
        ai->e+=energy;
+       pthread_mutex_unlock(&(amutex[ai->tag]));       
 
        /* reset k counter for second k loop */
        kcount=0;
@@ -1204,7 +1220,9 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
        v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
 
        /* force contribution */
+       pthread_mutex_lock(&(amutex[jtom->tag]));       
        v3_add(&(jtom->f),&(jtom->f),&force);
+       pthread_mutex_unlock(&(amutex[jtom->tag]));     
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1219,11 +1237,13 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
 
        /* virial */
+       pthread_mutex_lock(&(amutex[ai->tag])); 
        virial_calc(ai,&force,&dist_ij);
 
        /* force contribution to atom i */
        v3_scale(&force,&force,-1.0);
        v3_add(&(ai->f),&(ai->f),&force);
+       pthread_mutex_unlock(&(amutex[ai->tag]));       
 
        /* derivative wrt k */
        v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
@@ -1232,7 +1252,9 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
        v3_scale(&force,&force,pre_dzeta);
 
        /* force contribution */
+       pthread_mutex_lock(&(amutex[ktom->tag]));       
        v3_add(&(ktom->f),&(ktom->f),&force);
+       pthread_mutex_unlock(&(amutex[ktom->tag]));     
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1247,11 +1269,13 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
 
        /* virial */
+       pthread_mutex_lock(&(amutex[ai->tag])); 
        virial_calc(ai,&force,&dist_ik);
        
        /* force contribution to atom i */
        v3_scale(&force,&force,-1.0);
        v3_add(&(ai->f),&(ai->f),&force);
+       pthread_mutex_unlock(&(amutex[ai->tag]));       
 
        /* increase k counter */
        kcount++;       
@@ -1298,35 +1322,17 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
        }
 #endif
 
-       /* some postprocessing */
-#ifdef PARALLEL
-       #pragma omp parallel for
-#endif
-       for(i=0;i<count;i++) {
-               /* calculate global virial */
-               moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
-               moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
-               moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
-               moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
-               moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
-               moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
-
-               /* check forces regarding the given timestep */
-               if(v3_norm(&(itom[i].f))>\
-                   0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
-                       printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
-                              i);
-       }
+       pthread_exit(NULL);
 
        return 0;
 }
 
 int albe_potential_force_calc(t_moldyn *moldyn) {
 
-       int i,ret;
-       t_pft_data *pft_data;
+       int i,j,ret;
+       t_pft_data pft_data[MAX_THREADS];
        int count;
-       pthread_t *pft_thread;
+       pthread_t pft_thread[MAX_THREADS];
        t_atom *itom;
        t_virial *virial;
 
@@ -1359,42 +1365,70 @@ int albe_potential_force_calc(t_moldyn *moldyn) {
 
        }
 
-       /* alloc thread memory */
-       pft_thread=malloc(count*sizeof(pthread_t));
-       if(pft_thread==NULL) {
-               perror("[albe fast] alloc thread mem");
-               return -1;
-       }
-       pft_data=malloc(count*sizeof(t_pft_data));
-       if(pft_data==NULL) {
-               perror("[albe fast] alloc thread mem");
-               return -1;
-       }
+       i=0;
+       while(i<count) {
 
-       /* start threads */
-       for(i=0;i<count;i++) {
+               /* start threads */
+               for(j=0;j<MAX_THREADS;j++) {
 
-               /* prepare thread data */
-               pft_data[i].moldyn=moldyn;
-               pft_data[i].i=i;
+                       /* break if all atoms are processed */
+                       if(j+i==count)
+                               break;
 
-               ret=pthread_create(&(pft_thread[i]),NULL,
-                                   potential_force_thread,&(pft_data[i]));
-               if(ret)  {
-                       perror("[albe fast] pf thread create");
-                       return ret;
+                       /* prepare thread data */
+                       pft_data[j].moldyn=moldyn;
+                       pft_data[j].i=j+i;
+
+                       ret=pthread_create(&(pft_thread[j]),NULL,
+                                           potential_force_thread,
+                                           &(pft_data[j]));
+                       if(ret)  {
+                               perror("[albe fast] pf thread create");
+                               return ret;
+                       }
+               }
+
+               //printf("threads created! %d\n",j);
+
+               /* join threads */
+               for(j=0;j<MAX_THREADS;j++) {
+
+                       if(j+i==count)
+                               break;
+
+                       ret=pthread_join(pft_thread[j],NULL);
+                       if(ret) {
+                               perror("[albe fast] pf thread join");
+                               return ret;
+                       }
                }
+
+               /* increment counter */
+               i+=MAX_THREADS;
+
+               //printf("threads joined! -> %d\n",i);
+
        }
 
-       /* join threads */
+       /* some postprocessing */
        for(i=0;i<count;i++) {
-               ret=pthread_join(pft_thread[i],NULL);
-               if(ret) {
-                       perror("[albe fast] pf thread join");
-                       return ret;
-               }
+               /* calculate global virial */
+               moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
+               moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
+               moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
+               moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
+               moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
+               moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
+
+               /* check forces regarding the given timestep */
+               if(v3_norm(&(itom[i].f))>\
+                   0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
+                       printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
+                              i);
        }
 
+       pthread_exit(NULL);
+
        return 0;
 }