Has anyone ever used MPI_THREAD_MULTIPLE with thread-intensive applications? All my applications hang, and I found that this MPI feature is only lightly tested and doesn't work in all cases. Does anyone have a workaround?
Below is a simple application which fails to complete due to MPI:
#include <mpi.h> 
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <pthread.h>
#include <sys/time.h>
/* Loop bound shared by start() and sort_cell(): the number of integers the
 * feeder sends and the number of receive/forward rounds each cell performs. */
#define NUM_CELLS 5		//Total number of integers to sort
/*
 * Feeder thread entry point (launched by rank 0 in main).
 *
 * Seeds the C PRNG from the current time, then generates NUM_CELLS random
 * integers in [0, 99] and sends each one to rank 1 with tag 1, logging
 * before and after every blocking send.
 *
 * data: unused.
 * Returns NULL.
 */
void *start(void *data)
{
	time_t now;
	int value;
	int sent = 0;

	srand((unsigned) time(&now));

	//Generate and ship one number per iteration
	while (sent < NUM_CELLS) {
		value = rand() % 100;

		printf("0 SEND\n");
		MPI_Send(&value, 1, MPI_INT, 1, 1, MPI_COMM_WORLD);
		printf("0 SENT\n");

		sent++;
	}

	return NULL;
}
/*
 * Thread entry point for one cell of the pipeline.
 *
 * data points to an int holding the cell number. For NUM_CELLS rounds the
 * cell:
 *   - cells 1..5: block in MPI_Recv for one int from rank (cell - 1) with
 *     tag equal to the cell number;
 *   - cells 1..3: forward the int to rank (cell + 1) with tag (cell + 1);
 *   - cell 4: forward the int to rank 1 with tag 5 (picked up by cell 5);
 *   - cell 5 and any other value: send nothing.
 * The WAIT/RECV/SEND/SENT progress lines are printed every round regardless
 * of whether a receive or send actually took place.
 *
 * Returns NULL.
 */
void *sort_cell(void *data)
{
	const int cell = *(int *)data;
	int value = 2;
	int round;

	for (round = 0; round < NUM_CELLS; ++round) {
		//Receive from the previous stage
		printf("%d WAIT\n", cell);
		switch (cell) {
		case 1:
		case 2:
		case 3:
		case 4:
		case 5:
			MPI_Recv(&value, 1, MPI_INT, cell - 1, cell, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			break;
		default:
			break;
		}
		printf("%d RECV\n", cell);

		//Forward to the next stage
		printf("%d SEND\n", cell);
		switch (cell) {
		case 1:
		case 2:
		case 3:
			MPI_Send(&value, 1, MPI_INT, cell + 1, cell + 1, MPI_COMM_WORLD);
			break;
		case 4:
			/* Last hop goes back to rank 1, where cell 5 is waiting on tag 5. */
			MPI_Send(&value, 1, MPI_INT, 1, 5, MPI_COMM_WORLD);
			break;
		default:
			break;
		}
		printf("%d SENT\n", cell);
	}

	return NULL;
}
/* Ranks the pipeline addresses directly: rank 0 feeds, ranks 1-4 host the cells. */
enum { REQUIRED_RANKS = 5 };

/* Milliseconds elapsed between two gettimeofday() samples. */
static double elapsed_ms(const struct timeval *from, const struct timeval *to)
{
    double ms = (to->tv_sec - from->tv_sec) * 1000.0;   //sec to ms
    ms += (to->tv_usec - from->tv_usec) / 1000.0;        //us to ms
    return ms;
}

/* Creates one thread; on failure reports the error and aborts the whole MPI job. */
static void spawn_or_abort(pthread_t *thread, void *(*entry)(void *), void *arg)
{
    int rc = pthread_create(thread, NULL, entry, arg);

    if (rc != 0) {
        fprintf(stderr, "pthread_create failed: %s\n", strerror(rc));
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }
}

/*
 * Program entry point.
 *
 * Initialises MPI requesting MPI_THREAD_MULTIPLE, reports the setup time,
 * then launches the per-rank worker threads:
 *   rank 0      -> one start() thread (feeder)
 *   rank 1      -> two sort_cell() threads, cells 1 and 5
 *   ranks 2..4  -> one sort_cell() thread each, cell number == rank
 *   other ranks -> no threads
 * After joining its threads every rank meets at a barrier, reports the
 * execution time and finalises MPI.
 *
 * The job is aborted with a diagnostic when the MPI library grants a thread
 * level below MPI_THREAD_MULTIPLE (rank 1 has two threads blocked in
 * MPI_Recv at once, which is only legal at that level), when fewer than
 * REQUIRED_RANKS ranks were started, or when a thread cannot be created.
 *
 * Returns 0 on success.
 */
int main(int argc, char **argv)
{
    int i;
    int my_rank, world_size, provided;
    struct timeval t1, t2;
    pthread_t threads[2];
    int cells[2];           /* must outlive the threads: sort_cell() reads through the pointer */
    int num_threads = 0;

    //Start timer
    gettimeofday(&t1, NULL);
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    //Stop timer
    gettimeofday(&t2, NULL);
    printf("Rank %d - Setup time: %f milliseconds\n", my_rank, elapsed_ms(&t1, &t2));

    /* MPI_Init_thread may grant less than requested; the thread-level
     * constants are ordered, so a plain comparison is sufficient. */
    if (provided < MPI_THREAD_MULTIPLE) {
        fprintf(stderr,
                "Rank %d: MPI_THREAD_MULTIPLE not available (provided level %d)\n",
                my_rank, provided);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    if (world_size < REQUIRED_RANKS) {
        if (my_rank == 0)
            fprintf(stderr, "Need at least %d ranks, got %d\n",
                    REQUIRED_RANKS, world_size);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    //Start timer
    gettimeofday(&t1, NULL);

    //Execute processes in parallel
    if (my_rank == 0) {
        spawn_or_abort(&threads[num_threads++], start, NULL);
    } else if (my_rank == 1) {
        /* Rank 1 hosts both the first cell and the final sink cell. */
        cells[0] = 1;
        cells[1] = 5;
        for (i = 0; i < 2; i++)
            spawn_or_abort(&threads[num_threads++], sort_cell, &cells[i]);
    } else if (my_rank >= 2 && my_rank <= 4) {
        cells[0] = my_rank;
        spawn_or_abort(&threads[num_threads++], sort_cell, &cells[0]);
    }

    for (i = 0; i < num_threads; i++)
        (void) pthread_join(threads[i], NULL);

    MPI_Barrier(MPI_COMM_WORLD);

    //Stop timer
    gettimeofday(&t2, NULL);
    printf("Rank %d - Execution time: %f milliseconds\n", my_rank, elapsed_ms(&t1, &t2));
    MPI_Finalize();

    return 0;
}
:
                       
                    
0 Answer(s)