Has anyone ever used MPI_THREAD_MULTIPLE with a thread-intensive application? All of my multithreaded applications hang, and I have found that this thread level is only lightly tested in some MPI implementations and does not work in all cases. Does anyone have a workaround?
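One thing I already know to rule out: MPI_Init_thread only *requests* a thread level, and the library reports what it actually granted through the last argument, so a sanity check along these lines is worth running before anything else (a minimal sketch):

int provided;
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
if(provided < MPI_THREAD_MULTIPLE)
{
    //The library silently granted a weaker level; concurrent MPI calls
    //from multiple threads are then undefined and can hang like this
    fprintf(stderr, "MPI_THREAD_MULTIPLE unavailable (provided=%d)\n", provided);
    MPI_Abort(MPI_COMM_WORLD, 1);
}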
Below is a simple application that hangs inside MPI and never completes:
#include <mpi.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <time.h>      //for time(), used to seed srand() below
#include <pthread.h>
#include <sys/time.h>

#define NUM_CELLS 5    //Total number of integers to sort
/* Initial thread on rank 0: create random numbers and feed them to the first cell */
void *start(void *data)
{
    int i, num;
    time_t t;
    srand((unsigned) time(&t));

    //Create random numbers and send each one to cell 1 (rank 1, tag 1)
    for(i = 0; i < NUM_CELLS; i++)
    {
        num = rand() % 100;
        printf("0 SEND\n");
        MPI_Send(&num, 1, MPI_INT, 1, 1, MPI_COMM_WORLD);
        printf("0 SENT\n");
    }
    return NULL;
}
/* One sort cell in the sort pump. Cell p receives from rank p-1 with tag p
   and forwards to the next cell with tag p+1; cell 5 runs as a second
   thread on rank 1, which is why MPI_THREAD_MULTIPLE is needed there. */
void *sort_cell(void *data)
{
    int *pos = (int *)data;
    int num = 2, i;

    for(i = 0; i < NUM_CELLS; i++)
    {
        //Receive num from the previous cell
        printf("%d WAIT\n", *pos);
        if(*pos == 1)
            MPI_Recv(&num, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        else if(*pos == 2)
            MPI_Recv(&num, 1, MPI_INT, 1, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        else if(*pos == 3)
            MPI_Recv(&num, 1, MPI_INT, 2, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        else if(*pos == 4)
            MPI_Recv(&num, 1, MPI_INT, 3, 4, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        else if(*pos == 5)
            MPI_Recv(&num, 1, MPI_INT, 4, 5, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        printf("%d RECV\n", *pos);

        //Send num on to the next cell (cell 5 is the end of the line)
        printf("%d SEND\n", *pos);
        if(*pos == 1)
            MPI_Send(&num, 1, MPI_INT, 2, 2, MPI_COMM_WORLD);
        else if(*pos == 2)
            MPI_Send(&num, 1, MPI_INT, 3, 3, MPI_COMM_WORLD);
        else if(*pos == 3)
            MPI_Send(&num, 1, MPI_INT, 4, 4, MPI_COMM_WORLD);
        else if(*pos == 4)
            MPI_Send(&num, 1, MPI_INT, 1, 5, MPI_COMM_WORLD);  //cell 5 lives on rank 1
        printf("%d SENT\n", *pos);
    }
    return NULL;
}
int main(int argc, char **argv)
{
    int i;
    double elapsedTime;
    struct timeval t1, t2;

    //Start timer
    gettimeofday(&t1, NULL);

    int my_rank, provided;
    MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);

    //Stop timer
    gettimeofday(&t2, NULL);
    elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0;    //sec to ms
    elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0; //us to ms
    printf("Rank %d - Setup time: %f milliseconds\n", my_rank, elapsedTime);

    //Start timer
    gettimeofday(&t1, NULL);

    //Execute processes in parallel
    if(my_rank == 0)
    {
        int num_threads = 1;
        pthread_t threads[num_threads];
        pthread_create(&threads[0], NULL, start, NULL);
        for(i = 0; i < num_threads; i++)
            (void) pthread_join(threads[i], NULL);
    }
    if(my_rank == 1)
    {
        //Rank 1 runs two cells (1 and 5) in separate threads,
        //which is what requires MPI_THREAD_MULTIPLE
        int pos = 1;
        int num_threads = 2;
        pthread_t threads[num_threads];
        pthread_create(&threads[0], NULL, sort_cell, (void *) &pos);
        int pos1 = 5;
        pthread_create(&threads[1], NULL, sort_cell, (void *) &pos1);
        for(i = 0; i < num_threads; i++)
            (void) pthread_join(threads[i], NULL);
    }
    if(my_rank == 2)
    {
        int pos = 2;
        int num_threads = 1;
        pthread_t threads[num_threads];
        pthread_create(&threads[0], NULL, sort_cell, (void *) &pos);
        for(i = 0; i < num_threads; i++)
            (void) pthread_join(threads[i], NULL);
    }
    if(my_rank == 3)
    {
        int pos = 3;
        int num_threads = 1;
        pthread_t threads[num_threads];
        pthread_create(&threads[0], NULL, sort_cell, (void *) &pos);
        for(i = 0; i < num_threads; i++)
            (void) pthread_join(threads[i], NULL);
    }
    if(my_rank == 4)
    {
        int pos = 4;
        int num_threads = 1;
        pthread_t threads[num_threads];
        pthread_create(&threads[0], NULL, sort_cell, (void *) &pos);
        for(i = 0; i < num_threads; i++)
            (void) pthread_join(threads[i], NULL);
    }

    MPI_Barrier(MPI_COMM_WORLD);

    //Stop timer
    gettimeofday(&t2, NULL);
    elapsedTime = (t2.tv_sec - t1.tv_sec) * 1000.0;    //sec to ms
    elapsedTime += (t2.tv_usec - t1.tv_usec) / 1000.0; //us to ms
    printf("Rank %d - Execution time: %f milliseconds\n", my_rank, elapsedTime);

    MPI_Finalize();
    return 0;
}
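In case it matters, I build and run it like this (the filename is mine; any five-process launch reproduces the hang):

mpicc sort_pump.c -o sort_pump -lpthread
mpirun -np 5 ./sort_pump

The only workaround direction I can think of is to drop back to MPI_THREAD_SERIALIZED and make sure no two threads are ever inside the library at once, using nonblocking calls so a thread never holds the lock while blocked in MPI. A sketch of what I mean (mpi_lock and serialized_recv are my own names, not part of MPI):

static pthread_mutex_t mpi_lock = PTHREAD_MUTEX_INITIALIZER;

/* Post a nonblocking receive, then poll it, releasing the lock between
   polls so other threads can take their turn inside the library. */
static void serialized_recv(int *num, int src, int tag)
{
    MPI_Request req;
    int done = 0;

    pthread_mutex_lock(&mpi_lock);
    MPI_Irecv(num, 1, MPI_INT, src, tag, MPI_COMM_WORLD, &req);
    pthread_mutex_unlock(&mpi_lock);

    while(!done)
    {
        pthread_mutex_lock(&mpi_lock);
        MPI_Test(&req, &done, MPI_STATUS_IGNORE);
        pthread_mutex_unlock(&mpi_lock);
    }
}

That busy-polls, though, so I would much rather get MPI_THREAD_MULTIPLE working as advertised.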