/* matrix8.c - Parallel Matrix Multiplication improvement on matrix7: timing compile: mpicc -Wall -O -o matrix8 matrix8.c run: mpirun -np num_procs matrix8 file */ #include #include #include #include /* Aux fcts ------------------------------------------------------- */ int * allocVector(int n) { return (int *)malloc(n*sizeof(int)); } int ** allocMatrix(int m, int n) { int ** newM = (int **)malloc(m*sizeof(int *)); int i; for (i = 0; i < m; i++) newM[i] = allocVector(n); return newM; } void readMatrix(FILE * fin, int ** M, int m, int n) { int i, j; for (i = 0; i < m; i++) for (j = 0; j < n; j++) fscanf(fin, "%d", &(M[i][j])); } void writeMatrix(FILE * fout, int ** M, int m, int n) { int i, j; for (i = 0; i < m; i++) { for (j = 0; j < n; j++) fprintf(fout, "%d ", M[i][j]); putc('\n', fout); } } /* Auxiliary function: transpose m*n matrix into n*m matrix */ int ** transpose(int ** M, int m, int n) { int ** MT; int i, j; MT = allocMatrix(n, m); for (i = 0; i < m; i++) for (j = 0; j < n; j++) MT[j][i] = M[i][j]; return MT; } /* Auxiliary function: dot product of two vectors of length n */ int dotProd(int * V1, int * V2, int n) { int dp = 0; int i; for (i = 0; i < n; i++) dp = dp + V1[i] * V2[i]; return dp; } /* Master: Spread mult M1*M2 (a m*m matrix) over p workers; each computes approx m/p rows */ int ** matrixProdMaster(int ** M1, int ** M2T, int m, int n, int p) { int i, j, r, s; MPI_Status status; int ** M3 = allocMatrix(m,m); s = m/p; r = m%p; // First r workers get sent s+1 rows of M1 for (i = 0; i < r; i++) for (j = 0; j < s+1; j++) MPI_Send(M1[i * (s+1) + j], n, MPI_INT, i+1, 0, MPI_COMM_WORLD); // Remaining p-r workers get sent s rows of M1 for (i = r; i < p; i++) for (j = 0; j < s; j++) MPI_Send(M1[r + i * s + j], n, MPI_INT, i+1, 0, MPI_COMM_WORLD); // All rows of M2T sent to all workers for (j = 0; j < m; j++) for (i = 0; i < p; i++) MPI_Send(M2T[j], n, MPI_INT, i+1, 0, MPI_COMM_WORLD); // First r workers deliver s+1 rows of M3 for (i = 0; i < r; i++) for (j = 0; j < s+1; j++) MPI_Recv(M3[i * (s+1) + j], m, MPI_INT, i+1, 0, MPI_COMM_WORLD, &status); // Remaining p-r workers deliver s rows of M3 for (i = r; i < p; i++) for (j = 0; j < s; j++) MPI_Recv(M3[r + i * s + j], m, MPI_INT, i+1, 0, MPI_COMM_WORLD, &status); return M3; } /* Worker: Compute approx m/p rows of resulting m*m matrix */ void matrixProdWorker(int m, int n, int p, int id) { MPI_Status status; int i, j, s, r; s = m/p; r = m%p; if (id <= r) s++; int ** R = allocMatrix(s, n); int * C = allocVector(n); int ** RR = allocMatrix(s, m); for (i = 0; i < s; i++) MPI_Recv(R[i], n, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); for (j = 0; j < m; j++) { MPI_Recv(C, n, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); for (i = 0; i < s; i++) RR[i][j] = dotProd(R[i], C, n); } for (i = 0; i < s; i++) MPI_Send(RR[i], m, MPI_INT, 0, 0, MPI_COMM_WORLD); } /* Main fct ------------------------------------------------------- */ int main(int argc, char ** argv) { int p, id, m, n; double elapsed_time; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &p); MPI_Comm_rank(MPI_COMM_WORLD, &id); if (id == 0) { /* master porcessor */ FILE * fin = fopen(argv[1], "r"); fscanf(fin, "%d %d", &m, &n); int ** M1 = allocMatrix(m,n); int ** M2 = allocMatrix(n,m); readMatrix(fin, M1, m, n); readMatrix(fin, M2, n, m); fclose(fin); // start the timer MPI_Barrier(MPI_COMM_WORLD); elapsed_time = - MPI_Wtime(); int i; for(i = 1; i < p; i++) { MPI_Send(&m, 1, MPI_INT, i, 0, MPI_COMM_WORLD); MPI_Send(&n, 1, MPI_INT, i, 0, MPI_COMM_WORLD); } if (p > 1) { int ** M2T = transpose(M2, n, m); int ** M3 = matrixProdMaster(M1, M2T, m, n, p-1); // stop the timer elapsed_time += MPI_Wtime(); // write matrix to /dev/null FILE * fout = fopen("/dev/null", "w"); writeMatrix(fout, M3, m, m); // printf("Multiplying %dx%d matrices on %d workers: %f secs\n", m, n, p-1, elapsed_time); // printf("%d %d %2d %2f\n", m, n, p-1, elapsed_time); printf("%d * %d; %2d processors; %f secs\n", m,n,p,elapsed_time); } else printf("Must have at least 2 processors\n"); } else { MPI_Status status; // start the timer MPI_Barrier(MPI_COMM_WORLD); // elapsed_time = - MPI_Wtime(); MPI_Recv(&m, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); MPI_Recv(&n, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); matrixProdWorker(m, n, p-1, id); // stop the timer // elapsed_time += MPI_Wtime(); } MPI_Finalize(); return 0; }