MPI_Cart_create 使用方法

在使用MPI的过程中,可能会涉及到一些针对特定拓扑关系的模拟情况,如用Fox算法求矩阵乘积。此时需要简便地模拟出特定的方阵以及元素之间的关系。MPI提供了一套简便的API,可以快速地创建出携带所需拓扑关系的MPI_Comm。

MPI Process Topology Functions

该系列函数是MPI中用于处理拓扑关系的工具,具体的API可以参考MSDN。其中较为常用的包括MPI_Cart_create、MPI_Dims_create等方法。

MPI_Cart_create

该方法API如下所示

1
2
3
4
5
6
7
8
// Creates a new communicator that carries Cartesian topology information.
//   comm_old  - input communicator the new one is derived from
//   ndims     - number of dimensions of the Cartesian grid
//   dims      - array of ndims entries: number of processes in each dimension
//   periods   - array of ndims entries: per the MPI standard, a true (nonzero)
//               entry makes that dimension periodic (wrap-around)
//   reorder   - whether ranks may be reordered in the new communicator
//               (false keeps each process's rank from comm_old)
//   comm_cart - output: the newly created Cartesian communicator
int MPIAPI MPI_Cart_create(
      MPI_Comm              comm_old,
      int                   ndims,
      _In_count_(ndims) int *dims,
      _In_count_(ndims) int *periods,
      int                   reorder,
_Out_ MPI_Comm              *comm_cart
);

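其中comm_old是原有的通信器,ndims表示维度数量,dims表示全局的各个维度的大小(即每个维度上的进程数),periods表示各个维度是否是周期性的(非零表示该维度首尾相接成环,坐标越界时会回绕),reorder表示是否允许按照划分的小块重新计算rank值(为false时,新通信器中的rank与原通信器中的保持一致),comm_cart用于返回新建的、带有笛卡尔拓扑信息的通信器。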

使用该方法后产生的MPI_Comm将会携带所需要的维度信息,例如

1
// Build a 2-dimensional Cartesian communicator from MPI_COMM_WORLD using the
// caller-provided dims/periods arrays; reorder is passed as false and the new
// communicator handle is written to cart_comm.
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, false, &cart_comm);

执行后,cart_comm所携带的拓扑信息是一个全局的、大小为dims[0] * dims[1]的二维进程网格(矩阵)。

MPI_Dims_create

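根据MPI标准,MPI_Dims_create(nnodes, ndims, dims)用于把nnodes个进程尽可能均衡地划分到ndims个维度上,并把每个维度的大小写入dims(dims中预先填入的非零项会被保留)。不过作者本人未使用过该方法,故不确定其真实效果。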

使用案例

Fox算法求矩阵乘积

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#include <mpi.h>

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <vector>

using namespace std;

/* Row-major linear index of element (i, j) in a matrix with `dim` columns.
 * Every argument and the whole expansion are parenthesised so that compound
 * arguments such as RIDX(i + 1, j, n) expand with the intended precedence. */
#define RIDX(i, j, dim) ((i) * (dim) + (j))

/* Side length of the global N x N matrices handled by the program. */
constexpr int N = 256;

int main() {
MPI_Init(NULL, NULL);

int rank_num, world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank_num);

if (pow(int(sqrt(world_size)), 2) != world_size)
printf("Wrong world size\n");


int proc_sqrt = floor(sqrt(static_cast<double>(world_size)));
int n = N / proc_sqrt; // N is big matrix size
int n_sqrt = n * n; // small matrix size (n * n)

/* check for pragmas */
if (world_size < 4)
{
printf("This algorithm requires at least 4 processors\n");
MPI_Finalize();
return 0;
}
if (proc_sqrt * proc_sqrt != world_size)
{
printf("processor count must be square.\n");
MPI_Finalize();
return 0;
}
if (N % proc_sqrt !=0) {
printf("N mod procs_sqrt !=0 ");
MPI_Finalize();
return 0;
}

if (rank_num == 0)
{
printf("Computing %d * %d matrix, submatrix size is %d * %d\n", N, N, n, n);
}

/* create matrixs */
int *A = new int[n_sqrt];
int *B = new int[n_sqrt];
int *C = new int[n_sqrt];
int *T = new int[n_sqrt];

for (int i = 0; i < n; ++i)
{
for (int j = 0; j < n; ++j)
{
A[RIDX(i, j, n)] = (i + j) * rank_num;
B[RIDX(i, j, n)] = (i + j) * rank_num;
C[RIDX(i, j, n)] = 0;
}
}

/* split comm */
MPI_Comm cart_comm, cart_col, cart_row;
int all_rank, col_rank, row_rank;
int cart_coords[2];
int dims[2], periods[2];
dims[0] = dims[1] = proc_sqrt;
periods[0] = periods[1] = true;
/* create global node matrix */
MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, false, &cart_comm);
MPI_Comm_rank(cart_comm, &all_rank);
MPI_Cart_coords(cart_comm, all_rank, 2, cart_coords);
/* split comm via col & row */
MPI_Comm_split(cart_comm, cart_coords[0], cart_coords[1], &cart_row);
MPI_Comm_split(cart_comm, cart_coords[1], cart_coords[0], &cart_col);
MPI_Comm_rank(cart_row, &row_rank);
MPI_Comm_rank(cart_col, &col_rank);


MPI_Request req_send, req_recv;
MPI_Status status;
for (int i = 0; i < proc_sqrt; ++i)
{
/* swap rows of B */
MPI_Isend(B, n_sqrt, MPI_INT, (cart_coords[0] - 1 + proc_sqrt) % proc_sqrt, 1, cart_col, &req_send);

int broader = (i + cart_coords[0]) % proc_sqrt;
if (broader == cart_coords[1]) std::copy(A, A + n_sqrt, T);
/* boardcast A */
MPI_Bcast(T, n_sqrt, MPI_INT, broader, cart_row);

/* local mul */
for (int r = 0; r < n; ++r)
{
for (int c = 0; c < n; ++c)
{
for (int k = 0; k < n; ++k)
{
C[RIDX(r, c, n)] = T[RIDX(r, k, n)] * B[RIDX(k, c, n)];
}
}
}

MPI_Wait(&req_send, &status);
/* finish row swap */
MPI_Recv(T, n_sqrt, MPI_INT, (cart_coords[0] + 1) % proc_sqrt, 1, cart_col, &status);
std::copy(T, T + n_sqrt, B);
}

/* gather global matrix */
int *matrixA = new int[N * N];
int *matrixB = new int[N * N];
int *matrixC = new int[N * N];

MPI_Gather(A, n_sqrt, MPI_INT, matrixA, n_sqrt, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Gather(B, n_sqrt, MPI_INT, matrixB, n_sqrt, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Gather(C, n_sqrt, MPI_INT, matrixC, n_sqrt, MPI_INT, 0, MPI_COMM_WORLD);

if (rank_num == 0)
{

ofstream Af("c.data/a.txt"), Bf("c.data/b.txt"), Cf("c.data/c.txt");
for (int i = 0; i < N; ++i)
{
for (int j = 0; j < N; ++j)
{
Af << matrixA[RIDX(i, j, N)] << "\t";
}
Af << "\n";
}

for (int i = 0; i < N; ++i)
{
for (int j = 0; j < N; ++j)
{
Bf << matrixB[RIDX(i, j, N)] << "\t";
}
Bf << "\n";
}

for (int i = 0; i < N; ++i)
{
for (int j = 0; j < N; ++j)
{
Cf << C[RIDX(i, j, N)] << "\t";
}
Cf << "\n";
}

Af.close(), Bf.close(), Cf.close();
}


/* free resources */
MPI_Comm_free(&cart_comm);
MPI_Comm_free(&cart_col);
MPI_Comm_free(&cart_row);
delete[] A;
delete[] B;
delete[] C;
delete[] T;
delete[] matrixA;
delete[] matrixB;
delete[] matrixC;
MPI_Finalize();
}

MPI_Cart_Create 使用方法
http://anyin233.github.io/2022/12/25/MPI-topology-with-MPI-Cart-Create/
Author
anyin233
Posted on
December 25, 2022
Licensed under