该接口用于在使能box_order后,获取本地进程数据的大小和位置,并计算所需分配的空间。
C interface:
ptrdiff_t kml_fft_mpi_local_size_3d_transposed_ext(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Comm comm, enum SCALFFT_DECOMPOSE_TYPE_E decomp_type, const int *order, ptrdiff_t *low, ptrdiff_t *high);
ptrdiff_t kml_fftf_mpi_local_size_3d_transposed_ext(ptrdiff_t n0, ptrdiff_t n1, ptrdiff_t n2, MPI_Comm comm, enum SCALFFT_DECOMPOSE_TYPE_E decomp_type, const int *order, ptrdiff_t *low, ptrdiff_t *high);
函数返回一个ptrdiff_t类型的值,表示要分配的buffer元素个数,成功则返回值大于等于0,失败返回-1。
参数名 |
数据类型 |
描述 |
输入/输出 |
---|---|---|---|
n0 |
ptrdiff_t |
待处理数据第1个维度大小,约束:n0 ≥ 1 |
输入 |
n1 |
ptrdiff_t |
待处理数据第2个维度大小,约束:n1 ≥ 1 |
输入 |
n2 |
ptrdiff_t |
待处理数据第3个维度大小,约束:n2 ≥ 1 |
输入 |
comm |
MPI_Comm |
通信域 |
输入 |
decomp_type |
enum SCALFFT_DECOMPOSE_TYPE_E |
分解算法,SCALFFT_DECOMPOSE_TYPE_SLAB,SCALFFT_DECOMPOSE_TYPE_PENCIL,SCALFFT_DECOMPOSE_TYPE_BRICK |
输入 |
order |
const int * |
长度为3的一维数组,表示输入数据维度的顺序,取值为0、1、2的任意排列;输入NULL时,默认order为0、1、2。 |
输入 |
low |
ptrdiff_t * |
本地数据起点(长度为3的数组) |
输出 |
high |
ptrdiff_t * |
本地数据终点(长度为3的数组) |
输出 |
C: "kfft-mpi.h"
C interface
const int n0 = 4, n1 = 4, n2 = 4; kml_fft_plan plan; int provided; MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided); MPI_Comm comm = MPI_COMM_WORLD; kml_fft_complex *in = NULL; kml_fft_complex *out = NULL; /* get local data size and allocate */ ptrdiff_t in_low[3] = {0}; ptrdiff_t in_high[3] = {0}; ptrdiff_t out_low[3] = {0}; ptrdiff_t out_high[3] = {0}; ptrdiff_t n[3] = {n0, n1, n2}; int in_order[3] = {0, 1, 2}; int out_order[3] = {1, 2, 0}; ptrdiff_t in_alloc_local = kml_fft_mpi_local_size_3d_transposed_ext(n[0], n[1], n[2], comm, SCALFFT_DECOMPOSE_TYPE_PENCIL, in_order, in_low, in_high); if (in_alloc_local == -1) { printf("[%s][%d] allocate size fail!!!\n", __func__, __LINE__); } ptrdiff_t out_alloc_local = kml_fft_mpi_local_size_3d_transposed_ext(n[0], n[1], n[2], comm, SCALFFT_DECOMPOSE_TYPE_PENCIL, out_order, out_low, out_high); if (out_alloc_local == -1) { printf("[%s][%d] allocate size fail!!!\n", __func__, __LINE__); } in = (kml_fft_complex *)kml_fft_malloc(sizeof(kml_fft_complex) * in_alloc_local); if (in == NULL) { printf("[%s][%d] malloc memory fail!!!\n", __func__, __LINE__); } out = (kml_fft_complex *)kml_fft_malloc(sizeof(kml_fft_complex) * out_alloc_local); if (out == NULL) { printf("[%s][%d] malloc memory fail!!!\n", __func__, __LINE__); } /* create plan */ int in_low_int[3] = {in_low[0], in_low[1], in_low[2]}; int in_high_int[3] = {in_high[0], in_high[1], in_high[2]}; int out_low_int[3] = {out_low[0], out_low[1], out_low[2]}; int out_high_int[3] = {out_high[0], out_high[1], out_high[2]}; kml_fft_mpi_options options = { .a2a_algo = A2A_ALGO_AUTO_TUNING, .decomp_type = SCALFFT_DECOMPOSE_TYPE_PENCIL }; plan = kml_fft_mpi_plan_create(BACKEND_KFFT, in_low_int, in_high_int, in_order, out_low_int, out_high_int, out_order, comm, options); /* execute plan */ int scale = 0; kml_fft_mpi_execute_dft_ext(plan, in, out, scale, KML_FFT_FORWARD); kml_fft_mpi_execute_dft_ext(plan, out, in, scale, KML_FFT_BACKWARD); // kml_fft_mpi_forward_c2c(plan, in, 
out, scale); // kml_fft_mpi_backward_c2c(plan, out, in, scale); kml_fft_destroy_plan_ext(plan); kml_fft_free(in); kml_fft_free(out); MPI_Finalize();[l1] [l2]