P?GETRF

计算矩阵A的LU分解，允许行交换（partial pivoting）。

分解结果为A=PLU，其中P为置换矩阵、L为下三角矩阵或下阶梯矩阵且对角线为1，U是上三角矩阵或上阶梯矩阵。

接口定义

C Interface：

void psgetrf_(const int *m, const int *n, float *a, const int *ia, const int *ja, const int *desca, int *ipiv, int *info)

void pdgetrf_(const int *m, const int *n, double*a, const int *ia, const int *ja, const int *desca, int *ipiv, int *info)

void pcgetrf_(const int *m, const int *n, float _Complex *a, const int *ia, const int *ja, const int *desca, int *ipiv, int *info)

void pzgetrf_(const int *m, const int *n, double _Complex*a, const int *ia, const int *ja, const int *desca, int *ipiv, int *info)

Fortran Interface：

PSGETRF(m, n, a, ia, ja, desca, ipiv, info)

PDGETRF(m, n, a, ia, ja, desca, ipiv, info)

PCGETRF(m, n, a, ia, ja, desca, ipiv, info)

PZGETRF(m, n, a, ia, ja, desca, ipiv, info)

参数

参数	类型	范围	说明	输入/输出
m	整型	全局	要操作的行数，比如子矩阵的行数。	输入
n	整型	全局	要操作的列数，比如子矩阵的列数。	输入
a	在psgetrf中为单精度浮点型数组。在pdgetrf中为双精度浮点型数组。在pcgetrf中为单精度复数型数组。在pzgetrf中为双精度复数型数组。	本地	调用前保存分布式矩阵A的本地M*N部分。调用后保存本地部分存放的分解结果L和U，不保存L的对角线元素（均为1）。	输入，输出
ia	整型	全局	子矩阵在全局矩阵中的行索引。	输入
ja	整型	全局	子矩阵在全局矩阵中的列索引。	输入
desca	整型数组	本地，全局	分布式矩阵A的矩阵描述符。	输入
ipiv	整型	本地	包含了主元信息。	输出
info	整型	全局	执行结果：等于0：成功。小于0：第-info个参数值不合法。大于0：矩阵U对角线上第info个元素为0，矩阵分解完成但U是奇异的，会导致求解线程方程组时出现除以零的错误。	输出

依赖

#include <kscalapack.h>

示例

C Interface：

    int izero=0;
    int ione=1;
    int myrank_mpi, nprocs_mpi;
    MPI_Init( &argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank_mpi);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs_mpi);
 
    int n = 8;       // (Global) Matrix size
    int nprow = 2;   // Number of row procs
    int npcol = 2;   // Number of column procs
    int nb = 4;      // (Global) Block size
    char uplo='L';   // Matrix is lower triangular
    char layout='R'; // Block cyclic, Row major processor mapping
    
    printf("Usage: ./test matrix_size block_size nprocs_row nprocs_col\n");
 
    if(argc > 1) {
        n = atoi(argv[1]);
    }
    if(argc > 2) {
        nb = atoi(argv[2]);
    }
    if(argc > 3) {
        nprow = atoi(argv[3]);
    }
    if(argc > 4) {
        npcol = atoi(argv[4]);
    }
    assert(nprow * npcol == nprocs_mpi);
    // Initialize BLACS
    int iam, nprocs;
    int zero = 0;
    int ictxt, myrow, mycol;
    blacs_pinfo_(&iam, &nprocs) ; // BLACS rank and world size
    blacs_get_(&zero, &zero, &ictxt ); // -> Create context
    blacs_gridinit_(&ictxt, &layout, &nprow, &npcol ); // Context -> Initialize the grid
    blacs_gridinfo_(&ictxt, &nprow, &npcol, &myrow, &mycol ); // Context -> Context grid info (# procs row/col, current procs row/col)
 
    // Compute the size of the local matrices
    int mpA    = numroc_( &n, &nb, &myrow, &izero, &nprow ); // My proc -> row of local A
    int nqA    = numroc_( &n, &nb, &mycol, &izero, &npcol ); // My proc -> col of local A
   
    //allocate and fill the matrix A
    double *A;
    A = (double *)calloc(mpA*nqA,sizeof(double)) ;
    if (A==NULL){ printf("Error of memory allocation A on proc %dx%d\n",myrow,mycol); exit(0); }
    int k = 0;
    for (int j = 0; j < nqA; j++) { // local col
        int l_j = j / nb; // which block
        int x_j = j % nb; // where within that block
        int J   = (l_j * npcol + mycol) * nb + x_j; // global col
        for (int i = 0; i < mpA; i++) { // local row
            int l_i = i / nb; // which block
            int x_i = i % nb; // where within that block
            int I   = (l_i * nprow + myrow) * nb + x_i; // global row
            assert(I < n);
            assert(J < n);
            if(I == J) {
                A[k] = i + j + (rand())%10;
            } else {
                A[k] = i + j + 5  + (rand())%10;
            }
            //printf("%d %d -> %d %d -> %f\n", i, j, I, J, A[k]);
            k++;
        }
    }
    //creat descriptor
    int descA[9];
    int info=0;
    int ipiv[10] = {0};
    int lddA = mpA > 1 ? mpA : 1;
    descinit_( descA,  &n, &n, &nb, &nb, &izero, &izero, &ictxt, &lddA, &info);
    if(info != 0) {
        printf("Error in descinit, info = %d\n", info);
    }
    
    //run pdpotrf_ and time
    double MPIt1 = MPI_Wtime();
    printf("[%dx%d] Starting \n", myrow, mycol);
    pdgetrf_(&n, &n, A, &ione, &ione, descA, ipiv,&info);
    if (info != 0) {
        printf("Error in caculate, info = %d\n", info);
    }
    double MPIt2 = MPI_Wtime();
    printf("[%dx%d] Done, time %e s.\n", myrow, mycol, MPIt2 - MPIt1);
    free(A);
 
    //exit and finanlize
    blacs_gridexit_(&ictxt);
    MPI_Finalize();
    return 0;
   
    /* 
     * Origin A: 
     *  
    4.00000 19.0000 25.0000 16.0000 19.0000 19.0000 25.0000 16.0000
    22.0000 8.00000 17.0000 25.0000 22.0000 21.0000 17.0000 25.0000
    23.0000 22.0000 7.00000 19.0000 23.0000 22.0000 18.0000 19.0000
    21.0000 18.0000 23.0000 13.0000 21.0000 18.0000 23.0000 22.0000
    19.0000 19.0000 25.0000 16.0000 4.00000 19.0000 25.0000 16.0000
    22.0000 21.0000 17.0000 25.0000 22.0000 8.00000 17.0000 25.0000
    23.0000 22.0000 18.0000 19.0000 23.0000 22.0000 7.00000 19.0000
    21.0000 18.0000 23.0000 22.0000 21.0000 18.0000 23.0000 13.0000
    */

    // After PDGETRF: 
    /*
    23.0000 22.0000 7.00000 19.0000 23.0000 22.0000 18.0000 19.0000
    0.173913 15.1739 23.7826 12.6957 15.0000 15.1739 21.8696 12.6957
    0.956522 -0.859599 30.7479 17.7393 12.8940 13.0000 18.5817 17.7393
    0.913043 -0.137536 0.646538 -14.0708 -6.27341 -8.40499 -2.44069 -5.07082
    0.826087 0.0544413 0.582891 0.762348 -18.5499 -1.17005 -0.0305972 -6.86113
    0.956522 -0.00286533 0.337340 -0.0624197 0.253277 -17.6137 -6.56767 2.29955
    1.00000 0.00000 0.357749 0.451018 0.0961398 0.0424351 -16.2651 -3.49711
    0.913043 -0.137536 0.646538 0.360379 0.216315 0.290848 -0.0218688 -11.5045
    */

父主题： 矩阵分解函数