km.h:
... #pragma omp declare simd simdlen(4) notinbranch float expf(float) ...
test.c:
#include <km.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Single-loop example: fills an input buffer with pseudo-random values and
 * applies expf() to every element in one flat loop, so the build can be
 * inspected for a vectorized expf call.
 *
 * Returns 0 on success, 1 if any buffer could not be allocated.
 */
int main(void)
{
    int len = 8192;

    float *a = malloc(sizeof *a * len);
    float *b = malloc(sizeof *b * len);
    float *d = malloc(sizeof *d * len);
    if (a == NULL || b == NULL || d == NULL) {
        return 1;
    }

    /*
     * b is filled but not read by this example; it is kept so the sequence
     * of rand() calls, and therefore the contents of a, stays the same.
     *
     * NOTE(review): rand() is not scaled by RAND_MAX here, so most inputs
     * are far larger than the coefficients suggest -- confirm whether a
     * normalized range was intended.
     */
    for (int i = 0; i < len; i++) {
        a[i] = rand() * 7.7680f - 6.3840f;
        b[i] = rand() * 8.7680f - 6.3840f;
        d[i] = 0;
    }

    /* The loop under test: one scalar expf() call per element. */
    for (int j = 0; j < len; j++) {
        d[j] = expf(a[j]);
    }

    /* Buffers are not freed: the process exits immediately afterwards. */
    return 0;
}
编译指令:
gcc test.c -lkm -lksvml -lm -fopenmp-simd -fno-math-errno -O3
使用nm命令查看调用接口:
nm -D a.out
出现_ZGVnN4v_前缀接口表示调用成功。
#include <km.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Nested-loop example: repeats the element-wise expf() pass `loop` times.
 * The inner loop is the one expected to be vectorized; the outer loop only
 * repeats it.
 *
 * Returns 0 on success, 1 if any buffer could not be allocated.
 */
int main(void)
{
    long loop = 1e7;
    int len = 8192;

    float *a = malloc(sizeof *a * len);
    float *b = malloc(sizeof *b * len);
    float *d = malloc(sizeof *d * len);
    if (a == NULL || b == NULL || d == NULL) {
        return 1;
    }

    /*
     * b is filled but not read by this example; it is kept so the sequence
     * of rand() calls, and therefore the contents of a, stays the same.
     *
     * NOTE(review): rand() is not scaled by RAND_MAX here, so most inputs
     * are far larger than the coefficients suggest -- confirm whether a
     * normalized range was intended.
     */
    for (int i = 0; i < len; i++) {
        a[i] = rand() * 7.7680f - 6.3840f;
        b[i] = rand() * 8.7680f - 6.3840f;
        d[i] = 0;
    }

    /*
     * Outer repetition loop around the element-wise loop. The loop nest is
     * deliberately left in this exact shape, since the build instructions
     * for this example depend on how the compiler treats the two loops.
     */
    for (int i = 0; i < loop; i++) {
        for (int j = 0; j < len; j++) {
            d[j] = expf(a[j]);
        }
    }

    /* Buffers are not freed: the process exits immediately afterwards. */
    return 0;
}
如果被向量化的循环外层还嵌套了一层循环(如上面的代码所示),则需要添加额外编译选项,提示编译器不要将外层循环和内层循环合并,编译指令:
gcc test.c -lkm -lksvml -lm -fopenmp-simd -fno-math-errno -O3 -fno-tree-loop-ivcanon -fno-loop-interchange
使用nm命令查看调用接口:
nm -D a.out
出现_ZGVnN4v_前缀接口表示调用成功。