Skip to content

Commit c1c0d5c

Browse files
authored
Merge pull request #3492 from binebrank/arm_sve_zgemm
SVE zgemm&cgemm (and other BLAS 3 complex)
2 parents 8ac2c1d + 19d435b commit c1c0d5c

34 files changed

+7910
-106
lines changed

CONTRIBUTORS.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,3 +201,6 @@ In chronological order:
201201
* Bine Brank <https://github.com/binebrank>
202202
* [2021-10-27] Add vector-length-agnostic DGEMM kernels for Arm SVE
203203
* [2021-11-20] Vector-length-agnostic Arm SVE copy routines for DGEMM, DTRMM, DSYMM
204+
* [2021-11-12] SVE kernels for SGEMM, STRMM and corresponding SVE copy functions
205+
* [2022-01-06] SVE kernels for CGEMM, ZGEMM, CTRMM, ZTRMM and corresponding SVE copy functions
206+
* [2022-01-18] SVE kernels and copy functions for TRSM

kernel/CMakeLists.txt

Lines changed: 78 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -323,55 +323,93 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
323323

324324

325325
#hemm
326-
GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "hemm_iutcopy" false "" "" false ${float_type})
327-
GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "hemm_iltcopy" false "" "" false ${float_type})
326+
if (NOT DEFINED ${float_char}HEMMUTCOPY_M)
327+
set(HEMMUTCOPY_M "generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_M}.c")
328+
set(HEMMLTCOPY_M "generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_M}.c")
329+
else ()
330+
set(HEMMUTCOPY_M "${KERNELDIR}/${${float_char}HEMMUTCOPY_M}")
331+
set(HEMMLTCOPY_M "${KERNELDIR}/${${float_char}HEMMLTCOPY_M}")
332+
endif()
333+
GenerateNamedObjects(${HEMMUTCOPY_M} "" "hemm_iutcopy" false "" "" false ${float_type})
334+
GenerateNamedObjects(${HEMMLTCOPY_M} "LOWER" "hemm_iltcopy" false "" "" false ${float_type})
328335
GenerateNamedObjects("generic/zhemm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "hemm_outcopy" false "" "" false ${float_type})
329336
GenerateNamedObjects("generic/zhemm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "hemm_oltcopy" false "" "" false ${float_type})
330337

331338
# symm for c and z
339+
if (NOT DEFINED ${float_char}SYMMUCOPY_M)
340+
set(SYMMUCOPY_M "generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_M}.c")
341+
set(SYMMLCOPY_M "generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_M}.c")
342+
else ()
343+
set(SYMMUCOPY_M "${KERNELDIR}/${${float_char}SYMMUCOPY_M}")
344+
set(SYMMLCOPY_M "${KERNELDIR}/${${float_char}SYMMLCOPY_M}")
345+
endif()
332346
GenerateNamedObjects("generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "symm_outcopy" false "" "" false ${float_type})
333-
GenerateNamedObjects("generic/zsymm_ucopy_${${float_char}GEMM_UNROLL_M}.c" "" "symm_iutcopy" false "" "" false ${float_type})
347+
GenerateNamedObjects(${SYMMUCOPY_M} "" "symm_iutcopy" false "" "" false ${float_type})
334348

335349
GenerateNamedObjects("generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_N}.c" "LOWER;OUTER" "symm_oltcopy" false "" "" false ${float_type})
336-
GenerateNamedObjects("generic/zsymm_lcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "symm_iltcopy" false "" "" false ${float_type})
350+
GenerateNamedObjects(${SYMMLCOPY_M} "LOWER" "symm_iltcopy" false "" "" false ${float_type})
351+
337352

338-
GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iunucopy" false "" "" false ${float_type})
339-
GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iunncopy" false "" "" false ${float_type})
353+
if (NOT DEFINED ${float_char}TRMMUNCOPY_M)
354+
set(TRMMUNCOPY_M "generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_M}.c")
355+
set(TRMMLNCOPY_M "generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c")
356+
set(TRMMUTCOPY_M "generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c")
357+
set(TRMMLTCOPY_M "generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c")
358+
else ()
359+
set(TRMMUNCOPY_M "${KERNELDIR}/${${float_char}TRMMUNCOPY_M}")
360+
set(TRMMLNCOPY_M "${KERNELDIR}/${${float_char}TRMMLNCOPY_M}")
361+
set(TRMMUTCOPY_M "${KERNELDIR}/${${float_char}TRMMUTCOPY_M}")
362+
set(TRMMLTCOPY_M "${KERNELDIR}/${${float_char}TRMMLTCOPY_M}")
363+
endif ()
364+
GenerateNamedObjects(${TRMMUNCOPY_M} "UNIT" "trmm_iunucopy" false "" "" false ${float_type})
365+
GenerateNamedObjects(${TRMMUNCOPY_M} "" "trmm_iunncopy" false "" "" false ${float_type})
340366
GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_ounucopy" false "" "" false ${float_type})
341367
GenerateNamedObjects("generic/ztrmm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_ounncopy" false "" "" false ${float_type})
342368

343-
GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type})
344-
GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_ilnncopy" false "" "" false ${float_type})
369+
GenerateNamedObjects(${TRMMLNCOPY_M} "LOWER;UNIT" "trmm_ilnucopy" false "" "" false ${float_type})
370+
GenerateNamedObjects(${TRMMLNCOPY_M} "LOWER" "trmm_ilnncopy" false "" "" false ${float_type})
345371
GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_olnucopy" false "" "" false ${float_type})
346372
GenerateNamedObjects("generic/ztrmm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_olnncopy" false "" "" false ${float_type})
347373

348-
GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trmm_iutucopy" false "" "" false ${float_type})
349-
GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trmm_iutncopy" false "" "" false ${float_type})
374+
GenerateNamedObjects(${TRMMUTCOPY_M} "UNIT" "trmm_iutucopy" false "" "" false ${float_type})
375+
GenerateNamedObjects(${TRMMUTCOPY_M} "" "trmm_iutncopy" false "" "" false ${float_type})
350376
GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trmm_outucopy" false "" "" false ${float_type})
351377
GenerateNamedObjects("generic/ztrmm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trmm_outncopy" false "" "" false ${float_type})
352378

353-
GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type})
354-
GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trmm_iltncopy" false "" "" false ${float_type})
379+
GenerateNamedObjects(${TRMMLTCOPY_M} "LOWER;UNIT" "trmm_iltucopy" false "" "" false ${float_type})
380+
GenerateNamedObjects(${TRMMLTCOPY_M} "LOWER" "trmm_iltncopy" false "" "" false ${float_type})
355381
GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type})
356382
GenerateNamedObjects("generic/ztrmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type})
357383

358-
GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type})
359-
GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type})
384+
385+
if (NOT DEFINED ZTRSMCOPYLN_M)
386+
set(ZTRSMUNCOPY_M "generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_M}.c")
387+
set(ZTRSMLNCOPY_M "generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c")
388+
set(ZTRSMUTCOPY_M "generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c")
389+
set(ZTRSMLTCOPY_M "generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c")
390+
else ()
391+
set(ZTRSMUNCOPY_M "${KERNELDIR}/${ZTRSMCOPYUN_M}")
392+
set(ZTRSMLNCOPY_M "${KERNELDIR}/${ZTRSMCOPYLN_M}")
393+
set(ZTRSMUTCOPY_M "${KERNELDIR}/${ZTRSMCOPYUT_M}")
394+
set(ZTRSMLTCOPY_M "${KERNELDIR}/${ZTRSMCOPYLT_M}")
395+
endif ()
396+
GenerateNamedObjects(${ZTRSMUNCOPY_M} "UNIT" "trsm_iunucopy" false "" "" false ${float_type})
397+
GenerateNamedObjects(${ZTRSMUNCOPY_M} "" "trsm_iunncopy" false "" "" false ${float_type})
360398
GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type})
361399
GenerateNamedObjects("generic/ztrsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type})
362400

363-
GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type})
364-
GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type})
401+
GenerateNamedObjects(${ZTRSMLNCOPY_M} "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type})
402+
GenerateNamedObjects(${ZTRSMLNCOPY_M} "LOWER" "trsm_ilnncopy" false "" "" false ${float_type})
365403
GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type})
366404
GenerateNamedObjects("generic/ztrsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type})
367405

368-
GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type})
369-
GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type})
406+
GenerateNamedObjects(${ZTRSMUTCOPY_M} "UNIT" "trsm_iutucopy" false "" "" false ${float_type})
407+
GenerateNamedObjects(${ZTRSMUTCOPY_M} "" "trsm_iutncopy" false "" "" false ${float_type})
370408
GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type})
371409
GenerateNamedObjects("generic/ztrsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type})
372410

373-
GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type})
374-
GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type})
411+
GenerateNamedObjects(${ZTRSMLTCOPY_M} "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type})
412+
GenerateNamedObjects(${ZTRSMLTCOPY_M} "LOWER" "trsm_iltncopy" false "" "" false ${float_type})
375413
GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type})
376414
GenerateNamedObjects("generic/ztrsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type})
377415

@@ -465,23 +503,35 @@ endif ()
465503
GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trmm_oltucopy" false "" "" false ${float_type})
466504
GenerateNamedObjects("generic/trmm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trmm_oltncopy" false "" "" false ${float_type})
467505

468-
GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false ${float_type})
469-
GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false ${float_type})
506+
507+
if (NOT DEFINED TRSMCOPYLN_M)
508+
set(TRSMUNCOPY_M "generic/trsm_uncopy_${${float_char}GEMM_UNROLL_M}.c")
509+
set(TRSMLNCOPY_M "generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c")
510+
set(TRSMUTCOPY_M "generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c")
511+
set(TRSMLTCOPY_M "generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c")
512+
else ()
513+
set(TRSMUNCOPY_M "${KERNELDIR}/${TRSMCOPYUN_M}")
514+
set(TRSMLNCOPY_M "${KERNELDIR}/${TRSMCOPYLN_M}")
515+
set(TRSMUTCOPY_M "${KERNELDIR}/${TRSMCOPYUT_M}")
516+
set(TRSMLTCOPY_M "${KERNELDIR}/${TRSMCOPYLT_M}")
517+
endif ()
518+
GenerateNamedObjects(${TRSMUNCOPY_M} "UNIT" "trsm_iunucopy" false "" "" false ${float_type})
519+
GenerateNamedObjects(${TRSMUNCOPY_M} "" "trsm_iunncopy" false "" "" false ${float_type})
470520
GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false ${float_type})
471521
GenerateNamedObjects("generic/trsm_uncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false ${float_type})
472522

473-
GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type})
474-
GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false ${float_type})
523+
GenerateNamedObjects(${TRSMLNCOPY_M} "LOWER;UNIT" "trsm_ilnucopy" false "" "" false ${float_type})
524+
GenerateNamedObjects(${TRSMLNCOPY_M} "LOWER" "trsm_ilnncopy" false "" "" false ${float_type})
475525
GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false ${float_type})
476526
GenerateNamedObjects("generic/trsm_lncopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false ${float_type})
477527

478-
GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false ${float_type})
479-
GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false ${float_type})
528+
GenerateNamedObjects(${TRSMUTCOPY_M} "UNIT" "trsm_iutucopy" false "" "" false ${float_type})
529+
GenerateNamedObjects(${TRSMUTCOPY_M} "" "trsm_iutncopy" false "" "" false ${float_type})
480530
GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false ${float_type})
481531
GenerateNamedObjects("generic/trsm_utcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false ${float_type})
482532

483-
GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type})
484-
GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false ${float_type})
533+
GenerateNamedObjects(${TRSMLTCOPY_M} "LOWER;UNIT" "trsm_iltucopy" false "" "" false ${float_type})
534+
GenerateNamedObjects(${TRSMLTCOPY_M} "LOWER" "trsm_iltncopy" false "" "" false ${float_type})
485535
GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false ${float_type})
486536
GenerateNamedObjects("generic/trsm_ltcopy_${${float_char}GEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false ${float_type})
487537

0 commit comments

Comments
 (0)