Skip to content

Commit a7b1f9b

Browse files
committed
Implementation of BF16 based gemv
1. Add a new API, sbgemv, to support bfloat16-based gemv. 2. Implement a generic kernel for sbgemv. 3. Implement an AVX512-BF16-based kernel for sbgemv. Signed-off-by: Chen, Guobing <guobing.chen@intel.com>
1 parent 67f39ad commit a7b1f9b

24 files changed

+5111
-16
lines changed

cblas.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -393,6 +393,7 @@ void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPE
393393
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout);
394394
/* dot product of BFLOAT16 input arrays, with output as float */
395395
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
396+
void cblas_sbgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
396397

397398
#ifdef __cplusplus
398399
}

cmake/kernel.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,8 @@ macro(SetDefaultL2)
184184
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
185185
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
186186
if (BUILD_BFLOAT16)
187-
set(SBGEMVNKERNEL ../arm/gemv_n.c)
188-
set(SBGEMVTKERNEL ../arm/gemv_t.c)
187+
set(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
188+
set(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
189189
set(SHGERKERNEL ../generic/ger.c)
190190
endif ()
191191
endmacro ()

common_interface.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,8 @@ void BLASFUNC(xgeru)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
250250
void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
251251
xdouble *, blasint *, xdouble *, blasint *);
252252

253+
void BLASFUNC(sbgemv)(char *, blasint *, blasint *, float *, bfloat16 *, blasint *,
254+
bfloat16 *, blasint *, float *, float *, blasint *);
253255
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
254256
float *, blasint *, float *, float *, blasint *);
255257
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,

common_level2.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@
4444
extern "C" {
4545
#endif
4646

47+
int sbgemv_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
48+
int sbgemv_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
49+
int sbgemv_thread_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
50+
int sbgemv_thread_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
4751
int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
4852
int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
4953
int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);

common_macro.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -646,10 +646,12 @@
646646

647647
#elif defined(BFLOAT16)
648648

649-
#define D_TO_BF16_K SBDTOBF16_K
650-
#define D_BF16_TO_K DBF16TOD_K
651-
#define S_TO_BF16_K SBSTOBF16_K
652-
#define S_BF16_TO_K SBF16TOS_K
649+
#define D_TO_BF16_K SBDTOBF16_K
650+
#define D_BF16_TO_K DBF16TOD_K
651+
#define S_TO_BF16_K SBSTOBF16_K
652+
#define S_BF16_TO_K SBF16TOS_K
653+
#define SBGEMV_N SBGEMV_N_K
654+
#define SBGEMV_T SBGEMV_T_K
653655

654656
#define AMAX_K SAMAX_K
655657
#define AMIN_K SAMIN_K

common_param.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ BLASLONG (*isbmin_k) (BLASLONG, float *, BLASLONG);
7878
int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
7979
int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
8080

81-
int (*sbgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
82-
int (*sbgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
81+
int (*sbgemv_n) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
82+
int (*sbgemv_t) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
8383
int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
8484

8585
int (*sbsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);

common_sb.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
#define SBDTOBF16_K sbdtobf16_k
99
#define SBF16TOS_K sbf16tos_k
1010
#define DBF16TOD_K dbf16tod_k
11+
#define SBGEMV_N_K sbgemv_n
12+
#define SBGEMV_T_K sbgemv_t
1113

1214
#define SBGEMM_ONCOPY sbgemm_oncopy
1315
#define SBGEMM_OTCOPY sbgemm_otcopy
@@ -29,6 +31,8 @@
2931
#define SBDTOBF16_K gotoblas -> sbdtobf16_k
3032
#define SBF16TOS_K gotoblas -> sbf16tos_k
3133
#define DBF16TOD_K gotoblas -> dbf16tod_k
34+
#define SBGEMV_N_K gotoblas -> sbgemv_n
35+
#define SBGEMV_T_K gotoblas -> sbgemv_t
3236

3337
#define SBGEMM_ONCOPY gotoblas -> sbgemm_oncopy
3438
#define SBGEMM_OTCOPY gotoblas -> sbgemm_otcopy

driver/level2/Makefile

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,13 @@ XBLASOBJS += \
413413
xtbmv_thread_RUU.$(SUFFIX) xtbmv_thread_RUN.$(SUFFIX) \
414414
xtbmv_thread_RLU.$(SUFFIX) xtbmv_thread_RLN.$(SUFFIX) \
415415
xtbmv_thread_CUU.$(SUFFIX) xtbmv_thread_CUN.$(SUFFIX) \
416-
xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX) \
416+
xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX)
417+
418+
ifeq ($(BUILD_BFLOAT16),1)
419+
SBBLASOBJS += \
420+
sbgemv_thread_n$(TSUFFIX).$(SUFFIX) \
421+
sbgemv_thread_t$(TSUFFIX).$(SUFFIX)
422+
endif
417423

418424
endif
419425

@@ -3693,4 +3699,12 @@ xtrsv_CUU.$(SUFFIX) xtrsv_CUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
36933699
xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
36943700
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
36953701

3702+
ifeq ($(BUILD_BFLOAT16),1)
3703+
sbgemv_thread_n.$(SUFFIX) sbgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h
3704+
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
3705+
sbgemv_thread_t.$(SUFFIX) sbgemv_thread_t.$(PSUFFIX) : sbgemv_thread.c ../../common.h
3706+
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
3707+
endif
3708+
3709+
36963710
include ../../Makefile.tail

driver/level2/sbgemv_thread.c

Lines changed: 149 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,149 @@
1+
/*********************************************************************/
2+
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* All rights reserved. */
4+
/* */
5+
/* Redistribution and use in source and binary forms, with or */
6+
/* without modification, are permitted provided that the following */
7+
/* conditions are met: */
8+
/* */
9+
/* 1. Redistributions of source code must retain the above */
10+
/* copyright notice, this list of conditions and the following */
11+
/* disclaimer. */
12+
/* */
13+
/* 2. Redistributions in binary form must reproduce the above */
14+
/* copyright notice, this list of conditions and the following */
15+
/* disclaimer in the documentation and/or other materials */
16+
/* provided with the distribution. */
17+
/* */
18+
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
19+
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
20+
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
21+
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
22+
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
23+
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
24+
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
25+
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
26+
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
27+
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
28+
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
29+
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
30+
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
31+
/* POSSIBILITY OF SUCH DAMAGE. */
32+
/* */
33+
/* The views and conclusions contained in the software and */
34+
/* documentation are those of the authors and should not be */
35+
/* interpreted as representing official policies, either expressed */
36+
/* or implied, of The University of Texas at Austin. */
37+
/*********************************************************************/
38+
39+
#include <stdio.h>
40+
#include <stdlib.h>
41+
#include "common.h"
42+
43+
#ifndef TRANSA
44+
#define SBGEMV SBGEMV_N
45+
#else
46+
#define SBGEMV SBGEMV_T
47+
#endif
48+
49+
/*
 * Per-task worker for the threaded sbgemv driver.
 *
 * Unpacks the shared argument block (a = bfloat16 matrix, b = bfloat16 x
 * vector, c = float y vector; lda/ldb/ldc carry lda/incx/incy), narrows the
 * problem to the slice described by the range pointer, and forwards it to
 * SBGEMV (SBGEMV_N or SBGEMV_T, selected by TRANSA at compile time).
 *
 *   args     shared problem description filled in by the driver
 *   range_m  two consecutive entries [from, to) -- read only when TRANSA is
 *            not defined
 *   range_n  two consecutive entries [from, to) -- read only when TRANSA is
 *            defined
 *   dummy1, dummy2, dummy3  unused here
 *
 * Always returns 0.
 */
static int sbgemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *dummy2, BLASLONG dummy3){

  bfloat16 *a = (bfloat16 *)args->a;
  bfloat16 *x = (bfloat16 *)args->b;
  float    *y = (float *)args->c;

  const BLASLONG lda  = args->lda;
  const BLASLONG incx = args->ldb;
  const BLASLONG incy = args->ldc;

  BLASLONG rows, cols;

#ifndef TRANSA
  /* N: this task handles the m-range [first, range_m[1]) and the full n. */
  const BLASLONG first = range_m[0];

  rows = range_m[1] - first;
  cols = args->n;

  a += first;            /* skip `first` leading elements of a  */
  y += first * incy;     /* matching offset into the y vector   */
#else
  /* T: this task handles the full m and the n-range [first, range_n[1]). */
  const BLASLONG first = range_n[0];

  rows = args->m;
  cols = range_n[1] - first;

  a += first * lda;      /* skip `first` strides of length lda  */
  y += first * incy;     /* matching offset into the y vector   */
#endif

  /* x is passed through unsliced in both variants. */
  SBGEMV(rows, cols, *((FLOAT *)(args->alpha)), a, lda, x, incx, *((FLOAT *)(args->beta)), y, incy);

  return 0;
}
84+
85+
/*
 * Threaded sbgemv driver.
 *
 * Splits the problem along one dimension (m when TRANSA is not defined,
 * n when it is) into `threads` contiguous ranges, builds one queue entry
 * per range that runs sbgemv_kernel, and hands the queue to exec_blas.
 *
 *   m, n        problem dimensions
 *   alpha, beta float scalars forwarded to the kernel by address
 *   a, lda      bfloat16 matrix and its leading dimension
 *   x, incx     bfloat16 input vector and its stride
 *   y, incy     float output vector and its stride
 *   threads     requested number of tasks; clamped to [1, MAX_CPU_NUMBER]
 *
 * Every task except the last receives (width / threads) entries of the
 * split dimension; the last task receives whatever remains, so the ranges
 * always cover the whole dimension.
 *
 * Always returns 0.
 */
int CNAME(BLASLONG m, BLASLONG n, float alpha, bfloat16 *a, BLASLONG lda, bfloat16 *x, BLASLONG incx, float beta, float *y, BLASLONG incy, int threads)
{
  blas_arg_t   args;
  blas_queue_t queue[MAX_CPU_NUMBER];
  BLASLONG     range[MAX_CPU_NUMBER + 1];

#ifndef TRANSA
  BLASLONG width_for_split = m;
#else
  BLASLONG width_for_split = n;
#endif

  BLASLONG BLOCK_WIDTH;
  int thread_idx;
  int mode = BLAS_BFLOAT16 | BLAS_REAL;

  /*
   * Guard the partitioning below: threads == 0 would divide by zero, a
   * negative count would silently skip the computation, and a count above
   * MAX_CPU_NUMBER would write past the end of queue[] and range[].
   */
  if (threads < 1)              threads = 1;
  if (threads > MAX_CPU_NUMBER) threads = MAX_CPU_NUMBER;

  BLOCK_WIDTH = width_for_split / threads;

  /*
   * One argument block is shared by every task; alpha and beta are passed
   * by address of the by-value parameters.
   * NOTE(review): this relies on exec_blas not returning before all tasks
   * have finished -- confirm against its implementation.
   */
  args.m     = m;
  args.n     = n;
  args.a     = (void *)a;
  args.b     = (void *)x;
  args.c     = (void *)y;
  args.lda   = lda;
  args.ldb   = incx;
  args.ldc   = incy;
  args.alpha = (void *)&alpha;
  args.beta  = (void *)&beta;

  range[0] = 0;

  for (thread_idx = 0; thread_idx < threads; thread_idx++) {

    /* Last task absorbs the remainder of the integer division. */
    if (thread_idx != threads - 1) {
      range[thread_idx + 1] = range[thread_idx] + BLOCK_WIDTH;
    } else {
      range[thread_idx + 1] = range[thread_idx] + width_for_split;
    }

    queue[thread_idx].mode    = mode;
    queue[thread_idx].routine = sbgemv_kernel;
    queue[thread_idx].args    = &args;
#ifndef TRANSA
    /* The kernel reads range_m[0] and range_m[1]. */
    queue[thread_idx].range_m = &range[thread_idx];
    queue[thread_idx].range_n = NULL;
#else
    /* The kernel reads range_n[0] and range_n[1]. */
    queue[thread_idx].range_m = NULL;
    queue[thread_idx].range_n = &range[thread_idx];
#endif
    queue[thread_idx].sa      = NULL;
    queue[thread_idx].sb      = NULL;
    queue[thread_idx].next    = &queue[thread_idx + 1];

    /* Remaining width still to be handed out. */
    width_for_split -= BLOCK_WIDTH;
  }

  /* threads >= 1 here, so there is always a last entry to terminate. */
  queue[threads - 1].next = NULL;

  exec_blas(threads, queue);

  return 0;
}

driver/others/blas_server_omp.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,6 @@ fprintf(stderr,"UNHANDLED COMPLEX\n");
352352
/* Other types in future */
353353
}
354354
}
355-
if (!sb) fprintf(stderr,"SB not declared!!!\n");
356355
queue->sb=sb;
357356
}
358357
}

exports/gensymbol

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
zgeadd, dzsum);
5252

5353
@blasobjs = (lsame, xerbla);
54-
@bfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
54+
@bfblasobjs = (sbgemm, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
5555
@cblasobjsc = (
5656
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
5757
cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k,
@@ -94,7 +94,7 @@
9494

9595
@cblasobjs = ( cblas_xerbla );
9696

97-
@bfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod);
97+
@bfcblasobjs = (cblas_sbgemm, cblas_sbgemv, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod);
9898

9999
@exblasobjs = (
100100
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm,

interface/Makefile

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ SBLAS3OBJS = \
4848

4949
ifeq ($(BUILD_BFLOAT16),1)
5050
SBBLAS1OBJS = sbdot.$(SUFFIX)
51+
SBBLAS2OBJS = sbgemv.$(SUFFIX)
5152
SBBLAS3OBJS = sbgemm.$(SUFFIX)
5253
SBEXTOBJS = sbstobf16.$(SUFFIX) sbdtobf16.$(SUFFIX) sbf16tos.$(SUFFIX) dbf16tod.$(SUFFIX)
5354
endif
@@ -284,6 +285,7 @@ CSBLAS3OBJS = \
284285

285286
ifeq ($(BUILD_BFLOAT16),1)
286287
CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX)
288+
CSBBLAS2OBJS = cblas_sbgemv.$(SUFFIX)
287289
CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX)
288290
CSBEXTOBJS = cblas_sbstobf16.$(SUFFIX) cblas_sbdtobf16.$(SUFFIX) cblas_sbf16tos.$(SUFFIX) cblas_dbf16tod.$(SUFFIX)
289291
endif
@@ -382,6 +384,7 @@ SBLAS1OBJS += $(CSBLAS1OBJS)
382384
SBLAS2OBJS += $(CSBLAS2OBJS)
383385
SBLAS3OBJS += $(CSBLAS3OBJS)
384386
SBBLAS1OBJS += $(CSBBLAS1OBJS)
387+
SBBLAS2OBJS += $(CSBBLAS2OBJS)
385388
SBBLAS3OBJS += $(CSBBLAS3OBJS)
386389
DBLAS1OBJS += $(CDBLAS1OBJS)
387390
DBLAS2OBJS += $(CDBLAS2OBJS)
@@ -399,7 +402,7 @@ CBAUXOBJS += $(CXERBLAOBJ)
399402
endif
400403

401404
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
402-
SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS3OBJS)
405+
SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS2OBJS) $(SBBLAS3OBJS)
403406
DBLASOBJS = $(DBLAS1OBJS) $(DBLAS2OBJS) $(DBLAS3OBJS)
404407
QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS)
405408
CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
@@ -538,7 +541,7 @@ clean ::
538541
level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS)
539542
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
540543

541-
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
544+
level2 : $(SBBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
542545
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
543546

544547
level3 : $(SBBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
@@ -929,6 +932,11 @@ xgeru.$(SUFFIX) xgeru.$(PSUFFIX) : zger.c
929932
xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c
930933
$(CC) -c $(CFLAGS) -DCONJ $< -o $(@F)
931934

935+
ifeq ($(BUILD_BFLOAT16),1)
936+
sbgemv.$(SUFFIX) sbgemv.$(PSUFFIX) : sbgemv.c
937+
$(CC) $(CFLAGS) -c $< -o $(@F)
938+
endif
939+
932940
ifndef USE_NETLIB_GEMV
933941
sgemv.$(SUFFIX) sgemv.$(PSUFFIX): gemv.c
934942
$(CC) -c $(CFLAGS) -o $(@F) $<
@@ -1656,6 +1664,11 @@ cblas_csscal.$(SUFFIX) cblas_csscal.$(PSUFFIX) : zscal.c
16561664
cblas_zdscal.$(SUFFIX) cblas_zdscal.$(PSUFFIX) : zscal.c
16571665
$(CC) $(CFLAGS) -DCBLAS -c -DSSCAL $< -o $(@F)
16581666

1667+
ifeq ($(BUILD_BFLOAT16),1)
1668+
cblas_sbgemv.$(SUFFIX) cblas_sbgemv.$(PSUFFIX) : sbgemv.c
1669+
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
1670+
endif
1671+
16591672
cblas_sgemv.$(SUFFIX) cblas_sgemv.$(PSUFFIX): gemv.c
16601673
$(CC) -DCBLAS -c $(CFLAGS) -o $(@F) $<
16611674

interface/gemv.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,6 @@ void CNAME(enum CBLAS_ORDER order,
191191
}
192192

193193
#endif
194-
//printf("m=%d, n=%d, trans=%d, incx=%d, incy=%d, alpha=%f, beta=%f\n", m, n, trans, incx, incy, alpha, beta);
195194
if ((m==0) || (n==0)) return;
196195

197196
lenx = n;

0 commit comments

Comments
 (0)