Skip to content

Commit b7c0fa6

Browse files
committed
Init AMD Bulldozer codebase.
1 parent 7110d17 commit b7c0fa6

File tree

8 files changed

+172
-8
lines changed

8 files changed

+172
-8
lines changed

Makefile.system

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -277,14 +277,14 @@ ifeq ($(ARCH), x86)
277277
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
278278
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
279279
ifneq ($(NO_AVX), 1)
280-
DYNAMIC_CORE += SANDYBRIDGE
280+
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER
281281
endif
282282
endif
283283

284284
ifeq ($(ARCH), x86_64)
285285
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
286286
ifneq ($(NO_AVX), 1)
287-
DYNAMIC_CORE += SANDYBRIDGE
287+
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER
288288
endif
289289
endif
290290

cpuid.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@
125125
#define HAVE_MISALIGNSSE (1 << 15)
126126
#define HAVE_128BITFPU (1 << 16)
127127
#define HAVE_FASTMOVU (1 << 17)
128-
#define HAVE_AVX (1 << 18)
128+
#define HAVE_AVX (1 << 18)
129+
#define HAVE_FMA4 (1 << 19)
129130

130131
#define CACHE_INFO_L1_I 1
131132
#define CACHE_INFO_L1_D 2

cpuid_x86.c

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#ifdef NO_AVX
4444
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
4545
#define CORE_SANDYBRIDGE CORE_NEHALEM
46+
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
47+
#define CORE_BULLDOZER CORE_BARCELONA
4648
#endif
4749

4850
#ifndef CPUIDEMU
@@ -228,6 +230,9 @@ int get_cputype(int gettype){
228230
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
229231
if ((ecx & (1 << 6)) != 0) feature |= HAVE_SSE4A;
230232
if ((ecx & (1 << 7)) != 0) feature |= HAVE_MISALIGNSSE;
233+
#ifndef NO_AVX
234+
if ((ecx & (1 << 16)) != 0) feature |= HAVE_FMA4;
235+
#endif
231236
if ((edx & (1 << 30)) != 0) feature |= HAVE_3DNOWEX;
232237
if ((edx & (1 << 31)) != 0) feature |= HAVE_3DNOW;
233238
}
@@ -1075,8 +1080,12 @@ int get_cpuname(void){
10751080
return CPUTYPE_OPTERON;
10761081
case 1:
10771082
case 10:
1078-
case 6: //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
10791083
return CPUTYPE_BARCELONA;
1084+
case 6: //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
1085+
if(support_avx())
1086+
return CPUTYPE_BULLDOZER;
1087+
else
1088+
return CPUTYPE_BARCELONA; //OS don't support AVX.
10801089
case 5:
10811090
return CPUTYPE_BOBCAT;
10821091
}
@@ -1427,8 +1436,13 @@ int get_coretype(void){
14271436
if (family == 0xf){
14281437
if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON;
14291438
else if (exfamily == 5) return CORE_BOBCAT;
1430-
else if (exfamily == 6) return CORE_BARCELONA; //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
1431-
else return CORE_BARCELONA;
1439+
else if (exfamily == 6) {
1440+
//AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
1441+
if(support_avx())
1442+
return CORE_BULLDOZER;
1443+
else
1444+
return CORE_BARCELONA; //OS don't support AVX. Use old kernels.
1445+
}else return CORE_BARCELONA;
14321446
}
14331447
}
14341448

driver/others/dynamic.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,11 @@ extern gotoblas_t gotoblas_BARCELONA;
6363
extern gotoblas_t gotoblas_BOBCAT;
6464
#ifndef NO_AVX
6565
extern gotoblas_t gotoblas_SANDYBRIDGE;
66+
extern gotoblas_t gotoblas_BULLDOZER;
6667
#else
6768
//Use NEHALEM kernels for sandy bridge
6869
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
70+
#define gotoblas_BULLDOZER gotoblas_BARCELONA
6971
#endif
7072

7173

@@ -202,6 +204,14 @@ static gotoblas_t *get_coretype(void){
202204
else return &gotoblas_OPTERON;
203205
} else if (exfamily == 5) {
204206
return &gotoblas_BOBCAT;
207+
} else if (exfamily == 6) {
208+
//AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
209+
if(support_avx())
210+
return &gotoblas_BULLDOZER;
211+
else{
212+
fprintf(stderr, "OpenBLAS : Your OS doesn't support AVX. Use Barcelona kernels.\n");
213+
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
214+
}
205215
} else {
206216
return &gotoblas_BARCELONA;
207217
}
@@ -238,6 +248,7 @@ static char *corename[] = {
238248
"Nano",
239249
"Sandybridge",
240250
"Bobcat",
251+
"Bulldozer",
241252
};
242253

243254
char *gotoblas_corename(void) {
@@ -259,6 +270,7 @@ char *gotoblas_corename(void) {
259270
if (gotoblas == &gotoblas_NANO) return corename[15];
260271
if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16];
261272
if (gotoblas == &gotoblas_BOBCAT) return corename[17];
273+
if (gotoblas == &gotoblas_BULLDOZER) return corename[18];
262274

263275
return corename[0];
264276
}

getarch.c

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
350350
#define CORENAME "OPTERON"
351351
#endif
352352

353-
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_BULLDOZER)
353+
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL)
354354
#define FORCE
355355
#define FORCE_INTEL
356356
#define ARCHITECTURE "X86"
@@ -380,6 +380,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
380380
#define CORENAME "BOBCAT"
381381
#endif
382382

383+
#if defined (FORCE_BULLDOZER)
384+
#define FORCE
385+
#define FORCE_INTEL
386+
#define ARCHITECTURE "X86"
387+
#define SUBARCHITECTURE "BULLDOZER"
388+
/* Option string selected when FORCE_BULLDOZER is defined.
 *
 * The value is built from adjacent string literals, which the compiler
 * concatenates with nothing in between.  Every fragment except the last
 * must therefore end with a space; without it two -D options fuse into a
 * single token (previously "-DHAVE_FASTMOVU-DHAVE_AVX", which left both
 * HAVE_FASTMOVU and HAVE_AVX effectively undefined).
 */
#define ARCHCONFIG   "-DBARCELONA " \
		     "-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \
		     "-DL2_SIZE=1024000 -DL2_LINESIZE=64 -DL3_SIZE=16777216 " \
		     "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \
		     "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
		     "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU " \
		     "-DHAVE_AVX -DHAVE_FMA4"
395+
#define LIBNAME "bulldozer"
396+
#define CORENAME "BULLDOZER"
397+
#endif
398+
383399
#ifdef FORCE_SSE_GENERIC
384400
#define FORCE
385401
#define FORCE_INTEL

kernel/x86/KERNEL.BULLDOZER

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
# Kernel source selection for the BULLDOZER target (kernel/x86).
# Every entry in this file names a Barcelona, SSE/SSE2, or ../generic source;
# no source file named for Bulldozer is referenced here.

# SGEMM: kernel source, copy-routine sources, and copy object names.
# The INCOPY/ITCOPY entries (and their OBJ names) are left empty.
1+
SGEMMKERNEL = gemm_kernel_4x4_barcelona.S
2+
SGEMMINCOPY =
3+
SGEMMITCOPY =
4+
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
5+
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
6+
SGEMMINCOPYOBJ =
7+
SGEMMITCOPYOBJ =
8+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
9+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
# DGEMM: all four copy routines come from ../generic.
10+
DGEMMKERNEL = gemm_kernel_2x4_barcelona.S
11+
DGEMMINCOPY = ../generic/gemm_ncopy_2.c
12+
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
13+
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
14+
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
15+
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
16+
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
17+
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
18+
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
# CGEMM: entries name zgemm_* source files; INCOPY/ITCOPY are left empty,
# as for SGEMM above.
19+
CGEMMKERNEL = zgemm_kernel_2x2_barcelona.S
20+
CGEMMINCOPY =
21+
CGEMMITCOPY =
22+
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
23+
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
24+
CGEMMINCOPYOBJ =
25+
CGEMMITCOPYOBJ =
26+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
27+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
# ZGEMM: all four copy routines come from ../generic.
28+
ZGEMMKERNEL = zgemm_kernel_1x2_barcelona.S
29+
ZGEMMINCOPY = ../generic/zgemm_ncopy_1.c
30+
ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c
31+
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
32+
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
33+
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
34+
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
35+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
36+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
37+
# TRSM kernel sources.  In every group below the _RN entry names the same
# LT source file as the _LT entry.
38+
STRSMKERNEL_LN = trsm_kernel_LN_4x4_sse.S
39+
STRSMKERNEL_LT = trsm_kernel_LT_4x4_sse.S
40+
STRSMKERNEL_RN = trsm_kernel_LT_4x4_sse.S
41+
STRSMKERNEL_RT = trsm_kernel_RT_4x4_sse.S
42+
43+
DTRSMKERNEL_LN = trsm_kernel_LN_2x4_sse2.S
44+
DTRSMKERNEL_LT = trsm_kernel_LT_2x4_sse2.S
45+
DTRSMKERNEL_RN = trsm_kernel_LT_2x4_sse2.S
46+
DTRSMKERNEL_RT = trsm_kernel_RT_2x4_sse2.S
47+
48+
CTRSMKERNEL_LN = ztrsm_kernel_LN_2x2_sse.S
49+
CTRSMKERNEL_LT = ztrsm_kernel_LT_2x2_sse.S
50+
CTRSMKERNEL_RN = ztrsm_kernel_LT_2x2_sse.S
51+
CTRSMKERNEL_RT = ztrsm_kernel_RT_2x2_sse.S
52+
# NOTE(review): unlike the S/D/C groups, ZTRSMKERNEL_LN also names the LT
# source (no LN file is used) -- confirm this is intentional.
53+
ZTRSMKERNEL_LN = ztrsm_kernel_LT_1x2_sse2.S
54+
ZTRSMKERNEL_LT = ztrsm_kernel_LT_1x2_sse2.S
55+
ZTRSMKERNEL_RN = ztrsm_kernel_LT_1x2_sse2.S
56+
ZTRSMKERNEL_RT = ztrsm_kernel_RT_1x2_sse2.S
57+
# GEMM3M kernel sources; both entries name zgemm3m_* Barcelona files.
58+
CGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S
59+
ZGEMM3MKERNEL = zgemm3m_kernel_2x4_barcelona.S

kernel/x86_64/KERNEL.BULLDOZER

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
# Kernel source selection for the BULLDOZER target (kernel/x86_64).
# The entries in this file name Barcelona, Opteron, SSE/SSE2, ../generic, or
# unsuffixed sources; no source file named for Bulldozer is referenced here.

# ZGEMV N/T kernel sources.
1+
ZGEMVNKERNEL = zgemv_n_dup.S
2+
ZGEMVTKERNEL = zgemv_t_dup.S
3+
# SGEMM: 8x4 Barcelona kernel; IN/IT copies from ../generic, ON/OT copies
# from the Opteron assembly sources.
4+
SGEMMKERNEL = gemm_kernel_8x4_barcelona.S
5+
SGEMMINCOPY = ../generic/gemm_ncopy_8.c
6+
SGEMMITCOPY = ../generic/gemm_tcopy_8.c
7+
SGEMMONCOPY = gemm_ncopy_4_opteron.S
8+
SGEMMOTCOPY = gemm_tcopy_4_opteron.S
9+
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
10+
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
11+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
12+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
# DGEMM: INCOPY/ITCOPY (and their OBJ names) are left empty.
13+
DGEMMKERNEL = gemm_kernel_4x4_barcelona.S
14+
DGEMMINCOPY =
15+
DGEMMITCOPY =
16+
DGEMMONCOPY = gemm_ncopy_4_opteron.S
17+
DGEMMOTCOPY = gemm_tcopy_4_opteron.S
18+
DGEMMINCOPYOBJ =
19+
DGEMMITCOPYOBJ =
20+
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
21+
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
# CGEMM: entries name zgemm_* source files; IN/IT copies from ../generic.
22+
CGEMMKERNEL = zgemm_kernel_4x2_barcelona.S
23+
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
24+
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
25+
CGEMMONCOPY = zgemm_ncopy_2.S
26+
CGEMMOTCOPY = zgemm_tcopy_2.S
27+
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
28+
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
29+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
30+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
# ZGEMM: INCOPY/ITCOPY (and their OBJ names) are left empty, as for DGEMM.
31+
ZGEMMKERNEL = zgemm_kernel_2x2_barcelona.S
32+
ZGEMMINCOPY =
33+
ZGEMMITCOPY =
34+
ZGEMMONCOPY = zgemm_ncopy_2.S
35+
ZGEMMOTCOPY = zgemm_tcopy_2.S
36+
ZGEMMINCOPYOBJ =
37+
ZGEMMITCOPYOBJ =
38+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
39+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
40+
# TRSM kernel sources.  In every group below the _RN entry names the same
# LT source file as the _LT entry.  Only the DTRSM group uses
# Barcelona-named sources; the others use SSE/SSE2-named sources.
41+
STRSMKERNEL_LN = trsm_kernel_LN_8x4_sse.S
42+
STRSMKERNEL_LT = trsm_kernel_LT_8x4_sse.S
43+
STRSMKERNEL_RN = trsm_kernel_LT_8x4_sse.S
44+
STRSMKERNEL_RT = trsm_kernel_RT_8x4_sse.S
45+
46+
DTRSMKERNEL_LN = trsm_kernel_LN_4x4_barcelona.S
47+
DTRSMKERNEL_LT = trsm_kernel_LT_4x4_barcelona.S
48+
DTRSMKERNEL_RN = trsm_kernel_LT_4x4_barcelona.S
49+
DTRSMKERNEL_RT = trsm_kernel_RT_4x4_barcelona.S
50+
51+
CTRSMKERNEL_LN = ztrsm_kernel_LN_4x2_sse.S
52+
CTRSMKERNEL_LT = ztrsm_kernel_LT_4x2_sse.S
53+
CTRSMKERNEL_RN = ztrsm_kernel_LT_4x2_sse.S
54+
CTRSMKERNEL_RT = ztrsm_kernel_RT_4x2_sse.S
55+
56+
ZTRSMKERNEL_LN = ztrsm_kernel_LN_2x2_sse2.S
57+
ZTRSMKERNEL_LT = ztrsm_kernel_LT_2x2_sse2.S
58+
ZTRSMKERNEL_RN = ztrsm_kernel_LT_2x2_sse2.S
59+
ZTRSMKERNEL_RT = ztrsm_kernel_RT_2x2_sse2.S
60+
# GEMM3M kernel sources; both entries name zgemm3m_* Barcelona files.
61+
CGEMM3MKERNEL = zgemm3m_kernel_8x4_barcelona.S
62+
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S

param.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
143143

144144
#endif
145145

146-
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
146+
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
147147

148148
#define SNUMOPT 8
149149
#define DNUMOPT 4

0 commit comments

Comments
 (0)