Skip to content

Commit 9f28161

Browse files
committed
BENCH: add benchmarks using codspeed.io
1 parent f034745 commit 9f28161

File tree

9 files changed

+1099
-0
lines changed

9 files changed

+1099
-0
lines changed

.github/workflows/codspeed-bench.yml

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
# Builds OpenBLAS with ccache, builds the Python wrapper found in
# benchmark/pybench against it, and runs the pytest benchmarks through the
# CodSpeed action.
name: Run codspeed benchmarks

on: [push, pull_request]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

permissions:
  contents: read  # to fetch code (actions/checkout)

jobs:
  benchmarks:
    if: "github.repository == 'OpenMathLib/OpenBLAS'"
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        fortran: [gfortran]
        build: [make]
        pyver: ["3.12"]
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v3
      - uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.pyver }}

      - name: Print system information
        run: |
          if [ "$RUNNER_OS" == "Linux" ]; then
            cat /proc/cpuinfo
          fi

      - name: Install Dependencies
        run: |
          if [ "$RUNNER_OS" == "Linux" ]; then
            sudo apt-get update
            sudo apt-get install -y gfortran cmake ccache libtinfo5
          else
            echo "::error::$RUNNER_OS not supported"
            exit 1
          fi

      - name: Compilation cache
        uses: actions/cache@v3
        with:
          path: ~/.ccache
          # We include the commit sha in the cache key, as new cache entries are
          # only created if there is no existing entry for the key yet.
          # GNU make and cmake call the compilers differently. It looks like
          # that causes the cache to mismatch. Keep the ccache for both build
          # tools separate to avoid polluting each other.
          key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
          # Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
          restore-keys: |
            ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
            ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
            ccache-${{ runner.os }}-${{ matrix.build }}

      - name: Write out the .pc
        run: |
          cd benchmark/pybench
          # The heredoc delimiter is quoted so that bash copies the body
          # verbatim. With an unquoted delimiter the shell would expand
          # ${version} (an unset shell variable) to an empty string, leaving
          # the Version: field blank instead of referring to the pkg-config
          # variable defined a few lines above it. The GitHub expressions are
          # substituted by the runner before the script starts, so they are
          # unaffected by the quoting.
          cat > openblas.pc << 'EOF'
          libdir=${{ github.workspace }}
          includedir= ${{ github.workspace }}
          openblas_config= OpenBLAS 0.3.27 DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=64
          version=0.0.99
          extralib=-lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
          Name: openblas
          Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
          Version: ${version}
          URL: https://github.com/xianyi/OpenBLAS
          Libs: ${{ github.workspace }}/libopenblas.so -Wl,-rpath,${{ github.workspace }}
          Libs.private: -lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
          Cflags: -I${{ github.workspace}}
          EOF
          cat openblas.pc

      - name: Configure ccache
        run: |
          if [ "${{ matrix.build }}" = "make" ]; then
            # Add ccache to path
            if [ "$RUNNER_OS" = "Linux" ]; then
              echo "/usr/lib/ccache" >> "$GITHUB_PATH"
            elif [ "$RUNNER_OS" = "macOS" ]; then
              echo "$(brew --prefix)/opt/ccache/libexec" >> "$GITHUB_PATH"
            else
              echo "::error::$RUNNER_OS not supported"
              exit 1
            fi
          fi
          # Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
          test -d ~/.ccache || mkdir -p ~/.ccache
          echo "max_size = 300M" > ~/.ccache/ccache.conf
          echo "compression = true" >> ~/.ccache/ccache.conf
          ccache -s

      - name: Build OpenBLAS
        run: |
          case "${{ matrix.build }}" in
            "make")
              make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
              ;;
            "cmake")
              mkdir build && cd build
              cmake -DDYNAMIC_ARCH=1 \
                    -DNOFORTRAN=0 \
                    -DBUILD_WITHOUT_LAPACK=0 \
                    -DCMAKE_VERBOSE_MAKEFILE=ON \
                    -DCMAKE_BUILD_TYPE=Release \
                    -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
                    -DCMAKE_C_COMPILER_LAUNCHER=ccache \
                    -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
                    ..
              cmake --build .
              ;;
            *)
              echo "::error::Configuration not supported"
              exit 1
              ;;
          esac

      - name: Show ccache status
        continue-on-error: true
        run: ccache -s

      - name: Install benchmark dependencies
        run: pip install meson ninja numpy pytest pytest-codspeed --user

      - name: Build the wrapper
        run: |
          cd benchmark/pybench
          # Let meson's pkg-config lookup find the openblas.pc written earlier.
          export PKG_CONFIG_PATH=$PWD
          meson setup build --prefix=$PWD/build-install
          meson install -C build
          #
          # sanity check
          cd build/openblas_wrap
          python -c'import _flapack; print(dir(_flapack))'

      - name: Run benchmarks
        uses: CodSpeedHQ/action@v2
        with:
          token: ${{ secrets.CODSPEED_TOKEN }}
          run: |
            cd benchmark/pybench
            export PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/
            OPENBLAS_NUM_THREADS=1 pytest benchmarks/bench_blas.py --codspeed
150+

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,4 @@ benchmark/smallscaling
109109
CMakeCache.txt
110110
CMakeFiles/*
111111
.vscode
112+
**/__pycache__
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
import pytest
2+
import numpy as np
3+
from openblas_wrap import (
4+
# level 1
5+
dnrm2, ddot, daxpy,
6+
# level 3
7+
dgemm, dsyrk,
8+
# lapack
9+
dgesv, # linalg.solve
10+
dgesdd, dgesdd_lwork, # linalg.svd
11+
dsyev, dsyev_lwork, # linalg.eigh
12+
)
13+
14+
# ### BLAS level 1 ###
15+
16+
# dnrm2
17+
18+
dnrm2_sizes = [100, 1000]
19+
20+
def run_dnrm2(n, x, incx):
    """Call the wrapped ``dnrm2`` with ``x``, ``n`` and ``incx``; return its result."""
    return dnrm2(x, n, incx=incx)
23+
24+
25+
@pytest.mark.parametrize('n', dnrm2_sizes)
def test_nrm2(benchmark, n):
    """Benchmark ``run_dnrm2`` on a seeded random vector of length ``n``."""
    # Fixed seed so every run sees the same input data.
    rng = np.random.RandomState(1234)
    vec = np.array(rng.uniform(size=(n,)), dtype=float)
    benchmark(run_dnrm2, n, vec, 1)
30+
31+
32+
# ddot
33+
34+
ddot_sizes = [100, 1000]
35+
36+
def run_ddot(x, y):
    """Call the wrapped ``ddot`` on ``x`` and ``y``; return its result."""
    return ddot(x, y)
39+
40+
41+
@pytest.mark.parametrize('n', ddot_sizes)
def test_dot(benchmark, n):
    """Benchmark ``run_ddot`` on two seeded random vectors of length ``n``."""
    # Fixed seed so every run sees the same input data.
    rng = np.random.RandomState(1234)
    lhs = np.array(rng.uniform(size=(n,)), dtype=float)
    rhs = np.array(rng.uniform(size=(n,)), dtype=float)
    benchmark(run_ddot, lhs, rhs)
47+
48+
49+
# daxpy
50+
51+
daxpy_sizes = [100, 1000]
52+
53+
def run_daxpy(x, y):
    """Call the wrapped ``daxpy`` on ``x`` and ``y`` with ``a=2.0``; return its result."""
    return daxpy(x, y, a=2.0)
56+
57+
58+
@pytest.mark.parametrize('n', daxpy_sizes)
def test_daxpy(benchmark, n):
    """Benchmark ``run_daxpy`` on two seeded random vectors of length ``n``."""
    # Fixed seed so every run sees the same input data.
    rng = np.random.RandomState(1234)
    first = np.array(rng.uniform(size=(n,)), dtype=float)
    second = np.array(rng.uniform(size=(n,)), dtype=float)
    benchmark(run_daxpy, first, second)
64+
65+
66+
67+
68+
# ### BLAS level 3 ###
69+
70+
# dgemm
71+
72+
gemm_sizes = [100, 1000]
73+
74+
def run_gemm(a, b, c):
    """Call the wrapped ``dgemm`` with alpha 1.0, passing ``c`` with ``overwrite_c=True``."""
    return dgemm(1.0, a, b, c=c, overwrite_c=True)
78+
79+
80+
@pytest.mark.parametrize('n', gemm_sizes)
def test_gemm(benchmark, n):
    """Benchmark ``run_gemm`` on seeded ``n`` x ``n`` Fortran-ordered inputs."""
    rng = np.random.RandomState(1234)
    shape = (n, n)
    a = np.array(rng.uniform(size=shape), dtype=float, order='F')
    b = np.array(rng.uniform(size=shape), dtype=float, order='F')
    c = np.empty(shape, dtype=float, order='F')

    out = benchmark(run_gemm, a, b, c)

    # The returned object must be the very buffer that was passed in as c.
    assert out is c
88+
89+
90+
# dsyrk
91+
92+
syrk_sizes = [100, 1000]
93+
94+
95+
def run_syrk(a, c):
    """Call the wrapped ``dsyrk`` with alpha 1.0, passing ``c`` with ``overwrite_c=True``."""
    return dsyrk(1.0, a, c=c, overwrite_c=True)
98+
99+
100+
@pytest.mark.parametrize('n', syrk_sizes)
def test_syrk(benchmark, n):
    """Benchmark ``run_syrk`` on a seeded ``n`` x ``n`` Fortran-ordered input."""
    rng = np.random.RandomState(1234)
    shape = (n, n)
    a = np.array(rng.uniform(size=shape), dtype=float, order='F')
    c = np.empty(shape, dtype=float, order='F')

    out = benchmark(run_syrk, a, c)

    # The returned object must be the very buffer that was passed in as c.
    assert out is c
107+
108+
109+
# ### LAPACK ###
110+
111+
# linalg.solve
112+
113+
gesv_sizes = [100, 1000]
114+
115+
116+
def run_gesv(a, b):
    """Call the wrapped ``dgesv`` on ``a`` and ``b`` with both overwrite flags set."""
    return dgesv(a, b, overwrite_a=True, overwrite_b=True)
119+
120+
121+
@pytest.mark.parametrize('n', gesv_sizes)
def test_gesv(benchmark, n):
    """Benchmark ``run_gesv`` on a seeded ``n`` x ``n`` system with one right-hand side."""
    rng = np.random.RandomState(1234)
    # Random matrix with the identity added on top.
    random_part = np.array(rng.uniform(size=(n, n)), dtype=float, order='F')
    a = random_part + np.eye(n, order='F')
    b = np.array(rng.uniform(size=(n, 1)), order='F')

    lu, piv, x, info = benchmark(run_gesv, a, b)

    # Both inputs were passed with overwrite enabled, so the outputs must be
    # the same objects that went in.
    assert lu is a
    assert x is b
    assert info == 0
131+
132+
133+
# linalg.svd
134+
135+
gesdd_sizes = [(100, 5), (1000, 222)]
136+
137+
138+
def run_gesdd(a, lwork):
    """Call the wrapped ``dgesdd`` on ``a`` with ``full_matrices=False``, ``overwrite_a=False``."""
    return dgesdd(a, lwork=lwork, full_matrices=False, overwrite_a=False)
141+
142+
143+
@pytest.mark.parametrize('mn', gesdd_sizes)
def test_gesdd(benchmark, mn):
    """Benchmark ``run_gesdd`` on a seeded matrix whose shape is given by ``mn``."""
    rows, cols = mn
    rng = np.random.RandomState(1234)
    a = np.array(rng.uniform(size=(rows, cols)), dtype=float, order='F')

    # The workspace-size query runs outside the benchmarked call.
    lwork, info = dgesdd_lwork(rows, cols)
    lwork = int(lwork)
    assert info == 0

    u, s, vt, info = benchmark(run_gesdd, a, lwork)

    assert info == 0
    # The three factors multiplied back together must reproduce the input.
    rebuilt = u @ np.diag(s) @ vt
    np.testing.assert_allclose(rebuilt, a, atol=1e-13)
157+
158+
159+
# linalg.eigh
160+
161+
syev_sizes = [50, 200]
162+
163+
164+
def run_syev(a, lwork):
    """Call the wrapped ``dsyev`` on ``a`` with the given ``lwork`` and ``overwrite_a=True``."""
    return dsyev(a, lwork=lwork, overwrite_a=True)
167+
168+
169+
@pytest.mark.parametrize('n', syev_sizes)
def test_syev(benchmark, n):
    """Benchmark ``run_syev`` on a seeded symmetric ``n`` x ``n`` matrix.

    The previous version also made a copy of the input that was never read;
    that dead allocation has been dropped.
    """
    rndm = np.random.RandomState(1234)
    a = rndm.uniform(size=(n, n))
    # a + a.T is symmetric by construction; store it Fortran-ordered.
    a = np.asarray(a + a.T, dtype=float, order='F')

    # The workspace-size query runs outside the benchmarked call.
    lwork, info = dsyev_lwork(n)
    lwork = int(lwork)
    assert info == 0

    w, v, info = benchmark(run_syev, a, lwork)

    assert info == 0
    assert a is v  # overwrite_a=True
184+
185+

benchmark/pybench/meson.build

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
#
# Taken from SciPy (of course)
#
project(
  'openblas-wrap',
  'c', 'fortran',
  version: '0.1',
  license: 'BSD-3',
  meson_version: '>= 1.1.0',
  default_options: [
    'buildtype=debugoptimized',
    'b_ndebug=if-release',
    'c_std=c17',
    'fortran_std=legacy',
  ],
)

# Python installation the extension modules are built for.
py3 = import('python').find_installation(pure: false)
py3_dep = py3.dependency()

cc = meson.get_compiler('c')

# Silence these warnings, but only where the C compiler accepts the flag.
_global_c_args = cc.get_supported_arguments(
  '-Wno-unused-but-set-variable',
  '-Wno-unused-function',
  '-Wno-conversion',
  '-Wno-misleading-indentation',
)
add_project_arguments(_global_c_args, language: 'c')

# Link C code against libm when the platform provides it as a separate
# library (reasoning carried over from the SciPy build this file was taken
# from). For Fortran code, Meson already adds `-lm`.
m_dep = cc.find_library('m', required: false)
if m_dep.found()
  add_project_link_arguments('-lm', language: 'c')
endif

generate_f2pymod = find_program('openblas_wrap/generate_f2pymod.py')

# OpenBLAS is located through pkg-config only; a missing dependency is a
# hard error, which is dependency()'s default behaviour.
openblas = dependency('openblas', method: 'pkg-config')
openblas_dep = declare_dependency(dependencies: openblas)

subdir('openblas_wrap')

0 commit comments

Comments
 (0)