OpenMathLib
diff --git a/‎.github/workflows/codspeed-bench.yml
Lines changed: 150 additions & 0 deletions b/‎.github/workflows/codspeed-bench.yml
Lines changed: 150 additions & 0 deletions
diff --git a/‎.gitignore
Lines changed: 1 addition & 0 deletions b/‎.gitignore
Lines changed: 1 addition & 0 deletions
diff --git a/‎benchmark/pybench/benchmarks/bench_blas.py
Lines changed: 185 additions & 0 deletions b/‎benchmark/pybench/benchmarks/bench_blas.py
Lines changed: 185 additions & 0 deletions
diff --git a/‎benchmark/pybench/meson.build
Lines changed: 48 additions & 0 deletions b/‎benchmark/pybench/meson.build
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,150 @@
+# CI workflow: build OpenBLAS and run the Python benchmarks under CodSpeed.
+name: Run codspeed benchmarks
+
+# Triggered by every push and every pull request.
+on:
+  - push
+  - pull_request
+
+# Runs are grouped by workflow name plus the PR head ref (or the run id when
+# there is no head ref); a newer run in a group cancels the one in progress.
+concurrency:
+  cancel-in-progress: true
+  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
+
+permissions:
+  # Read access is all that is needed to fetch code (actions/checkout).
+  contents: read
+
+jobs:
+  benchmarks:
+    # Only run in the upstream repository, not in forks.
+    if: ${{ github.repository == 'OpenMathLib/OpenBLAS' }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+        fortran:
+          - gfortran
+        build:
+          - make
+        pyver:
+          - "3.12"
+    steps:
+      # Fetch the repository sources.
+      - uses: actions/checkout@v3
+      # Install the Python version selected by the build matrix.
+      - uses: actions/setup-python@v3
+        with:
+          python-version: ${{ matrix.pyver }}
+
+      # Log the CPU description of the runner (Linux runners only).
+      - name: Print system information
+        run: |
+          case "$RUNNER_OS" in
+            Linux)
+              cat /proc/cpuinfo
+              ;;
+          esac
+
+      # Native toolchain for the build: Fortran compiler, cmake and ccache.
+      # libtinfo5 is intentionally not requested: ubuntu-latest is based on
+      # Ubuntu 24.04, whose archive no longer provides that package, so asking
+      # for it makes apt-get (and therefore the whole job) fail. Nothing in
+      # this workflow appears to need it.
+      - name: Install Dependencies
+        run: |
+          if [ "$RUNNER_OS" == "Linux" ]; then
+            sudo apt-get update
+            sudo apt-get install -y gfortran cmake ccache
+          else
+            echo "::error::$RUNNER_OS not supported"
+            exit 1
+          fi
+
+      # Persist the ccache directory (~/.ccache) between workflow runs.
+      - name: Compilation cache
+        uses: actions/cache@v3
+        with:
+          path: ~/.ccache
+          # The commit sha is part of the cache key because a new cache entry
+          # is only created when no entry exists for the key yet.
+          # GNU make and cmake call the compilers differently, which appears
+          # to cause cache mismatches, so the build tool is part of the key
+          # as well: each tool keeps its own ccache and they do not pollute
+          # each other.
+          key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
+          # Fallback prefixes used to restore an earlier cache entry, listed
+          # from most to least specific: same branch and Fortran compiler
+          # first, then same Fortran compiler, then same build tool only.
+          restore-keys: |
+            ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
+            ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
+            ccache-${{ runner.os }}-${{ matrix.build }}
+
+      # Generate benchmark/pybench/openblas.pc, a pkg-config description of the
+      # OpenBLAS library built in the workspace root.
+      #
+      # The heredoc delimiter is quoted ('EOF') so that the shell performs no
+      # expansion inside the document: `${version}` is a pkg-config variable
+      # reference and must reach the file verbatim. With an unquoted delimiter
+      # bash replaces it with the (unset, hence empty) shell variable and the
+      # file ends up with an empty `Version:` field. The `${{ ... }}`
+      # expressions are substituted by GitHub Actions before the shell runs,
+      # so quoting the delimiter does not affect them.
+      - name: Write out the .pc
+        run: |
+             cd benchmark/pybench
+             cat > openblas.pc << 'EOF'
+             libdir=${{ github.workspace }}
+             includedir= ${{ github.workspace }}
+             openblas_config= OpenBLAS 0.3.27 DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=64
+             version=0.0.99
+             extralib=-lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
+             Name: openblas
+             Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
+             Version: ${version}
+             URL: https://github.com/xianyi/OpenBLAS
+             Libs: ${{ github.workspace }}/libopenblas.so -Wl,-rpath,${{ github.workspace }}
+             Libs.private: -lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
+             Cflags: -I${{ github.workspace}}
+             EOF
+             cat openblas.pc
+
+      - name: Configure ccache
+        run: |
+          # make builds pick up ccache through PATH; add its wrapper directory.
+          if [ "${{ matrix.build }}" = "make" ]; then
+            case "$RUNNER_OS" in
+              Linux)
+                echo "/usr/lib/ccache" >> $GITHUB_PATH
+                ;;
+              macOS)
+                echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
+                ;;
+              *)
+                echo "::error::$RUNNER_OS not supported"
+                exit 1
+                ;;
+            esac
+          fi
+          # Cap the cache size and turn on compression so that the total disk
+          # or cache quota (5 GB) is not exceeded.
+          mkdir -p ~/.ccache
+          printf '%s\n' "max_size = 300M" "compression = true" > ~/.ccache/ccache.conf
+          ccache -s
+
+      # Build the library with the tool selected by the matrix; any other
+      # value of matrix.build is reported as an error.
+      - name: Build OpenBLAS
+        run: |
+          if [ "${{ matrix.build }}" = "make" ]; then
+            make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
+          elif [ "${{ matrix.build }}" = "cmake" ]; then
+            mkdir build && cd build
+            cmake -DDYNAMIC_ARCH=1 \
+                  -DNOFORTRAN=0 \
+                  -DBUILD_WITHOUT_LAPACK=0 \
+                  -DCMAKE_VERBOSE_MAKEFILE=ON \
+                  -DCMAKE_BUILD_TYPE=Release \
+                  -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
+                  -DCMAKE_C_COMPILER_LAUNCHER=ccache \
+                  -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
+                  ..
+            cmake --build .
+          else
+            echo "::error::Configuration not supported"
+            exit 1
+          fi
+
+      # Print the ccache statistics after the build. Informational only:
+      # continue-on-error keeps a failure of this step from failing the job.
+      - name: Show ccache status
+        continue-on-error: true
+        run: ccache -s
+
+      # Python packages for the following steps: meson (with ninja) builds the
+      # wrapper, pytest with the pytest-codspeed plugin runs the benchmarks,
+      # and the benchmark module imports numpy.
+      - name: Install benchmark dependencies
+        run: pip install meson ninja numpy pytest pytest-codspeed --user
+
+      # Configure, build and install the benchmark wrapper with meson into
+      # benchmark/pybench/build-install.
+      # PKG_CONFIG_PATH is set to benchmark/pybench, the directory in which
+      # the "Write out the .pc" step created openblas.pc, so that the
+      # pkg-config lookup of `openblas` finds that file.
+      # The final commands import the `_flapack` module from the build tree
+      # and print its contents as a sanity check.
+      - name: Build the wrapper
+        run: |
+          cd benchmark/pybench
+          export PKG_CONFIG_PATH=$PWD
+          meson setup build  --prefix=$PWD/build-install
+          meson install -C build
+          #
+          # sanity check
+          cd build/openblas_wrap
+          python -c'import _flapack; print(dir(_flapack))'
+
+      # Run the pytest benchmarks through the CodSpeed action.
+      # PYTHONPATH points at the site-packages directory below the install
+      # prefix that "Build the wrapper" passed to `meson setup`.
+      # OPENBLAS_NUM_THREADS=1 is set for the pytest process only.
+      - name: Run benchmarks
+        uses: CodSpeedHQ/action@v2
+        with:
+          token: ${{ secrets.CODSPEED_TOKEN }}
+          run: |
+            cd benchmark/pybench
+            export PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/
+            OPENBLAS_NUM_THREADS=1 pytest benchmarks/bench_blas.py --codspeed
+
@@ -109,3 +109,4 @@ benchmark/smallscaling
 CMakeCache.txt
 CMakeFiles/*
 .vscode
+**/__pycache__
@@ -0,0 +1,185 @@
+import pytest
+import numpy as np
+from openblas_wrap import (
+    # level 1
+    dnrm2, ddot, daxpy,
+    # level 3
+    dgemm, dsyrk,
+    # lapack
+    dgesv,                   # linalg.solve
+    dgesdd, dgesdd_lwork,    # linalg.svd
+    dsyev, dsyev_lwork,      # linalg.eigh
+)
+
+# ### BLAS level 1 ###
+
+# dnrm2
+
+dnrm2_sizes = [100, 1000]
+
+def run_dnrm2(n, x, incx):
+    res = dnrm2(x, n, incx=incx)
+    return res
+
+
+@pytest.mark.parametrize('n', dnrm2_sizes)
+def test_nrm2(benchmark, n):
+    rndm = np.random.RandomState(1234)
+    x = np.array(rndm.uniform(size=(n,)), dtype=float)
+    result = benchmark(run_dnrm2, n, x, 1)
+
+
+# ddot
+
+ddot_sizes = [100, 1000]
+
+def run_ddot(x, y,):
+    res = ddot(x, y)
+    return res
+
+
+@pytest.mark.parametrize('n', ddot_sizes)
+def test_dot(benchmark, n):
+    rndm = np.random.RandomState(1234)
+    x = np.array(rndm.uniform(size=(n,)), dtype=float)
+    y = np.array(rndm.uniform(size=(n,)), dtype=float)
+    result = benchmark(run_ddot, x, y)
+
+
+# daxpy
+
+daxpy_sizes = [100, 1000]
+
+def run_daxpy(x, y,):
+    res = daxpy(x, y, a=2.0)
+    return res
+
+
+@pytest.mark.parametrize('n', daxpy_sizes)
+def test_daxpy(benchmark, n):
+    rndm = np.random.RandomState(1234)
+    x = np.array(rndm.uniform(size=(n,)), dtype=float)
+    y = np.array(rndm.uniform(size=(n,)), dtype=float)
+    result = benchmark(run_daxpy, x, y)
+
+
+
+
+# ### BLAS level 3 ###
+
+# dgemm
+
+gemm_sizes = [100, 1000]
+
+def run_gemm(a, b, c):
+    alpha = 1.0
+    res = dgemm(alpha, a, b, c=c, overwrite_c=True)
+    return res
+
+
+@pytest.mark.parametrize('n', gemm_sizes)
+def test_gemm(benchmark, n):
+    rndm = np.random.RandomState(1234)
+    a = np.array(rndm.uniform(size=(n, n)), dtype=float, order='F')
+    b = np.array(rndm.uniform(size=(n, n)), dtype=float, order='F')
+    c = np.empty((n, n), dtype=float, order='F')
+    result = benchmark(run_gemm, a, b, c)
+    assert result is c
+
+
+# dsyrk
+
+syrk_sizes = [100, 1000]
+
+
+def run_syrk(a, c):
+    res = dsyrk(1.0, a, c=c, overwrite_c=True)
+    return res
+
+
+@pytest.mark.parametrize('n', syrk_sizes)
+def test_syrk(benchmark, n):
+    rndm = np.random.RandomState(1234)
+    a = np.array(rndm.uniform(size=(n, n)), dtype=float, order='F')
+    c = np.empty((n, n), dtype=float, order='F')
+    result = benchmark(run_syrk, a, c)
+    assert result is c
+
+
+# ### LAPACK ###
+
+# linalg.solve
+
+gesv_sizes = [100, 1000]
+
+
+def run_gesv(a, b):
+    res = dgesv(a, b, overwrite_a=True, overwrite_b=True)
+    return res
+
+
+@pytest.mark.parametrize('n', gesv_sizes)
+def test_gesv(benchmark, n):
+    rndm = np.random.RandomState(1234)
+    a = (np.array(rndm.uniform(size=(n, n)), dtype=float, order='F') +
+         np.eye(n, order='F'))
+    b = np.array(rndm.uniform(size=(n, 1)), order='F')
+    lu, piv, x, info = benchmark(run_gesv, a, b)
+    assert lu is a
+    assert x is b
+    assert info == 0
+
+
+# linalg.svd
+
+gesdd_sizes = [(100, 5), (1000, 222)]
+
+
+def run_gesdd(a, lwork):
+    res = dgesdd(a, lwork=lwork, full_matrices=False, overwrite_a=False)
+    return res
+
+
+@pytest.mark.parametrize('mn', gesdd_sizes)
+def test_gesdd(benchmark, mn):
+    m, n = mn
+    rndm = np.random.RandomState(1234)
+    a = np.array(rndm.uniform(size=(m, n)), dtype=float, order='F')
+
+    lwork, info = dgesdd_lwork(m, n)
+    lwork = int(lwork)
+    assert info == 0
+
+    u, s, vt, info = benchmark(run_gesdd, a, lwork)
+
+    assert info == 0
+    np.testing.assert_allclose(u @ np.diag(s) @ vt, a, atol=1e-13)
+
+
+# linalg.eigh
+
+syev_sizes = [50, 200]
+
+
+def run_syev(a, lwork):
+    res = dsyev(a, lwork=lwork, overwrite_a=True)
+    return res
+
+
+@pytest.mark.parametrize('n', syev_sizes)
+def test_syev(benchmark, n):
+    rndm = np.random.RandomState(1234)
+    a = rndm.uniform(size=(n, n))
+    a = np.asarray(a + a.T, dtype=float, order='F')
+    a_ = a.copy()
+
+    lwork, info = dsyev_lwork(n)
+    lwork = int(lwork)
+    assert info == 0
+
+    w, v, info = benchmark(run_syev, a, lwork)
+
+    assert info == 0
+    assert a is v  # overwrite_a=True
+
+
@@ -0,0 +1,48 @@
+#
+# Meson build definition for the `openblas-wrap` project.
+# Taken from SciPy's build files.
+#
+project(
+  'openblas-wrap',
+  'c', 'fortran',
+  version: '0.1',
+  license: 'BSD-3',
+  meson_version: '>= 1.1.0',
+  default_options: [
+    'buildtype=debugoptimized',
+    'b_ndebug=if-release',
+    'c_std=c17',
+    'fortran_std=legacy',
+  ],
+)
+
+# Python installation that the extension is built for, and its dependency
+# object.
+py3 = import('python').find_installation(pure: false)
+py3_dep = py3.dependency()
+
+cc = meson.get_compiler('c')
+
+# Silence a set of C warnings, but only those flags that the C compiler
+# actually supports.
+_global_c_args = cc.get_supported_arguments(
+  '-Wno-unused-but-set-variable',
+  '-Wno-unused-function',
+  '-Wno-conversion',
+  '-Wno-misleading-indentation',
+)
+add_project_arguments(_global_c_args, language : 'c')
+
+# Link all C code against libm when the C compiler can find it.
+# (Rationale inherited from SciPy: the C code is assumed to use math
+# functions. For C++ this isn't needed, because libstdc++/libc++ is
+# guaranteed to depend on libm. For Fortran code, Meson already adds `-lm`.)
+m_dep = cc.find_library('m', required : false)
+if m_dep.found()
+  add_project_link_arguments('-lm', language : 'c')
+endif
+
+# Helper script shipped in openblas_wrap/; presumably used by
+# openblas_wrap/meson.build to generate the f2py module sources -- TODO confirm.
+generate_f2pymod = find_program('openblas_wrap/generate_f2pymod.py')
+
+# OpenBLAS is located through pkg-config and is mandatory. It is wrapped in a
+# declared dependency that adds no extra compile arguments.
+openblas = dependency('openblas', method: 'pkg-config', required: true)
+openblas_dep = declare_dependency(
+  dependencies: openblas,
+  compile_args: []
+)
+
+
+# Continue with the build definition in openblas_wrap/.
+subdir('openblas_wrap')