Skip to content

Commit 0b0bb99

Browse files
authored
Merge pull request #5265 from guoyuanplct/develop
kernel/riscv64: Added support for omatcopy on RISCV64_ZVL256B
2 parents 8afddc1 + 7732a55 commit 0b0bb99

File tree

4 files changed

+268
-3
lines changed

4 files changed

+268
-3
lines changed

.github/workflows/c910v.yml

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,39 @@ jobs:
8383
8484
- name: test
8585
run: |
86-
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
87-
qemu-riscv64 ./utest/openblas_utest
88-
qemu-riscv64 ./utest/openblas_utest_ext
86+
# Run a command under `timeout`, retrying only when the attempt timed out.
#
#   $1  command line as ONE string; it is deliberately left unquoted below so
#       that it word-splits into program + arguments.
#
# The time limit starts at 10 seconds and grows by 5 seconds after every
# timed-out attempt, for at most 10 attempts.
# Returns 0 as soon as one attempt succeeds. A non-timeout failure, or running
# out of attempts, terminates the whole script (`exit`) with the last status.
run_with_retry() {
  local cmd="$1"
  local time_out=10
  local retries=10
  local exit_code=0
  local i

  for ((i=1; i<=retries; i++)); do
    # `-s 12 --preserve-status`: a timed-out command is killed with signal 12,
    # which surfaces as status 128+12 = 140 and is told apart from real failures.
    # The call stays inside `if` so a non-zero status is captured instead of
    # tripping an errexit shell.
    if timeout -s 12 --preserve-status $time_out $cmd; then
      echo "Command succeeded on attempt $i."
      return 0
    else
      exit_code=$?
      if [ $exit_code -eq 140 ]; then
        echo "Attempt $i timed out (retrying...)"
        time_out=$((time_out + 5))
      else
        echo "Attempt $i failed with exit code $exit_code. Aborting workflow."
        exit $exit_code
      fi
    fi
  done

  # Only reachable when every attempt timed out.
  echo "All $retries attempts failed, giving up."
  echo "Final failure was due to timeout."
  echo "Aborting workflow."
  exit $exit_code
}
113+
export PATH="$GITHUB_WORKSPACE/qemu-install/bin:$PATH"
# Resolve the emulator once. Assigning first and exporting afterwards keeps a
# failed lookup visible; `export VAR=$(cmd)` would report the status of
# `export` and hide it.
QEMU_BIN=$(which qemu-riscv64)
export QEMU_BIN
# Log which emulator binary is being used.
echo "$QEMU_BIN"
run_with_retry "$QEMU_BIN ./utest/openblas_utest"
run_with_retry "$QEMU_BIN ./utest/openblas_utest_ext"
118+
89119
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1
90120
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1
91121
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1

kernel/riscv64/KERNEL.RISCV64_ZVL256B

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,3 +201,9 @@ endif
201201
ifndef ZGEMM_BETA
202202
ZGEMM_BETA = ../generic/zgemm_beta.c
203203
endif
204+
205+
# Vectorized ?omatcopy_cn kernels: the real precisions share one source file,
# the complex precisions share the other.
SOMATCOPY_CN = omatcopy_cn_vector.c
DOMATCOPY_CN = omatcopy_cn_vector.c

COMATCOPY_CN = zomatcopy_cn_vector.c
ZOMATCOPY_CN = zomatcopy_cn_vector.c

kernel/riscv64/omatcopy_cn_vector.c

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
/***************************************************************************
2+
Copyright (c) 2013, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"
29+
/*
 * Precision-dependent aliases for the vector intrinsics used by this kernel.
 * Both variants use register grouping m4; only the element width differs
 * (e64 when DOUBLE is defined, e32 otherwise).
 */
#if defined(DOUBLE)
#define VSETVL_MAX      RISCV_RVV(vsetvlmax_e64m4)()
#define VSETVL(n)       RISCV_RVV(vsetvl_e64m4)(n)
#define FLOAT_V_T       vfloat64m4_t
#define VLEV_FLOAT      RISCV_RVV(vle64_v_f64m4)
#define VSEV_FLOAT      RISCV_RVV(vse64_v_f64m4)
#define VFMULVF_FLOAT   RISCV_RVV(vfmul_vf_f64m4)
#define VFMVVF_FLOAT    RISCV_RVV(vfmv_v_f_f64m4)
#else
#define VSETVL_MAX      RISCV_RVV(vsetvlmax_e32m4)()
#define VSETVL(n)       RISCV_RVV(vsetvl_e32m4)(n)
#define FLOAT_V_T       vfloat32m4_t
#define VLEV_FLOAT      RISCV_RVV(vle32_v_f32m4)
#define VSEV_FLOAT      RISCV_RVV(vse32_v_f32m4)
#define VFMULVF_FLOAT   RISCV_RVV(vfmul_vf_f32m4)
#define VFMVVF_FLOAT    RISCV_RVV(vfmv_v_f_f32m4)
#endif
46+
47+
48+
/*
 * Out-of-place scaled copy without transposition, one column at a time:
 *
 *     b[i*ldb + j] = alpha * a[i*lda + j]    for 0 <= i < cols, 0 <= j < rows
 *
 * rows, cols  extent of the copied region; nothing is done if either is <= 0
 * alpha       scale factor (0 and 1 take dedicated fast paths)
 * a, lda      source and the distance, in elements, between its columns
 * b, ldb      destination and the distance, in elements, between its columns
 *
 * Always returns 0.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
    BLASLONG i, j;
    FLOAT *aptr, *bptr;
    size_t vl;
    FLOAT_V_T va, va1;

    if ( rows <= 0 ) return(0);
    if ( cols <= 0 ) return(0);

    aptr = a;
    bptr = b;

    if ( alpha == 0.0 )
    {
        /* The source is not read at all on this path: b is filled with zeros.
           The zero vector is built once at maximum length and stored with
           whatever shorter length each strip needs. */
        vl = VSETVL_MAX;
        va = VFMVVF_FLOAT(0, vl);
        for ( i = 0; i < cols; i++ )
        {
            for ( j = 0; j < rows; j += vl )
            {
                vl = VSETVL(rows - j);
                VSEV_FLOAT(bptr + j, va, vl);
            }
            bptr += ldb;
        }
        return(0);
    }

    if ( alpha == 1.0 )
    {
        /* Plain copy, no multiplication. */
        for ( i = 0; i < cols; i++ )
        {
            for ( j = 0; j < rows; j += vl )
            {
                vl = VSETVL(rows - j);
                va = VLEV_FLOAT(aptr + j, vl);
                VSEV_FLOAT(bptr + j, va, vl);
            }
            aptr += lda;
            bptr += ldb;
        }
        return(0);
    }

    /* General alpha. Columns are processed in pairs; when the column count is
       odd, the first column is handled on its own so the paired loop below
       always has two columns available. */
    i = 0;
    if ( cols % 2 )
    {
        for ( j = 0; j < rows; j += vl )
        {
            vl = VSETVL(rows - j);
            va = VLEV_FLOAT(aptr + j, vl);
            va = VFMULVF_FLOAT(va, alpha, vl);
            VSEV_FLOAT(bptr + j, va, vl);
        }
        aptr += lda;
        bptr += ldb;
        i = 1;
    }

    for ( ; i < cols; i += 2 )
    {
        for ( j = 0; j < rows; j += vl )
        {
            vl  = VSETVL(rows - j);
            va  = VLEV_FLOAT(aptr + j, vl);
            va1 = VLEV_FLOAT(aptr + lda + j, vl);
            va  = VFMULVF_FLOAT(va, alpha, vl);
            va1 = VFMULVF_FLOAT(va1, alpha, vl);
            VSEV_FLOAT(bptr + j, va, vl);
            VSEV_FLOAT(bptr + ldb + j, va1, vl);
        }
        aptr += 2 * lda;
        bptr += 2 * ldb;
    }

    return(0);
}

kernel/riscv64/zomatcopy_cn_vector.c

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
/***************************************************************************
2+
Copyright (c) 2013, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"
29+
30+
31+
/*
 * Precision-dependent aliases for the vector intrinsics used by this kernel.
 * Both variants use register grouping m4; the element width is e32 unless
 * DOUBLE is defined, in which case it is e64. Each alias is defined exactly
 * once per branch.
 */
#if !defined(DOUBLE)
#define VSETVL(n)       RISCV_RVV(vsetvl_e32m4)(n)
#define FLOAT_V_T       vfloat32m4_t
#define VLEV_FLOAT      RISCV_RVV(vle32_v_f32m4)
#define VLSEV_FLOAT     RISCV_RVV(vlse32_v_f32m4)
#define VSEV_FLOAT      RISCV_RVV(vse32_v_f32m4)
#define VSSEV_FLOAT     RISCV_RVV(vsse32_v_f32m4)
#define VFMACCVF_FLOAT  RISCV_RVV(vfmacc_vf_f32m4)
#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
#define VFMUL_VF_FLOAT  RISCV_RVV(vfmul_vf_f32m4)
/* Two-field segment load/store and the tuple type they operate on. */
#define VLSEG2_FLOAT    RISCV_RVV(vlseg2e32_v_f32m4x2)
#define VSSEG2_FLOAT    RISCV_RVV(vsseg2e32_v_f32m4x2)
#define FLOAT_VX2_T     vfloat32m4x2_t
#define VGET_VX2        RISCV_RVV(vget_v_f32m4x2_f32m4)
#define VSET_VX2        RISCV_RVV(vset_v_f32m4_f32m4x2)
#else
#define VSETVL(n)       RISCV_RVV(vsetvl_e64m4)(n)
#define FLOAT_V_T       vfloat64m4_t
#define VLEV_FLOAT      RISCV_RVV(vle64_v_f64m4)
#define VLSEV_FLOAT     RISCV_RVV(vlse64_v_f64m4)
#define VSEV_FLOAT      RISCV_RVV(vse64_v_f64m4)
#define VSSEV_FLOAT     RISCV_RVV(vsse64_v_f64m4)
#define VFMACCVF_FLOAT  RISCV_RVV(vfmacc_vf_f64m4)
#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
#define VFMUL_VF_FLOAT  RISCV_RVV(vfmul_vf_f64m4)
/* Two-field segment load/store and the tuple type they operate on. */
#define VLSEG2_FLOAT    RISCV_RVV(vlseg2e64_v_f64m4x2)
#define VSSEG2_FLOAT    RISCV_RVV(vsseg2e64_v_f64m4x2)
#define FLOAT_VX2_T     vfloat64m4x2_t
#define VGET_VX2        RISCV_RVV(vget_v_f64m4x2_f64m4)
#define VSET_VX2        RISCV_RVV(vset_v_f64m4_f64m4x2)
#endif
64+
65+
/*
 * Out-of-place complex scaled copy without transposition. Elements are stored
 * as interleaved (real, imaginary) pairs of FLOAT, and for every element
 *
 *     b_re = alpha_r * a_re - alpha_i * a_im
 *     b_im = alpha_r * a_im + alpha_i * a_re
 *
 * rows, cols        extent of the copied region; nothing is done if either is <= 0
 * alpha_r, alpha_i  real and imaginary part of the scale factor
 * a, lda            source and the distance, in complex elements, between its columns
 * b, ldb            destination and the distance, in complex elements, between its columns
 *
 * Always returns 0.
 */
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
    BLASLONG i, j, ia;
    FLOAT *aptr, *bptr;
    FLOAT_V_T a_re, a_im, b_re, b_im;
    FLOAT_VX2_T vx2;
    size_t gvl = 0;

    if ( rows <= 0 ) return(0);
    if ( cols <= 0 ) return(0);

    aptr = a;
    bptr = b;

    /* From here on the leading dimensions count FLOATs, two per complex element. */
    lda *= 2;
    ldb *= 2;

    for ( i = 0; i < cols; i++ )
    {
        ia = 0;
        for ( j = 0; j < rows; j += gvl )
        {
            gvl = VSETVL(rows - j);

            /* The segment load splits the interleaved pairs: field 0 holds the
               real parts, field 1 the imaginary parts. */
            vx2  = VLSEG2_FLOAT(aptr + ia, gvl);
            a_re = VGET_VX2(vx2, 0);
            a_im = VGET_VX2(vx2, 1);

            b_im = VFMUL_VF_FLOAT(a_im, alpha_r, gvl);
            b_im = VFMACCVF_FLOAT(b_im, alpha_i, a_re, gvl);
            b_re = VFMUL_VF_FLOAT(a_re, alpha_r, gvl);
            b_re = VFNMSACVF_FLOAT(b_re, alpha_i, a_im, gvl);

            /* Both fields of the loaded tuple are overwritten, so it doubles as
               the output tuple and no uninitialized tuple is ever read. */
            vx2 = VSET_VX2(vx2, 0, b_re);
            vx2 = VSET_VX2(vx2, 1, b_im);
            VSSEG2_FLOAT(bptr + ia, vx2, gvl);

            ia += gvl * 2;
        }
        aptr += lda;
        bptr += ldb;
    }

    return(0);
}

0 commit comments

Comments
 (0)