Description
Description
Given the SBBCSD input in the C code below, xBBCSD reads an unitialized value in line 808:
Line 808 in b231dd5
The problem was detected by initializing all arrays with intent OUT
with NaN. Valgrind's memcheck can diagnose the problem, too, if no initialization is performed.
As far as I can tell, the fix is as follows:
@@ -805,7 +805,7 @@
CALL SLARTGP( B22BULGE, B22E(I-1), WORK(IU2SN+I-1),
$ WORK(IU2CS+I-1), R )
ELSE IF( NU .LT. MU ) THEN
- CALL SLARTGS( B21E(I), B21E(I+1), NU, WORK(IU2CS+I-1),
+ CALL SLARTGS( B21E(I), B21D(I+1), NU, WORK(IU2CS+I-1),
$ WORK(IU2SN+I-1) )
ELSE
CALL SLARTGS( B22D(I), B22E(I), MU, WORK(IU2CS+I-1),
In general, I do not comprehend the details of this functions but I arrived at this suggestion because it matches the code in lines 727, 792, and 871. Those lines reference B12D(I)
and B12E(I-1)
, where B12D
and B12E
contain the Q
diagonal and Q-1
subdiagonal entries, respectively, of a certain matrix. The index I
can be as large as Q
meaning that the reference I+1
would be out of bounds for B12E
but not for B12D
.
@briansutton Can you confirm the validity of the suggested patch?
Find below C99 code triggering the problem. It can be compiled as follows:
gcc -Wextra -Wall -std=c99 -pedantic sbbcsd-uninit-read.c -llapack
Output without fix (e.g., with Debian OpenBLAS 0.3.21 or commit b231dd5):
SBBCSD failed with info=3
theta +nan +nan +nan +0.00e+00
phi +nan +nan +0.00e+00
V1T
-nan -nan -nan -nan
+nan +nan +nan +nan
-nan -nan -nan -nan
+0.000000000e+00 +0.000000000e+00 +0.000000000e+00 +1.000000000e+00
Output with SBBCSD fix:
theta +0.00e+00 +0.00e+00 +6.73e-05 +1.57e+00
phi +0.00e+00 +0.00e+00 +0.00e+00
V1T
+3.595401645e-01 -9.329686165e-01 +1.733144186e-02 +0.000000000e+00
+0.000000000e+00 +0.000000000e+00 +0.000000000e+00 +1.000000000e+00
-6.232285872e-03 +1.617212035e-02 +9.998497367e-01 +0.000000000e+00
+9.331088066e-01 +3.595941961e-01 +1.627663976e-11 +0.000000000e+00
#include <math.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
typedef int lapack_int;
void sbbcsd_(
const char* jobu1, const char* jobu2, const char* jobv1t,
const char* jobv2t, const char* trans, const lapack_int* m,
const lapack_int* p, const lapack_int* q, float* theta, float* phi,
float* u1, const lapack_int* ldu1, float* u2, const lapack_int* ldu2,
float* v1t, const lapack_int* ldv1t, float* v2t, const lapack_int* ldv2t,
float* b11d, float* b11e, float* b12d, float* b12e, float* b21d,
float* b21e, float* b22d, float* b22e, float* work, const lapack_int* lwork,
lapack_int* info, size_t jobu1_len, size_t jobu2_len, size_t jobv1t_len,
size_t jobv2t_len, size_t trans_len
);
#define M 8
#define P 4
#define Q 4
int main()
{
float nan = NAN;
char jobu1 = 'N';
char jobu2 = 'N';
char jobv1t = 'Y';
char jobv2t = 'N';
char trans = 'N';
lapack_int m = M;
lapack_int p = P;
lapack_int q = Q;
float u1[P * P] = { 0 };
lapack_int ldu1 = P;
float u2[(M - P) * (M - P)] = { 0 };
lapack_int ldu2 = M - P;
float v1t[Q * Q] = { 0 };
lapack_int ldv1t = Q;
lapack_int ldv2t = 1;
float theta[Q]
= { 1.20296335, 1.27602555E-02, 6.23163305E-06, 2.79734493E-08 };
float phi[Q - 1] = { 1.57074344, 6.70322042E-05, 1.90471667E-07 };
float b11d[Q];
float b11e[Q - 1];
float b12d[Q];
float b12e[Q - 1];
float b21d[Q];
float b21e[Q - 1];
float b22d[Q];
float b22e[Q - 1];
float work[8 * Q] = { nan };
lapack_int lwork = sizeof(work) / sizeof(work[0]);
lapack_int info = -1;
//
// Disable this for loop and the next one if you want Valgrind's memcheck
// to detect the problem.
//
for(size_t i = 0; i < Q; ++i) {
b11d[i] = nan;
b12d[i] = nan;
b21d[i] = nan;
b22d[i] = nan;
}
for(size_t i = 0; i + 1 < Q; ++i) {
b11e[i] = nan;
b12e[i] = nan;
b21e[i] = nan;
b22e[i] = nan;
}
for(size_t i = 0; i < Q; ++i) {
size_t index = i * ldv1t + i;
v1t[index] = 1;
}
sbbcsd_(
&jobu1, &jobu2, &jobv1t, &jobv2t, &trans, &m, &p, &q, theta, phi, u1,
&ldu1, u2, &ldu2, v1t, &ldv1t, NULL, &ldv2t, b11d, b11e, b12d, b12e,
b21d, b21e, b22d, b22e, work, &lwork, &info, 1, 1, 1, 1, 1
);
int exit_status = EXIT_SUCCESS;
if(info != 0) {
fprintf(stderr, "SBBCSD failed with info=%d\n", info);
exit_status = EXIT_FAILURE;
}
printf("theta ");
for(size_t i = 0; i < Q; ++i) {
const char* sep = (i + 1 < Q) ? " " : "\n";
printf("%+8.2e%s", theta[i], sep);
}
printf("phi ");
for(size_t i = 0; i + 1 < Q; ++i) {
const char* sep = (i + 2 < Q) ? " " : "\n";
printf("%+8.2e%s", phi[i], sep);
}
printf("V1T\n");
for(size_t i = 0; i < Q; ++i) {
for(size_t j = 0; j < Q; ++j) {
size_t index = j * ldv1t + i;
const char* sep = (j + 1 < Q) ? " " : "\n";
printf("%+15.9e%s", v1t[index], sep);
}
}
return exit_status;
}
Checklist
- I've included a minimal example to reproduce the issue
- I'd be willing to make a PR to solve this issue