taco: bug for user-defined fused function xor(and(A, C), and(B, C))

When generating code for `xor(and(A, C), and(B, C))` in taco certain implementations do not match numpy. 

1) 
The below statement produces the correct code that matches dense numpy and pydata/sparse. For `data/image/no/image1.jpg` it produces 816 nnzs: 
```
   temp1(i,j) = andOp1(A(i, j), C(i, j));
   temp2(i,j) = andOp1(B(i, j), C(i, j));
   result(i, j) = xorOp1(temp1(i,j), temp2(i,j));
```

2)
This (supposedly) equivalent statement produces too many zeros. For `data/image/no/image1.jpg` it produces 2646 nnzs:
```
    result(i, j) = xorOp1(andOp1(matrix1(i, j), matrix3(i, j)), andOp1(matrix2(i, j), matrix3(i, j)));
```

3)
This (supposedly) equivalent statement also produces too many zeros and produces the same result as 2). For `data/image/no/image1.jpg` it produces 2646 nnzs:
```
    result(i, j) = xorAndOp(matrix1(i, j), matrix2(i, j), matrix3(i, j));
```
For case 3) the generated code from the lowerer is: 
```
int compute(taco_tensor_t *result, taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
  int result1_dimension = (int)(result->dimensions[0]);
  int64_t* restrict result_vals = (int64_t*)(result->vals);
  int A1_dimension = (int)(A->dimensions[0]);
  int* restrict A2_pos = (int*)(A->indices[1][0]);
  int* restrict A2_crd = (int*)(A->indices[1][1]);
  int B1_dimension = (int)(B->dimensions[0]);
  int* restrict B2_pos = (int*)(B->indices[1][0]);
  int* restrict B2_crd = (int*)(B->indices[1][1]);
  int C1_dimension = (int)(C->dimensions[0]);
  int* restrict C2_pos = (int*)(C->indices[1][0]);
  int* restrict C2_crd = (int*)(C->indices[1][1]);
  int32_t jresult = 0;
  for (int32_t i = 0; i < C1_dimension; i++) {
    int32_t jA = A2_pos[i];
    int32_t pA2_end = A2_pos[(i + 1)];
    int32_t jC = C2_pos[i];
    int32_t pC2_end = C2_pos[(i + 1)];
    int32_t jB = B2_pos[i];
    int32_t pB2_end = B2_pos[(i + 1)];
    while ((jA < pA2_end && jC < pC2_end) && jB < pB2_end) {
      int32_t jA0 = A2_crd[jA];
      int32_t jC0 = C2_crd[jC];
      int32_t jB0 = B2_crd[jB];
      int32_t j = TACO_MIN(jA0,TACO_MIN(jC0,jB0));
      if ((jA0 == j && jC0 == j) && jB0 == j) {
        result_vals[jresult] = 1;
        jresult++;
      }
      else if (jB0 == j && jC0 == j) {
        result_vals[jresult] = 1;
        jresult++;
      }
      else if (jA0 == j && jC0 == j) {
        result_vals[jresult] = 1;
        jresult++;
      }
      jA += (int32_t)(jA0 == j);
      jC += (int32_t)(jC0 == j);
      jB += (int32_t)(jB0 == j);
    }
    while (jB < pB2_end && jC < pC2_end) {
      int32_t jB0 = B2_crd[jB];
      int32_t jC0 = C2_crd[jC];
      int32_t j = TACO_MIN(jB0,jC0);
      if (jB0 == j && jC0 == j) {
        result_vals[jresult] = 1;
        jresult++;
      }
      jB += (int32_t)(jB0 == j);
      jC += (int32_t)(jC0 == j);
    }
    while (jA < pA2_end && jC < pC2_end) {
      int32_t jA0 = A2_crd[jA];
      int32_t jC0 = C2_crd[jC];
      int32_t j = TACO_MIN(jA0,jC0);
      if (jA0 == j && jC0 == j) {
        result_vals[jresult] = 1;
        jresult++;
      }
      jA += (int32_t)(jA0 == j);
      jC += (int32_t)(jC0 == j);
    }
  }
  return 0;
}
```
Where the case statement `if ((jA0 == j && jC0 == j) && jB0 == j)` does not seem correct since the intersection of all three tensors (A, B, and C) should not be included in the defined fused iteration space of `xor(and(A, C), and(B, C))`.

The above ops are defined as: 
```
struct Boolean {
  ir::Expr operator()(const std::vector<ir::Expr> &v) {
    taco_iassert(v.size() >= 1) << "Add operator needs at least one operand";
    return ir::Literal::make(int64_t(1), v[0].type());
  }
};

struct xorAlgebra {
  IterationAlgebra operator()(const std::vector<IndexExpr>& regions) {
    IterationAlgebra noIntersect = Complement(Intersect(regions[0], regions[1]));
    return Intersect(noIntersect, Union(regions[0], regions[1]));
  }
};

struct andAlgebra {
  IterationAlgebra operator()(const std::vector<IndexExpr>& regions) {
    return Intersect(regions[0], regions[1]);
  }
};

struct xorAndAlgebra {
  IterationAlgebra operator()(const std::vector<IndexExpr>& regions) {
    auto m1 = Intersect(regions[0], regions[2]);
    auto m2 = Intersect(regions[1], regions[2]);
    auto noIntersect = Complement(Intersect(m1, m2));
    return Intersect(noIntersect, Union(m1, m2));
  }
};

Func xorOp1("logical_xor", Boolean(), xorAlgebra());
Func andOp1("logical_and", Boolean(), andAlgebra());
Func xorAndOp("fused_xor_and", Boolean(), xorAndAlgebra());
``` 

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

taco: bug for user-defined fused function xor(and(A, C), and(B, C)) #29

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

taco: bug for user-defined fused function xor(and(A, C), and(B, C)) #29

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions