Skip to content

taco: bug for user-defined fused function xor(and(A, C), and(B, C)) #29

Open
@weiya711

Description

@weiya711

When generating code for xor(and(A, C), and(B, C)) in taco certain implementations do not match numpy.

The below statement produces the correct code that matches dense numpy and pydata/sparse. For data/image/no/image1.jpg it produces 816 nnzs:

   temp1(i,j) = andOp1(A(i, j), C(i, j));
   temp2(i,j) = andOp1(B(i, j), C(i, j));
   result(i, j) = xorOp1(temp1(i,j), temp2(i,j));

This (supposedly) equivalent statement produces too many zeros. For data/image/no/image1.jpg it produces 2646 nnzs:

    result(i, j) = xorOp1(andOp1(matrix1(i, j), matrix3(i, j)), andOp1(matrix2(i, j), matrix3(i, j)));

This (supposedly) equivalent statement also produces too many zeros and produces the same result as 2). For data/image/no/image1.jpg it produces 2646 nnzs:

    result(i, j) = xorAndOp(matrix1(i, j), matrix2(i, j), matrix3(i, j));

For case 3) the generated code from the lowerer is:

int compute(taco_tensor_t *result, taco_tensor_t *A, taco_tensor_t *B, taco_tensor_t *C) {
  int result1_dimension = (int)(result->dimensions[0]);
  int64_t* restrict result_vals = (int64_t*)(result->vals);
  int A1_dimension = (int)(A->dimensions[0]);
  int* restrict A2_pos = (int*)(A->indices[1][0]);
  int* restrict A2_crd = (int*)(A->indices[1][1]);
  int B1_dimension = (int)(B->dimensions[0]);
  int* restrict B2_pos = (int*)(B->indices[1][0]);
  int* restrict B2_crd = (int*)(B->indices[1][1]);
  int C1_dimension = (int)(C->dimensions[0]);
  int* restrict C2_pos = (int*)(C->indices[1][0]);
  int* restrict C2_crd = (int*)(C->indices[1][1]);
  int32_t jresult = 0;
  for (int32_t i = 0; i < C1_dimension; i++) {
    int32_t jA = A2_pos[i];
    int32_t pA2_end = A2_pos[(i + 1)];
    int32_t jC = C2_pos[i];
    int32_t pC2_end = C2_pos[(i + 1)];
    int32_t jB = B2_pos[i];
    int32_t pB2_end = B2_pos[(i + 1)];
    while ((jA < pA2_end && jC < pC2_end) && jB < pB2_end) {
      int32_t jA0 = A2_crd[jA];
      int32_t jC0 = C2_crd[jC];
      int32_t jB0 = B2_crd[jB];
      int32_t j = TACO_MIN(jA0,TACO_MIN(jC0,jB0));
      if ((jA0 == j && jC0 == j) && jB0 == j) {
        result_vals[jresult] = 1;
        jresult++;
      }
      else if (jB0 == j && jC0 == j) {
        result_vals[jresult] = 1;
        jresult++;
      }
      else if (jA0 == j && jC0 == j) {
        result_vals[jresult] = 1;
        jresult++;
      }
      jA += (int32_t)(jA0 == j);
      jC += (int32_t)(jC0 == j);
      jB += (int32_t)(jB0 == j);
    }
    while (jB < pB2_end && jC < pC2_end) {
      int32_t jB0 = B2_crd[jB];
      int32_t jC0 = C2_crd[jC];
      int32_t j = TACO_MIN(jB0,jC0);
      if (jB0 == j && jC0 == j) {
        result_vals[jresult] = 1;
        jresult++;
      }
      jB += (int32_t)(jB0 == j);
      jC += (int32_t)(jC0 == j);
    }
    while (jA < pA2_end && jC < pC2_end) {
      int32_t jA0 = A2_crd[jA];
      int32_t jC0 = C2_crd[jC];
      int32_t j = TACO_MIN(jA0,jC0);
      if (jA0 == j && jC0 == j) {
        result_vals[jresult] = 1;
        jresult++;
      }
      jA += (int32_t)(jA0 == j);
      jC += (int32_t)(jC0 == j);
    }
  }
  return 0;
}

Where the case statement if ((jA0 == j && jC0 == j) && jB0 == j) does not seem correct since the intersection of all three tensors (A, B, and C) should not be included in the defined fused iteration space of xor(and(A, C), and(B, C)).

The above ops are defined as:

struct Boolean {
  ir::Expr operator()(const std::vector<ir::Expr> &v) {
    taco_iassert(v.size() >= 1) << "Add operator needs at least one operand";
    return ir::Literal::make(int64_t(1), v[0].type());
  }
};

struct xorAlgebra {
  IterationAlgebra operator()(const std::vector<IndexExpr>& regions) {
    IterationAlgebra noIntersect = Complement(Intersect(regions[0], regions[1]));
    return Intersect(noIntersect, Union(regions[0], regions[1]));
  }
};

struct andAlgebra {
  IterationAlgebra operator()(const std::vector<IndexExpr>& regions) {
    return Intersect(regions[0], regions[1]);
  }
};

struct xorAndAlgebra {
  IterationAlgebra operator()(const std::vector<IndexExpr>& regions) {
    auto m1 = Intersect(regions[0], regions[2]);
    auto m2 = Intersect(regions[1], regions[2]);
    auto noIntersect = Complement(Intersect(m1, m2));
    return Intersect(noIntersect, Union(m1, m2));
  }
};

Func xorOp1("logical_xor", Boolean(), xorAlgebra());
Func andOp1("logical_and", Boolean(), andAlgebra());
Func xorAndOp("fused_xor_and", Boolean(), xorAndAlgebra());

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions