Allow computed expressions on the left-hand-side

abadams · abadams · commit 31cb2a8177b1 · 2018-04-05T11:16:58.000-07:00
diff --git a/include/tc/core/libraries.h b/include/tc/core/libraries.h
@@ -32,8 +32,14 @@ namespace c {
 
 constexpr auto types = R"C(
 // Halide type handling
-typedef int int32;
-typedef long int64;
+typedef signed char int8;
+typedef unsigned char uint8;
+typedef signed short int16;
+typedef unsigned short uint16;
+typedef signed int int32;
+typedef unsigned int uint32;
+typedef signed long int64;
+typedef unsigned long uint64;
 typedef float float32;
 typedef double float64;
 )C";
@@ -81,16 +87,16 @@ float fmodf ( float  x, float  y );
 //float frexpf ( float  x, int* nptr );
 float hypotf ( float  x, float  y );
 //int ilogbf ( float  x );
-//__RETURN_TYPE 	isfinite ( float  a );
-//__RETURN_TYPE 	isinf ( float  a );
-//__RETURN_TYPE 	isnan ( float  a );
+//__RETURN_TYPE        isfinite ( float  a );
+//__RETURN_TYPE        isinf ( float  a );
+//__RETURN_TYPE        isnan ( float  a );
 float j0f ( float  x );
 float j1f ( float  x );
 //float jnf ( int  n, float  x );
 //float ldexpf ( float  x, int  exp );
 float lgammaf ( float  x );
-//long long int 	llrintf ( float  x );
-//long long int 	llroundf ( float  x );
+//long long int        llrintf ( float  x );
+//long long int        llroundf ( float  x );
 float log10f ( float  x );
 float log1pf ( float  x );
 float log2f ( float  x );
@@ -120,7 +126,7 @@ float roundf ( float  x );
 float rsqrtf ( float  x );
 //float scalblnf ( float  x, long int  n );
 //float scalbnf ( float  x, int  n );
-//__RETURN_TYPE 	signbit ( float  a );
+//__RETURN_TYPE         signbit ( float  a );
 //void sincosf ( float  x, float* sptr, float* cptr );
 //void sincospif ( float  x, float* sptr, float* cptr );
 float sinf ( float  x );
diff --git a/include/tc/lang/parser.h b/include/tc/lang/parser.h
@@ -138,6 +138,15 @@ struct Parser {
   TreeRef parseExpList() {
     return parseList('(', ',', ')', [&](int i) { return parseExp(); });
   }
+  // Parses a parenthesized, comma-separated expression list when the current
+  // token is '('; otherwise returns an empty list at the current token's range.
+  // Used for the optional index list on the left-hand side of a statement.
+  TreeRef parseOptionalExpList() {
+    if (L.cur().kind != '(') {
+      return List::create(L.cur().range, {});
+    }
+    return parseExpList();
+  }
   TreeRef parseIdentList() {
     return parseList('(', ',', ')', [&](int i) { return parseIdent(); });
   }
@@ -213,7 +222,7 @@ struct Parser {
   }
   TreeRef parseStmt() {
     auto ident = parseIdent();
-    TreeRef list = parseOptionalIdentList();
+    TreeRef list = parseOptionalExpList();
     auto assign = parseAssignment();
     auto rhs = parseExp();
     TreeRef equivalent_statement = parseEquivalent();
diff --git a/include/tc/lang/sema.h b/include/tc/lang/sema.h
@@ -442,9 +442,11 @@ struct Sema {
 
     // register index variables (non-reductions)
     for (const auto& index : stmt.indices()) {
-      std::string idx = index.name();
-      auto typ = indexType(index);
-      insert(index_env, index, typ, true);
+      if (index->kind() == TK_IDENT) {
+        std::string idx = Ident(index).name();
+        auto typ = indexType(index);
+        insert(index_env, Ident(index), typ, true);
+      }
     }
 
     // make dimension variables for each dimension of the output tensor
@@ -465,6 +467,9 @@ struct Sema {
     auto where_clauses_ = stmt.whereClauses().map(
         [&](TreeRef rc) { return checkWhereClause(rc); });
 
+    auto indices_ = stmt.indices().map(
+        [&](TreeRef idx) { return checkExp(idx, true); });
+
     TreeRef rhs_ = checkExp(stmt.rhs(), true);
     TreeRef scalar_type = typeOfExpr(rhs_);
 
@@ -525,7 +530,7 @@ struct Sema {
     TreeRef result = Comprehension::create(
         stmt.range(),
         stmt.ident(),
-        stmt.indices(),
+        indices_,
         stmt.assignment(),
         rhs_,
         where_clauses_,
diff --git a/include/tc/lang/tree_views.h b/include/tc/lang/tree_views.h
@@ -386,8 +386,8 @@ struct Comprehension : public TreeView {
   Ident ident() const {
     return Ident(subtree(0));
   }
-  ListView<Ident> indices() const {
-    return ListView<Ident>(subtree(1));
+  ListView<TreeRef> indices() const {
+    return ListView<TreeRef>(subtree(1));
   }
   // kind == '=', TK_PLUS_EQ, TK_PLUS_EQ_B, etc.
   TreeRef assignment() const {
diff --git a/src/core/tc2halide.cc b/src/core/tc2halide.cc
@@ -216,7 +216,7 @@ Expr translateExpr(
   }
 }
 
-vector<const Variable*> unboundVariables(const vector<Var>& lhs, Expr rhs) {
+vector<const Variable*> unboundVariables(const vector<Expr>& lhs, Expr rhs) {
   class FindUnboundVariables : public IRVisitor {
     using IRVisitor::visit;
 
@@ -241,14 +241,19 @@ vector<const Variable*> unboundVariables(const vector<Var>& lhs, Expr rhs) {
     set<string> visited;
 
    public:
-    FindUnboundVariables(const vector<Var>& lhs) {
-      for (auto v : lhs) {
-        bound.push(v.name());
+    FindUnboundVariables(const vector<Expr>& lhs) {
+      for (auto e : lhs) {
+        if (const Variable *v = e.as<Variable>()) {
+          bound.push(v->name);
+        }
       }
     }
     vector<const Variable*> result;
   } finder(lhs);
   rhs.accept(&finder);
+  for (auto e : lhs) {
+    e.accept(&finder);
+  }
   return finder.result;
 }
 
@@ -507,22 +512,31 @@ void translateComprehension(
     f = Function(c.ident().name());
     (*funcs)[c.ident().name()] = f;
   }
+
+  // we currently inline all of the let bindings generated in where clauses
+  // in the future we may consider using Halide Let bindings when they
+  // are supported later
+  map<string, Expr> lets;
+
   // Function is the internal Halide IR type for a pipeline
   // stage. Func is the front-end class that wraps it. Here it's
   // convenient to use both.
   Func func(f);
 
-  vector<Var> lhs;
-  vector<Expr> lhs_as_exprs;
-  for (lang::Ident id : c.indices()) {
-    lhs.push_back(Var(id.name()));
-    lhs_as_exprs.push_back(lhs.back());
+  vector<Expr> lhs;
+  vector<Var> lhs_vars;
+  bool total_definition = true;
+  for (lang::TreeRef idx : c.indices()) {
+    Expr e = translateExpr(idx, params, *funcs, lets);
+    if (const Variable *op = e.as<Variable>()) {
+      lhs_vars.push_back(Var(op->name));
+    } else {
+      total_definition = false;
+      lhs_vars.push_back(Var());
+    }
+    lhs.push_back(e);
   }
 
-  // we currently inline all of the let bindings generated in where clauses
-  // in the future we may consider using Halide Let bindings when they
-  // are supported later
-  map<string, Expr> lets;
   for (auto wc : c.whereClauses()) {
     if (wc->kind() == lang::TK_LET) {
       auto let = lang::Let(wc);
@@ -546,9 +560,8 @@ void translateComprehension(
   auto setupIdentity = [&](const Expr& identity, bool zero) {
     if (!f.has_pure_definition()) {
       added_implicit_initialization = true;
-      func(lhs) = (zero) ? identity
-                         : undef(rhs.type()); // undef causes the original value
-                                              // to remain in input arrays
+      // undef causes the original value to remain in input arrays
+      func(lhs_vars) = (zero) ? identity : undef(rhs.type());
     }
   };
 
@@ -587,6 +600,9 @@ void translateComprehension(
       break;
 
     case '=':
+      if (!total_definition) {
+        setupIdentity(rhs, false);
+      }
       break;
     default:
       throw lang::ErrorReport(c) << "Unimplemented reduction "
@@ -618,9 +634,10 @@ void translateComprehension(
   for (auto& exp : all_exprs) {
     exp = bindParams.mutate(exp);
   }
-
-  // TODO: When the LHS incorporates general expressions we'll need to
-  // bind params there too.
+  for (auto &e : lhs) {
+    e = bindParams.mutate(e);
+    all_exprs.push_back(e);
+  }
 
   // Do forward bounds inference -- construct an expression that says
   // this expression never reads out of bounds on its inputs, and
@@ -660,19 +677,34 @@ void translateComprehension(
   // (e.g. an in-place stencil)?. The .bound directive will use the
   // bounds of the last stage for all stages.
 
-  // Does a tensor have a single bound, or can its bounds shrink over
-  // time? Solve for a single bound for now.
-
-  for (Var v : lhs) {
-    if (!solution.contains(v.name())) {
-      throw lang::ErrorReport(c)
+  // Set the bounds to be the union of the boxes written to by every
+  // comprehension touching the tensor.
+  for (size_t i = 0; i < lhs.size(); i++) {
+    Expr e = lhs[i];
+    if (const Variable *v = e.as<Variable>()) { // plain index variable on the LHS
+      if (!solution.contains(v->name)) {
+        throw lang::ErrorReport(c) // v is a pointer: report its name, not the address
-          << "Free variable " << v
+          << "Free variable " << v->name
           << " was not solved in range inference. May not be used right-hand side";
+      }
+    }
+
+    Interval in = bounds_of_expr_in_scope(e, solution);
+    if (!in.is_bounded()) {
+      throw lang::ErrorReport(c.indices()[i])
+        << "Left-hand side expression is unbounded";
+    }
+    in.min = cast<int>(in.min);
+    in.max = cast<int>(in.max);
+
+    map<string, Interval> &b = (*bounds)[f];
+    string dim_name = f.dimensions() ? f.args()[i] : lhs_vars[i].name();
+    auto old = b.find(dim_name);
+    if (old != b.end()) {
+      // Take the union with any existing bounds
+      in.include(old->second);
     }
-    // TODO: We're enforcing a single bound across all comprehensions
-    // for now. We should really check later ones are equal to earlier
-    // ones instead of just clobbering.
-    (*bounds)[f][v.name()] = solution.get(v.name());
+    b[dim_name] = in;
   }
 
   // Free variables that appear on the rhs but not the lhs are
@@ -703,6 +735,9 @@ void translateComprehension(
     for (auto v : unbound) {
       Expr rv = Variable::make(Int(32), v->name, domain);
       rhs = substitute(v->name, rv, rhs);
+      for (Expr &e : lhs) {
+        e = substitute(v->name, rv, e);
+      }
     }
     rdom = RDom(domain);
   }
@@ -718,9 +753,12 @@ void translateComprehension(
     }
   }
   while (!lhs.empty()) {
-    loop_nest.push_back(lhs.back());
+    if (const Variable *v = lhs.back().as<Variable>()) {
+      loop_nest.push_back(Var(v->name));
+    }
     lhs.pop_back();
   }
+  stage.reorder(loop_nest);
 
   if (added_implicit_initialization) {
     // Also reorder reduction initializations to the TC convention
@@ -734,7 +772,6 @@ void translateComprehension(
   }
 
   func.compute_root();
-  stage.reorder(loop_nest);
 }
 
 HalideComponents translateDef(const lang::Def& def, bool throwWarnings) {
diff --git a/src/lang/tc_format.cc b/src/lang/tc_format.cc
@@ -60,8 +60,9 @@ std::ostream& operator<<(std::ostream& s, const Param& p) {
 }
 
 std::ostream& operator<<(std::ostream& s, const Comprehension& comp) {
-  s << comp.ident() << "(" << comp.indices() << ") "
-    << kindToToken(comp.assignment()->kind()) << " ";
+  s << comp.ident() << "(";
+  showList(s, comp.indices(), showExpr);
+  s << ") " << kindToToken(comp.assignment()->kind()) << " ";
   showExpr(s, comp.rhs());
   if (!comp.whereClauses().empty())
     throw std::runtime_error("Printing of where clauses is not supported yet");
diff --git a/test/test_execution_engine.cc b/test/test_execution_engine.cc
@@ -145,6 +145,25 @@ def concat(float(M, N) A, float(M, N) B) -> (O1) {
       outputs);
 }
 
+// Concatenates via constant left-hand-side indices: O1(n, 0, m) and O1(n, 1, m).
+TEST_F(ATenCompilationUnitTest, Concat2) {
+  std::vector<at::Tensor> inputs = {
+      at::CUDA(at::kFloat).rand({32, 16}),
+      at::CUDA(at::kFloat).rand({32, 16})};
+  std::vector<at::Tensor> outputs;
+  Check(
+      R"(
+def concat(float(M, N) A, float(M, N) B) -> (O1) {
+    O1(n, 0, m) = A(m, n)
+    O1(n, 1, m) = B(m, n)
+}
+    )",
+      "concat",
+      tc::CudaMappingOptions::makeNaiveCudaMappingOptions(),
+      inputs,
+      outputs);
+}
+
 TEST_F(ATenCompilationUnitTest, Indexing) {
   at::Tensor a = at::CUDA(at::kFloat).rand({3, 4});
   at::Tensor b = at::CUDA(at::kInt).ones({2});
diff --git a/test/test_tc_mapper.cc b/test/test_tc_mapper.cc
@@ -521,6 +521,51 @@ def fun(float(B, R) LUT, int32(B, N) I) -> (O) {
       checkFun);
 }
 
+
+TEST_F(TcMapperTest, Histogram) {
+  const int N = 17, M = 82;
+  at::Tensor I =
+      at::CUDA(at::kFloat).rand({N, M}).mul_(256).floor_().toType(at::kByte);
+  std::vector<at::Tensor> inputs = {I};
+  std::vector<at::Tensor> outputs;
+  // The left-hand-side index is itself a tensor access: O is indexed by I's values.
+  static constexpr auto TC = R"TC(
+def fun(uint8(N, M) I) -> (O) {
+  O(I(i, j)) +=! 1
+}
+)TC";
+  // Verifies the bin counts sum to N * M and cancel exactly against the input.
+  auto checkFun = [=](const std::vector<at::Tensor>& inputs,
+                      std::vector<at::Tensor>& outputs) {
+    at::Tensor hostInput = inputs[0].toBackend(at::kCPU);
+    at::Tensor hostOutput = outputs[0].toBackend(at::kCPU);
+    auto pixels = hostInput.accessor<uint8_t, 2>();
+    auto OAccessor = hostOutput.accessor<int, 1>();
+    int sum = 0;
+    for (int i = 0; i < 256; i++) {
+      sum += OAccessor[i];
+    }
+    CHECK_EQ(sum, N * M);
+    // Remove one count per input element from its bin.
+    for (int row = 0; row < N; row++) {
+      for (int col = 0; col < M; col++) {
+        OAccessor[pixels[row][col]]--;
+      }
+    }
+    // Every bin must now be empty.
+    for (int i = 0; i < 256; i++) {
+      CHECK_EQ(OAccessor[i], 0);
+    }
+  };
+  Check(
+      TC,
+      "fun",
+      tc::CudaMappingOptions::makeNaiveCudaMappingOptions(),
+      inputs,
+      checkFun);
+}
+
+
 TEST_F(TcMapperTest, DISABLED_SpatialBatchNormalization) {
   N = 32;
   at::Tensor eps = at::CUDA(at::kFloat).rand({});

Original file line number	Diff line number	Diff line change
`@@ -386,8 +386,8 @@ struct Comprehension : public TreeView {`
`386`	`386`	`Ident ident() const {`
`387`	`387`	`return Ident(subtree(0));`
`388`	`388`	`}`
`389`		`- ListView<Ident> indices() const {`
`390`		`- return ListView<Ident>(subtree(1));`
	`389`	`+ ListView<TreeRef> indices() const {`
	`390`	`+ return ListView<TreeRef>(subtree(1));`
`391`	`391`	`}`
`392`	`392`	`// kind == '=', TK_PLUS_EQ, TK_PLUS_EQ_B, etc.`
`393`	`393`	`TreeRef assignment() const {`