Skip to content

Commit e0c6088

Browse files
authored
[mlir][nfc] Update vectorize-tensor-extract.mlir (3/N) (#119121)
Tests in "vectorize-tensor-extract.mlir" are inconsistent and would benefit from refactoring to: * Clearly categorize tests into "contiguous load," "gather load," and "scalar load + broadcast" cases, reflecting the structure of tensor.extract vectorization. * Unify variable naming (both MLIR and FileCheck). * Ensure all tests exercise unmasked vectorization (masked vectorization is covered in "vectorize-tensor-extract-masked.mlir"). * Improve and standardize formatting. These changes will make it easier to identify the test cases being exercised and simplify future maintenance or refactoring. This is patch 3/N in the series. Below is a summary of the changes in this patch. ---------------------------------------------------------------------- Summary of patch 3/N ---------------------------------------------------------------------- * Cluster all tests for "scalar load + broadcast" together * Unify MLIR and FileCheck variable names, e.g. `%input`, `%output` -> `%src`, `%init`. Note: I haven't changed test function names to make it easier to track changes (this PR is mostly about moving code). I will send a separate PR to rename the tests. ---------------------------------------------------------------------- Previous patches ---------------------------------------------------------------------- * #118977 * #119080
1 parent d416cae commit e0c6088

File tree

1 file changed

+141
-136
lines changed

1 file changed

+141
-136
lines changed

mlir/test/Dialect/Linalg/vectorize-tensor-extract.mlir

+141-136
Original file line numberDiff line numberDiff line change
@@ -31,83 +31,6 @@ func.func @vectorize_1d_tensor_extract(%arg0: tensor<3xf32>, %arg1: tensor<4x3xi
3131

3232
// -----
3333

34-
#map = affine_map<() -> ()>
35-
func.func @extract_scalar_from_0d_into_0d(%src: tensor<f32>, %init: tensor<f32>) -> tensor<f32> {
36-
%res = linalg.generic {
37-
indexing_maps = [#map],
38-
iterator_types = []
39-
} outs(%init : tensor<f32>) {
40-
^bb0(%in: f32):
41-
%1 = tensor.extract %src[] : tensor<f32>
42-
linalg.yield %1 : f32
43-
} -> tensor<f32>
44-
45-
return %res : tensor<f32>
46-
}
47-
48-
// CHECK-LABEL: func.func @extract_scalar_from_0d_into_0d(
49-
// CHECK-SAME: %[[SRC:.*]]: tensor<f32>,
50-
// CHECK-SAME: %[[INIT:.*]]: tensor<f32>) -> tensor<f32> {
51-
// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
52-
// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor<f32>, vector<f32>
53-
// CHECK: vector.transfer_write %[[READ]], %[[INIT]][] : vector<f32>, tensor<f32>
54-
55-
// -----
56-
57-
#map = affine_map<(n) -> (n)>
58-
func.func @extract_scalar_from_0d_into_1d(%src: tensor<f32>, %init: tensor<1xf32>) -> tensor<1xf32> {
59-
%res = linalg.generic {
60-
indexing_maps = [#map],
61-
iterator_types = ["parallel"]
62-
} outs(%init : tensor<1xf32>) {
63-
^bb0(%in: f32):
64-
%1 = tensor.extract %src[] : tensor<f32>
65-
linalg.yield %1 : f32
66-
} -> tensor<1xf32>
67-
68-
return %res : tensor<1xf32>
69-
}
70-
// CHECK-LABEL: func.func @extract_scalar_from_0d_into_1d(
71-
// CHECK-SAME: %[[SRC:.*]]: tensor<f32>,
72-
// CHECK-SAME: %[[INIT:.*]]: tensor<1xf32>) -> tensor<1xf32> {
73-
// CHECK: %[[C0:.*]] = arith.constant 0 : index
74-
// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
75-
// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor<f32>, vector<f32>
76-
// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector<f32> to vector<1xf32>
77-
// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]]] {in_bounds = [true]} : vector<1xf32>, tensor<1xf32>
78-
79-
// -----
80-
81-
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
82-
func.func @vectorize_nd_tensor_extract_scalar_broadcast(%src: tensor<3x3xf32>, %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> {
83-
%c0 = arith.constant 1 : index
84-
%c1 = arith.constant 2 : index
85-
86-
%res = linalg.generic {
87-
indexing_maps = [#map],
88-
iterator_types = ["parallel", "parallel", "parallel"]
89-
} outs(%init : tensor<1x1x3xf32>) {
90-
^bb0(%arg4: f32):
91-
%1 = tensor.extract %src[%c0, %c1] : tensor<3x3xf32>
92-
linalg.yield %1 : f32
93-
} -> tensor<1x1x3xf32>
94-
95-
return %res : tensor<1x1x3xf32>
96-
}
97-
98-
// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_scalar_broadcast(
99-
// CHECK-SAME: %[[SRC:.*]]: tensor<3x3xf32>,
100-
// CHECK-SAME: %[[INIT:.*]]: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> {
101-
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
102-
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
103-
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
104-
// CHECK-DAG: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
105-
// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C1]], %[[C2]]], %[[PAD]] : tensor<3x3xf32>, vector<f32>
106-
// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector<f32> to vector<1x1x3xf32>
107-
// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32>
108-
109-
// -----
110-
11134
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
11235
func.func @vectorize_nd_tensor_extract_transfer_read_basic(
11336
%arg0: tensor<3x3x3xf32>,
@@ -144,37 +67,6 @@ func.func @vectorize_nd_tensor_extract_transfer_read_basic(
14467
// CHECK: %[[READ:.*]] = vector.transfer_read %[[ARG0]][%[[IDX1]], %[[IDX2]], %[[IDX3]]], %[[CST]] {in_bounds = [true, true, true]} : tensor<3x3x3xf32>, vector<1x1x3xf32>
14568
// CHECK: vector.transfer_write %[[READ]], %[[ARG1]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32>
14669

147-
// Same as example above, but reading into a column tensor.
148-
149-
// TODO: Currently this fails to vectorise when the indices are non-constant.
150-
151-
func.func @vectorize_nd_tensor_extract_transfer_read_basic_column(
152-
%input: tensor<3x3x3xf32>,
153-
%output: tensor<3x1x1xf32>) -> tensor<3x1x1xf32> {
154-
155-
%c0 = arith.constant 0 : index
156-
%res = linalg.generic {
157-
indexing_maps = [#map],
158-
iterator_types = ["parallel", "parallel", "parallel"]
159-
} outs(%output : tensor<3x1x1xf32>) {
160-
^bb0(%out: f32):
161-
%5 = tensor.extract %input[%c0, %c0, %c0] : tensor<3x3x3xf32>
162-
linalg.yield %5 : f32
163-
} -> tensor<3x1x1xf32>
164-
165-
return %res : tensor<3x1x1xf32>
166-
}
167-
168-
// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_transfer_read_basic_column(
169-
// CHECK-SAME: %[[INPUT:.*]]: tensor<3x3x3xf32>,
170-
// CHECK-SAME: %[[OUTPUT:.*]]: tensor<3x1x1xf32>)
171-
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
172-
// CHECK-DAG: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32
173-
// CHECK: %[[READ:.*]] = vector.transfer_read %[[INPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]], %[[CST_0]] : tensor<3x3x3xf32>, vector<f32>
174-
// CHECK: %[[BCAST:.*]] = vector.broadcast %[[READ]] : vector<f32> to vector<3x1x1xf32>
175-
// CHECK: %[[RES:.*]] = vector.transfer_write %[[BCAST]], %[[OUTPUT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32>
176-
// CHECK: return %[[RES]] : tensor<3x1x1xf32>
177-
17870
// -----
17971

18072
func.func @vectorize_nd_tensor_extract_transfer_read_complex(%6: tensor<45x80x16xf32>, %arg0: index, %arg2: index, %arg1: index, %arg4: index, %extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32> {
@@ -620,26 +512,6 @@ func.func @vectorize_nd_tensor_extract_block_arg(%arg0: tensor<5x6xf32>, %arg1:
620512

621513
// -----
622514

623-
#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
624-
func.func @vectorize_0d_tensor_extract(%arg0: tensor<f32>, %arg2: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> {
625-
%2 = linalg.generic {
626-
indexing_maps = [#map1],
627-
iterator_types = ["parallel", "parallel", "parallel"]
628-
} outs(%arg2 : tensor<1x1x3xf32>) {
629-
^bb0(%arg4: f32):
630-
%7 = tensor.extract %arg0[] : tensor<f32>
631-
linalg.yield %7 : f32
632-
} -> tensor<1x1x3xf32>
633-
return %2 : tensor<1x1x3xf32>
634-
}
635-
636-
// CHECK-LABEL: func.func @vectorize_0d_tensor_extract(
637-
// CHECK-SAME: %[[ARG_0:.*]]: tensor<f32>
638-
// CHECK: %[[EXTRACT:.*]] = vector.transfer_read %[[ARG_0]][], %{{.+}} : tensor<f32>
639-
// CHECK: vector.broadcast %[[EXTRACT]] : vector<f32> to vector<1x1x3xf32>
640-
641-
// -----
642-
643515
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
644516
#map1 = affine_map<(d0, d1, d2) -> (d0 + d1 + d2)>
645517
func.func @vectorize_reverse_like_tensor_extract(%arg0: tensor<1x2x3xf32>, %arg1: tensor<1x1x3xf32>, %arg2: index) -> tensor<1x1x3xf32> {
@@ -674,17 +546,118 @@ func.func @vectorize_reverse_like_tensor_extract(%arg0: tensor<1x2x3xf32>, %arg1
674546
// CHECK: %[[GATHER:.*]] = vector.gather %[[ARG0]][%[[C0]], %[[C0]], %[[C0]]] [%[[T3]]], %[[MASK]], %[[PASSTHRU]]
675547
// CHECK: vector.transfer_write %[[GATHER]]
676548

549+
//===----------------------------------------------------------------------===//
550+
// Scalar load + broadcast
551+
//===----------------------------------------------------------------------===//
552+
553+
// -----
554+
555+
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
556+
func.func @vectorize_nd_tensor_extract_scalar_broadcast(%src: tensor<3x3xf32>, %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> {
557+
%c0 = arith.constant 1 : index
558+
%c1 = arith.constant 2 : index
559+
560+
%res = linalg.generic {
561+
indexing_maps = [#map],
562+
iterator_types = ["parallel", "parallel", "parallel"]
563+
} outs(%init : tensor<1x1x3xf32>) {
564+
^bb0(%arg4: f32):
565+
%1 = tensor.extract %src[%c0, %c1] : tensor<3x3xf32>
566+
linalg.yield %1 : f32
567+
} -> tensor<1x1x3xf32>
568+
569+
return %res : tensor<1x1x3xf32>
570+
}
571+
572+
// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_scalar_broadcast(
573+
// CHECK-SAME: %[[SRC:.*]]: tensor<3x3xf32>,
574+
// CHECK-SAME: %[[INIT:.*]]: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> {
575+
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
576+
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
577+
// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
578+
// CHECK-DAG: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
579+
// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C1]], %[[C2]]], %[[PAD]] : tensor<3x3xf32>, vector<f32>
580+
// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector<f32> to vector<1x1x3xf32>
581+
// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x3xf32>, tensor<1x1x3xf32>
582+
677583
// -----
678584

679-
func.func @vectorize_scalar_read_with_broadcast_from_column_tensor(%init: tensor<1x1x4xi32>) -> tensor<1x1x4xi32> {
585+
#map = affine_map<() -> ()>
586+
func.func @extract_scalar_from_0d_into_0d(%src: tensor<f32>, %init: tensor<f32>) -> tensor<f32> {
587+
%res = linalg.generic {
588+
indexing_maps = [#map],
589+
iterator_types = []
590+
} outs(%init : tensor<f32>) {
591+
^bb0(%in: f32):
592+
%1 = tensor.extract %src[] : tensor<f32>
593+
linalg.yield %1 : f32
594+
} -> tensor<f32>
595+
596+
return %res : tensor<f32>
597+
}
598+
599+
// CHECK-LABEL: func.func @extract_scalar_from_0d_into_0d(
600+
// CHECK-SAME: %[[SRC:.*]]: tensor<f32>,
601+
// CHECK-SAME: %[[INIT:.*]]: tensor<f32>) -> tensor<f32> {
602+
// CHECK: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
603+
// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor<f32>, vector<f32>
604+
// CHECK: vector.transfer_write %[[READ]], %[[INIT]][] : vector<f32>, tensor<f32>
605+
606+
// -----
607+
608+
#map = affine_map<(n) -> (n)>
609+
func.func @extract_scalar_from_0d_into_1d(%src: tensor<f32>, %init: tensor<1xf32>) -> tensor<1xf32> {
610+
%res = linalg.generic {
611+
indexing_maps = [#map],
612+
iterator_types = ["parallel"]
613+
} outs(%init : tensor<1xf32>) {
614+
^bb0(%in: f32):
615+
%1 = tensor.extract %src[] : tensor<f32>
616+
linalg.yield %1 : f32
617+
} -> tensor<1xf32>
618+
619+
return %res : tensor<1xf32>
620+
}
621+
// CHECK-LABEL: func.func @extract_scalar_from_0d_into_1d(
622+
// CHECK-SAME: %[[SRC:.*]]: tensor<f32>,
623+
// CHECK-SAME: %[[INIT:.*]]: tensor<1xf32>) -> tensor<1xf32> {
624+
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
625+
// CHECK-DAG: %[[PAD:.*]] = arith.constant 0.000000e+00 : f32
626+
// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %[[PAD]] : tensor<f32>, vector<f32>
627+
// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector<f32> to vector<1xf32>
628+
// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]]] {in_bounds = [true]} : vector<1xf32>, tensor<1xf32>
629+
630+
// -----
631+
632+
#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
633+
func.func @vectorize_0d_tensor_extract(%src: tensor<f32>, %init: tensor<1x1x3xf32>) -> tensor<1x1x3xf32> {
634+
%res = linalg.generic {
635+
indexing_maps = [#map1],
636+
iterator_types = ["parallel", "parallel", "parallel"]
637+
} outs(%init : tensor<1x1x3xf32>) {
638+
^bb0(%arg4: f32):
639+
%1 = tensor.extract %src[] : tensor<f32>
640+
linalg.yield %1 : f32
641+
} -> tensor<1x1x3xf32>
642+
return %res : tensor<1x1x3xf32>
643+
}
644+
645+
// CHECK-LABEL: func.func @vectorize_0d_tensor_extract(
646+
// CHECK-SAME: %[[SRC:.*]]: tensor<f32>
647+
// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][], %{{.+}} : tensor<f32>
648+
// CHECK: vector.broadcast %[[READ]] : vector<f32> to vector<1x1x3xf32>
649+
650+
// -----
651+
652+
func.func @scalar_read_with_broadcast_from_column_tensor(%init: tensor<1x1x4xi32>) -> tensor<1x1x4xi32> {
680653
%c4 = arith.constant 4 : index
681654
%c0 = arith.constant 0 : index
682655
%src = arith.constant dense<[[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32>
683656

684657
%res = linalg.generic {
685658
indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
686-
iterator_types = ["parallel", "parallel", "parallel"]}
687-
outs(%init : tensor<1x1x4xi32>) {
659+
iterator_types = ["parallel", "parallel", "parallel"]
660+
} outs(%init : tensor<1x1x4xi32>) {
688661

689662
^bb0(%out: i32):
690663
%idx = linalg.index 0 : index
@@ -695,13 +668,45 @@ func.func @vectorize_scalar_read_with_broadcast_from_column_tensor(%init: tensor
695668
return %res : tensor<1x1x4xi32>
696669
}
697670

698-
// CHECK-LABEL: func.func @vectorize_scalar_read_with_broadcast_from_column_tensor(
671+
// CHECK-LABEL: func.func @scalar_read_with_broadcast_from_column_tensor
699672
// CHECK-SAME: %[[INIT:.*]]: tensor<1x1x4xi32>) -> tensor<1x1x4xi32> {
700-
// CHECK: %[[PAD:.*]] = arith.constant 0 : i32
701-
// CHECK: %[[C0:.*]] = arith.constant 0 : index
702-
// CHECK: %[[SRC:.*]] = arith.constant dense<{{\[\[}}0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32>
703-
// CHECK: %[[IDX_VEC:.*]] = arith.constant dense<0> : vector<1xindex>
673+
// CHECK-DAG: %[[PAD:.*]] = arith.constant 0 : i32
674+
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
675+
// CHECK-DAG: %[[SRC:.*]] = arith.constant dense<{{\[\[}}0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14]]> : tensor<15x1xi32>
676+
// CHECK-DAG: %[[IDX_VEC:.*]] = arith.constant dense<0> : vector<1xindex>
704677
// CHECK: %[[IDX_ELT:.*]] = vector.extract %[[IDX_VEC]][0] : index from vector<1xindex>
705678
// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]]{{\[}}%[[IDX_ELT]], %[[C0]]], %[[PAD]] : tensor<15x1xi32>, vector<i32>
706679
// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector<i32> to vector<1x1x4xi32>
707680
// CHECK: %[[RES:.*]] = vector.transfer_write %[[READ_BCAST]], %[[INIT]][%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<1x1x4xi32>, tensor<1x1x4xi32>
681+
682+
// -----
683+
684+
// TODO: Currently this fails to vectorise when the indices are non-constant.
685+
686+
#map = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
687+
func.func @vectorize_nd_tensor_extract_transfer_read_basic_column(
688+
%src: tensor<3x3x3xf32>,
689+
%init: tensor<3x1x1xf32>) -> tensor<3x1x1xf32> {
690+
691+
%c0 = arith.constant 0 : index
692+
693+
%res = linalg.generic {
694+
indexing_maps = [#map],
695+
iterator_types = ["parallel", "parallel", "parallel"]
696+
} outs(%init : tensor<3x1x1xf32>) {
697+
^bb0(%out: f32):
698+
%1 = tensor.extract %src[%c0, %c0, %c0] : tensor<3x3x3xf32>
699+
linalg.yield %1 : f32
700+
} -> tensor<3x1x1xf32>
701+
702+
return %res : tensor<3x1x1xf32>
703+
}
704+
705+
// CHECK-LABEL: func.func @vectorize_nd_tensor_extract_transfer_read_basic_column(
706+
// CHECK-SAME: %[[SRC:.*]]: tensor<3x3x3xf32>,
707+
// CHECK-SAME: %[[INIT:.*]]: tensor<3x1x1xf32>)
708+
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
709+
// CHECK-DAG: %[[CST_0:.*]] = arith.constant 0.000000e+00 : f32
710+
// CHECK: %[[READ:.*]] = vector.transfer_read %[[SRC]][%[[C0]], %[[C0]], %[[C0]]], %[[CST_0]] : tensor<3x3x3xf32>, vector<f32>
711+
// CHECK: %[[READ_BCAST:.*]] = vector.broadcast %[[READ]] : vector<f32> to vector<3x1x1xf32>
712+
// CHECK: vector.transfer_write %[[READ_BCAST]], %[[INIT]]{{\[}}%[[C0]], %[[C0]], %[[C0]]] {in_bounds = [true, true, true]} : vector<3x1x1xf32>, tensor<3x1x1xf32>

0 commit comments

Comments
 (0)