@@ -562,13 +562,13 @@ impl BloomIndex {
562
562
563
563
/// Collects per-column filter builders and, in `finalize`, turns them into a
/// `BloomIndex`.
///
/// This change replaces the single `columns` list (whose entries carried an
/// `is_ngram` flag) with two separate lists, one per filter kind.
pub struct BloomIndexBuilder {
564
564
func_ctx : FunctionContext ,
565
- columns : Vec < ColumnFilterBuilder > ,
565
/// Builders constructed with `FilterImplBuilder::Xor`, one per entry of the
/// `bloom_columns_map` passed to the constructor.
+ bloom_columns : Vec < ColumnFilterBuilder > ,
566
/// Builders constructed with `FilterImplBuilder::Ngram`, one per `NgramArgs`.
+ ngram_columns : Vec < ColumnFilterBuilder > ,
566
567
}
567
568
568
569
/// State kept for one indexed column while its filter is being built.
///
/// The `is_ngram` flag is removed by this change; the filter kind is now
/// implied by which list of `BloomIndexBuilder` the entry is stored in.
struct ColumnFilterBuilder {
569
570
/// Offset used to fetch this column from a block (`block.get_by_offset(index)`).
index : FieldIndex ,
570
571
/// Table field the filter belongs to; used to derive the filter name and,
/// for xor filters, the `column_id` key of the distinct-count map.
field : TableField ,
571
- is_ngram : bool ,
572
572
/// Gram size for ngram entries (copied from `NgramArgs`); set to 0 for xor entries.
gram_size : usize ,
573
573
/// Underlying filter builder, either the `Xor` or the `Ngram` variant.
builder : FilterImplBuilder ,
574
574
}
@@ -578,11 +578,11 @@ pub struct NgramArgs {
578
578
index : FieldIndex ,
579
579
field : TableField ,
580
580
gram_size : usize ,
581
- bloom_size : usize ,
581
+ bloom_size : u64 ,
582
582
}
583
583
584
584
impl NgramArgs {
585
- pub fn new ( index : FieldIndex , field : TableField , gram_size : usize , bloom_size : usize ) -> Self {
585
+ pub fn new ( index : FieldIndex , field : TableField , gram_size : usize , bloom_size : u64 ) -> Self {
586
586
Self {
587
587
index,
588
588
field,
@@ -599,7 +599,7 @@ impl NgramArgs {
599
599
self . gram_size
600
600
}
601
601
602
/// Returns the stored `bloom_size` value.
///
/// The return type changes from `usize` to `u64` here, matching the new field
/// and constructor parameter type. The same field is what gets passed as the
/// first argument of `BloomBuilder::create` when an ngram builder is made.
- pub fn bloom_size ( & self ) -> usize {
602
+ pub fn bloom_size ( & self ) -> u64 {
603
603
self . bloom_size
604
604
}
605
605
}
@@ -610,21 +610,20 @@ impl BloomIndexBuilder {
610
610
bloom_columns_map : BTreeMap < FieldIndex , TableField > ,
611
611
ngram_args : & [ NgramArgs ] ,
612
612
) -> Result < Self > {
613
- let mut bloom_columns = Vec :: with_capacity ( bloom_columns_map. len ( ) + ngram_args. len ( ) ) ;
613
+ let mut bloom_columns = Vec :: with_capacity ( bloom_columns_map. len ( ) ) ;
614
+ let mut ngram_columns = Vec :: with_capacity ( ngram_args. len ( ) ) ;
614
615
for ( & index, field) in bloom_columns_map. iter ( ) {
615
616
bloom_columns. push ( ColumnFilterBuilder {
616
617
index,
617
618
field : field. clone ( ) ,
618
- is_ngram : false ,
619
619
gram_size : 0 ,
620
620
builder : FilterImplBuilder :: Xor ( Xor8Builder :: create ( ) ) ,
621
621
} ) ;
622
622
}
623
623
for arg in ngram_args. iter ( ) {
624
- bloom_columns . push ( ColumnFilterBuilder {
624
+ ngram_columns . push ( ColumnFilterBuilder {
625
625
index : arg. index ,
626
626
field : arg. field . clone ( ) ,
627
- is_ngram : true ,
628
627
gram_size : arg. gram_size ,
629
628
builder : FilterImplBuilder :: Ngram ( BloomBuilder :: create (
630
629
arg. bloom_size ,
@@ -635,7 +634,8 @@ impl BloomIndexBuilder {
635
634
636
635
Ok ( Self {
637
636
func_ctx,
638
- columns : bloom_columns,
637
+ bloom_columns,
638
+ ngram_columns,
639
639
} )
640
640
}
641
641
}
@@ -649,18 +649,12 @@ impl BloomIndexBuilder {
649
649
return Ok ( ( ) ) ;
650
650
}
651
651
652
- let mut keys_to_remove = Vec :: with_capacity ( self . columns . len ( ) ) ;
653
-
654
- let ( bloom_iter, ngram_iter) : ( Vec < _ > , Vec < _ > ) = self
655
- . columns
656
- . iter_mut ( )
657
- . enumerate ( )
658
- . partition ( |( _, column) | !column. is_ngram ) ;
652
+ let mut bloom_keys_to_remove = Vec :: with_capacity ( self . bloom_columns . len ( ) ) ;
659
653
660
- for ( index, index_column) in bloom_iter {
654
+ for ( index, index_column) in self . bloom_columns . iter_mut ( ) . enumerate ( ) {
661
655
let field_type = & block. get_by_offset ( index_column. index ) . data_type ;
662
656
if !Xor8Filter :: supported_type ( field_type) {
663
- keys_to_remove . push ( index) ;
657
+ bloom_keys_to_remove . push ( index) ;
664
658
continue ;
665
659
}
666
660
@@ -708,22 +702,22 @@ impl BloomIndexBuilder {
708
702
}
709
703
let str_column = builder. build ( ) ;
710
704
if BloomIndex :: check_large_string ( & str_column) {
711
- keys_to_remove . push ( index) ;
705
+ bloom_keys_to_remove . push ( index) ;
712
706
continue ;
713
707
}
714
708
let str_type = DataType :: Nullable ( Box :: new ( DataType :: String ) ) ;
715
709
( str_column, str_type)
716
710
} else {
717
711
if BloomIndex :: check_large_string ( & column) {
718
- keys_to_remove . push ( index) ;
712
+ bloom_keys_to_remove . push ( index) ;
719
713
continue ;
720
714
}
721
715
( column, val_type)
722
716
}
723
717
}
724
718
_ => {
725
719
if BloomIndex :: check_large_string ( & column) {
726
- keys_to_remove . push ( index) ;
720
+ bloom_keys_to_remove . push ( index) ;
727
721
continue ;
728
722
}
729
723
( column, field_type. clone ( ) )
@@ -749,7 +743,7 @@ impl BloomIndexBuilder {
749
743
index_column. builder . add_digests ( column. deref ( ) ) ;
750
744
}
751
745
}
752
- for ( _ , index_column) in ngram_iter {
746
+ for index_column in self . ngram_columns . iter_mut ( ) {
753
747
let field_type = & block. get_by_offset ( index_column. index ) . data_type ;
754
748
let column = match & block. get_by_offset ( index_column. index ) . value {
755
749
Value :: Scalar ( s) => {
@@ -770,36 +764,40 @@ impl BloomIndexBuilder {
770
764
index_column. builder . add_digests ( digests. iter ( ) )
771
765
}
772
766
}
773
// Drop the xor columns that were flagged as unsupported or too large while
// processing this block.
//
// NOTE(review): the indices were collected in ascending order (pushed while
// iterating `.enumerate()`), and `Vec::remove` shifts every later element one
// slot to the left. Removing them front-to-back therefore drops the wrong
// column (or panics on an out-of-range index) as soon as two or more indices
// are collected. The removed side of this diff has the same loop shape, so
// the issue predates the rename. Suggested fix: iterate in reverse, e.g.
// `for k in bloom_keys_to_remove.into_iter().rev() { ... }`.
- for k in keys_to_remove {
774
- self . columns . remove ( k) ;
767
+ for k in bloom_keys_to_remove {
768
+ self . bloom_columns . remove ( k) ;
775
769
}
776
770
Ok ( ( ) )
777
771
}
778
772
779
773
pub fn finalize ( & mut self ) -> Result < Option < BloomIndex > > {
780
- let mut column_distinct_count = HashMap :: with_capacity ( self . columns . len ( ) ) ;
781
- let mut filters = Vec :: with_capacity ( self . columns . len ( ) ) ;
782
- let mut filter_fields = Vec :: with_capacity ( self . columns . len ( ) ) ;
783
- for column in self . columns . iter_mut ( ) {
784
- let filter = column. builder . build ( ) ?;
785
- let filter_name = if column. is_ngram {
786
- BloomIndex :: build_filter_ngram_name ( & column. field , column. gram_size )
787
- } else {
788
- if let Some ( len) = filter. len ( ) {
789
- if !matches ! (
790
- column. field. data_type( ) . remove_nullable( ) ,
791
- TableDataType :: Map ( _) | TableDataType :: Variant
792
- ) {
793
- column_distinct_count. insert ( column. field . column_id , len) ;
794
- // Not need to generate bloom index,
795
- // it will never be used since range index is checked first.
796
- if len < 2 {
797
- continue ;
798
- }
774
+ let mut column_distinct_count = HashMap :: with_capacity ( self . columns_len ( ) ) ;
775
+ let mut filters = Vec :: with_capacity ( self . columns_len ( ) ) ;
776
+ let mut filter_fields = Vec :: with_capacity ( self . columns_len ( ) ) ;
777
+ for bloom_column in self . bloom_columns . iter_mut ( ) {
778
+ let filter = bloom_column. builder . build ( ) ?;
779
+ if let Some ( len) = filter. len ( ) {
780
+ if !matches ! (
781
+ bloom_column. field. data_type( ) . remove_nullable( ) ,
782
+ TableDataType :: Map ( _) | TableDataType :: Variant
783
+ ) {
784
+ column_distinct_count. insert ( bloom_column. field . column_id , len) ;
785
+ // Not need to generate bloom index,
786
+ // it will never be used since range index is checked first.
787
+ if len < 2 {
788
+ continue ;
799
789
}
800
790
}
801
- BloomIndex :: build_filter_bloom_name ( BlockFilter :: VERSION , & column. field ) ?
802
- } ;
791
+ }
792
+ let filter_name =
793
+ BloomIndex :: build_filter_bloom_name ( BlockFilter :: VERSION , & bloom_column. field ) ?;
794
+ filter_fields. push ( TableField :: new ( & filter_name, TableDataType :: Binary ) ) ;
795
+ filters. push ( Arc :: new ( filter) ) ;
796
+ }
797
+ for ngram_column in self . ngram_columns . iter_mut ( ) {
798
+ let filter = ngram_column. builder . build ( ) ?;
799
+ let filter_name =
800
+ BloomIndex :: build_filter_ngram_name ( & ngram_column. field , ngram_column. gram_size ) ;
803
801
filter_fields. push ( TableField :: new ( & filter_name, TableDataType :: Binary ) ) ;
804
802
filters. push ( Arc :: new ( filter) ) ;
805
803
}
@@ -816,6 +814,10 @@ impl BloomIndexBuilder {
816
814
column_distinct_count,
817
815
} ) )
818
816
}
817
+
818
/// Returns the combined number of xor ("bloom") and ngram column builders
/// currently held.
///
/// `finalize` uses this as the capacity hint for its output collections. The
/// count is taken at call time, so it reflects any columns already removed.
+ pub fn columns_len ( & self ) -> usize {
819
+ self . bloom_columns . len ( ) + self . ngram_columns . len ( )
820
+ }
819
821
}
820
822
821
823
struct Visitor < T : EqVisitor > ( T ) ;
@@ -1042,6 +1044,9 @@ trait EqVisitor {
1042
1044
return_type : & DataType ,
1043
1045
is_like : bool ,
1044
1046
) -> ResultRewrite {
1047
+ if is_like {
1048
+ return Ok ( ControlFlow :: Continue ( None ) ) ;
1049
+ }
1045
1050
match & args[ 0 ] {
1046
1051
Expr :: ColumnRef ( ColumnRef { id, data_type, .. } )
1047
1052
| Expr :: Cast ( Cast {
@@ -1172,6 +1177,9 @@ impl EqVisitor for RewriteVisitor<'_> {
1172
1177
constant : & Constant ,
1173
1178
is_like : bool ,
1174
1179
) -> ResultRewrite {
1180
+ if is_like {
1181
+ return Ok ( ControlFlow :: Continue ( None ) ) ;
1182
+ }
1175
1183
let Expr :: Cast ( Cast {
1176
1184
span,
1177
1185
is_try : false ,
@@ -1283,6 +1291,9 @@ impl EqVisitor for ShortListVisitor {
1283
1291
constant : & Constant ,
1284
1292
is_like : bool ,
1285
1293
) -> ResultRewrite {
1294
+ if is_like {
1295
+ return Ok ( ControlFlow :: Continue ( None ) ) ;
1296
+ }
1286
1297
let Expr :: Cast ( Cast {
1287
1298
is_try : false ,
1288
1299
expr :
@@ -1302,9 +1313,6 @@ impl EqVisitor for ShortListVisitor {
1302
1313
let Some ( ( i, field) ) = Self :: found_field ( & self . ngram_fields , id) else {
1303
1314
return Ok ( ControlFlow :: Break ( None ) ) ;
1304
1315
} ;
1305
- if !Xor8Filter :: supported_type ( src_type) || !is_injective_cast ( src_type, dest_type) {
1306
- return Ok ( ControlFlow :: Break ( None ) ) ;
1307
- }
1308
1316
1309
1317
let Some ( s) = cast_const (
1310
1318
& FunctionContext :: default ( ) ,
0 commit comments