Skip to content

Commit b806534

Browse files
committed
chore: codefmt
Signed-off-by: Kould <kould2333@gmail.com>
1 parent 79f3354 commit b806534

File tree

8 files changed

+101
-109
lines changed

8 files changed

+101
-109
lines changed

src/query/sql/src/planner/binder/ddl/index.rs

+8-2
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@ use crate::MetadataRef;
6565
use crate::RefreshAggregatingIndexRewriter;
6666
use crate::SUPPORTED_AGGREGATING_INDEX_FUNCTIONS;
6767

68-
const MAXIMUM_BLOOM_SIZE: usize = 1 << 30;
68+
const MAXIMUM_BLOOM_SIZE: u64 = 10 * 1024 * 1024;
69+
const MINIMUM_BLOOM_SIZE: u64 = 512;
6970

7071
// valid values for inverted index option tokenizer
7172
static INDEX_TOKENIZER_VALUES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
@@ -599,13 +600,18 @@ impl Binder {
599600
options.insert("gram_size".to_string(), value);
600601
}
601602
"bloom_size" => {
602-
match value.parse::<usize>() {
603+
match value.parse::<u64>() {
603604
Ok(num) => {
604605
if num == 0 {
605606
return Err(ErrorCode::IndexOptionInvalid(
606607
"`bloom_size` cannot be 0",
607608
));
608609
}
610+
if num < MINIMUM_BLOOM_SIZE {
611+
return Err(ErrorCode::IndexOptionInvalid(format!(
612+
"bloom_size: `{num}` is too small (bloom_size is minimum: {MINIMUM_BLOOM_SIZE})",
613+
)));
614+
}
609615
if num > MAXIMUM_BLOOM_SIZE {
610616
return Err(ErrorCode::IndexOptionInvalid(format!(
611617
"bloom_size: `{num}` is too large (bloom_size is maximum: {MAXIMUM_BLOOM_SIZE})",

src/query/storages/common/index/src/bloom_index.rs

+57-49
Original file line numberDiff line numberDiff line change
@@ -562,13 +562,13 @@ impl BloomIndex {
562562

563563
pub struct BloomIndexBuilder {
564564
func_ctx: FunctionContext,
565-
columns: Vec<ColumnFilterBuilder>,
565+
bloom_columns: Vec<ColumnFilterBuilder>,
566+
ngram_columns: Vec<ColumnFilterBuilder>,
566567
}
567568

568569
struct ColumnFilterBuilder {
569570
index: FieldIndex,
570571
field: TableField,
571-
is_ngram: bool,
572572
gram_size: usize,
573573
builder: FilterImplBuilder,
574574
}
@@ -578,11 +578,11 @@ pub struct NgramArgs {
578578
index: FieldIndex,
579579
field: TableField,
580580
gram_size: usize,
581-
bloom_size: usize,
581+
bloom_size: u64,
582582
}
583583

584584
impl NgramArgs {
585-
pub fn new(index: FieldIndex, field: TableField, gram_size: usize, bloom_size: usize) -> Self {
585+
pub fn new(index: FieldIndex, field: TableField, gram_size: usize, bloom_size: u64) -> Self {
586586
Self {
587587
index,
588588
field,
@@ -599,7 +599,7 @@ impl NgramArgs {
599599
self.gram_size
600600
}
601601

602-
pub fn bloom_size(&self) -> usize {
602+
pub fn bloom_size(&self) -> u64 {
603603
self.bloom_size
604604
}
605605
}
@@ -610,21 +610,20 @@ impl BloomIndexBuilder {
610610
bloom_columns_map: BTreeMap<FieldIndex, TableField>,
611611
ngram_args: &[NgramArgs],
612612
) -> Result<Self> {
613-
let mut bloom_columns = Vec::with_capacity(bloom_columns_map.len() + ngram_args.len());
613+
let mut bloom_columns = Vec::with_capacity(bloom_columns_map.len());
614+
let mut ngram_columns = Vec::with_capacity(ngram_args.len());
614615
for (&index, field) in bloom_columns_map.iter() {
615616
bloom_columns.push(ColumnFilterBuilder {
616617
index,
617618
field: field.clone(),
618-
is_ngram: false,
619619
gram_size: 0,
620620
builder: FilterImplBuilder::Xor(Xor8Builder::create()),
621621
});
622622
}
623623
for arg in ngram_args.iter() {
624-
bloom_columns.push(ColumnFilterBuilder {
624+
ngram_columns.push(ColumnFilterBuilder {
625625
index: arg.index,
626626
field: arg.field.clone(),
627-
is_ngram: true,
628627
gram_size: arg.gram_size,
629628
builder: FilterImplBuilder::Ngram(BloomBuilder::create(
630629
arg.bloom_size,
@@ -635,7 +634,8 @@ impl BloomIndexBuilder {
635634

636635
Ok(Self {
637636
func_ctx,
638-
columns: bloom_columns,
637+
bloom_columns,
638+
ngram_columns,
639639
})
640640
}
641641
}
@@ -649,18 +649,12 @@ impl BloomIndexBuilder {
649649
return Ok(());
650650
}
651651

652-
let mut keys_to_remove = Vec::with_capacity(self.columns.len());
653-
654-
let (bloom_iter, ngram_iter): (Vec<_>, Vec<_>) = self
655-
.columns
656-
.iter_mut()
657-
.enumerate()
658-
.partition(|(_, column)| !column.is_ngram);
652+
let mut bloom_keys_to_remove = Vec::with_capacity(self.bloom_columns.len());
659653

660-
for (index, index_column) in bloom_iter {
654+
for (index, index_column) in self.bloom_columns.iter_mut().enumerate() {
661655
let field_type = &block.get_by_offset(index_column.index).data_type;
662656
if !Xor8Filter::supported_type(field_type) {
663-
keys_to_remove.push(index);
657+
bloom_keys_to_remove.push(index);
664658
continue;
665659
}
666660

@@ -708,22 +702,22 @@ impl BloomIndexBuilder {
708702
}
709703
let str_column = builder.build();
710704
if BloomIndex::check_large_string(&str_column) {
711-
keys_to_remove.push(index);
705+
bloom_keys_to_remove.push(index);
712706
continue;
713707
}
714708
let str_type = DataType::Nullable(Box::new(DataType::String));
715709
(str_column, str_type)
716710
} else {
717711
if BloomIndex::check_large_string(&column) {
718-
keys_to_remove.push(index);
712+
bloom_keys_to_remove.push(index);
719713
continue;
720714
}
721715
(column, val_type)
722716
}
723717
}
724718
_ => {
725719
if BloomIndex::check_large_string(&column) {
726-
keys_to_remove.push(index);
720+
bloom_keys_to_remove.push(index);
727721
continue;
728722
}
729723
(column, field_type.clone())
@@ -749,7 +743,7 @@ impl BloomIndexBuilder {
749743
index_column.builder.add_digests(column.deref());
750744
}
751745
}
752-
for (_, index_column) in ngram_iter {
746+
for index_column in self.ngram_columns.iter_mut() {
753747
let field_type = &block.get_by_offset(index_column.index).data_type;
754748
let column = match &block.get_by_offset(index_column.index).value {
755749
Value::Scalar(s) => {
@@ -770,36 +764,40 @@ impl BloomIndexBuilder {
770764
index_column.builder.add_digests(digests.iter())
771765
}
772766
}
773-
for k in keys_to_remove {
774-
self.columns.remove(k);
767+
for k in bloom_keys_to_remove {
768+
self.bloom_columns.remove(k);
775769
}
776770
Ok(())
777771
}
778772

779773
pub fn finalize(&mut self) -> Result<Option<BloomIndex>> {
780-
let mut column_distinct_count = HashMap::with_capacity(self.columns.len());
781-
let mut filters = Vec::with_capacity(self.columns.len());
782-
let mut filter_fields = Vec::with_capacity(self.columns.len());
783-
for column in self.columns.iter_mut() {
784-
let filter = column.builder.build()?;
785-
let filter_name = if column.is_ngram {
786-
BloomIndex::build_filter_ngram_name(&column.field, column.gram_size)
787-
} else {
788-
if let Some(len) = filter.len() {
789-
if !matches!(
790-
column.field.data_type().remove_nullable(),
791-
TableDataType::Map(_) | TableDataType::Variant
792-
) {
793-
column_distinct_count.insert(column.field.column_id, len);
794-
// Not need to generate bloom index,
795-
// it will never be used since range index is checked first.
796-
if len < 2 {
797-
continue;
798-
}
774+
let mut column_distinct_count = HashMap::with_capacity(self.columns_len());
775+
let mut filters = Vec::with_capacity(self.columns_len());
776+
let mut filter_fields = Vec::with_capacity(self.columns_len());
777+
for bloom_column in self.bloom_columns.iter_mut() {
778+
let filter = bloom_column.builder.build()?;
779+
if let Some(len) = filter.len() {
780+
if !matches!(
781+
bloom_column.field.data_type().remove_nullable(),
782+
TableDataType::Map(_) | TableDataType::Variant
783+
) {
784+
column_distinct_count.insert(bloom_column.field.column_id, len);
785+
// Not need to generate bloom index,
786+
// it will never be used since range index is checked first.
787+
if len < 2 {
788+
continue;
799789
}
800790
}
801-
BloomIndex::build_filter_bloom_name(BlockFilter::VERSION, &column.field)?
802-
};
791+
}
792+
let filter_name =
793+
BloomIndex::build_filter_bloom_name(BlockFilter::VERSION, &bloom_column.field)?;
794+
filter_fields.push(TableField::new(&filter_name, TableDataType::Binary));
795+
filters.push(Arc::new(filter));
796+
}
797+
for ngram_column in self.ngram_columns.iter_mut() {
798+
let filter = ngram_column.builder.build()?;
799+
let filter_name =
800+
BloomIndex::build_filter_ngram_name(&ngram_column.field, ngram_column.gram_size);
803801
filter_fields.push(TableField::new(&filter_name, TableDataType::Binary));
804802
filters.push(Arc::new(filter));
805803
}
@@ -816,6 +814,10 @@ impl BloomIndexBuilder {
816814
column_distinct_count,
817815
}))
818816
}
817+
818+
pub fn columns_len(&self) -> usize {
819+
self.bloom_columns.len() + self.ngram_columns.len()
820+
}
819821
}
820822

821823
struct Visitor<T: EqVisitor>(T);
@@ -1042,6 +1044,9 @@ trait EqVisitor {
10421044
return_type: &DataType,
10431045
is_like: bool,
10441046
) -> ResultRewrite {
1047+
if is_like {
1048+
return Ok(ControlFlow::Continue(None));
1049+
}
10451050
match &args[0] {
10461051
Expr::ColumnRef(ColumnRef { id, data_type, .. })
10471052
| Expr::Cast(Cast {
@@ -1172,6 +1177,9 @@ impl EqVisitor for RewriteVisitor<'_> {
11721177
constant: &Constant,
11731178
is_like: bool,
11741179
) -> ResultRewrite {
1180+
if is_like {
1181+
return Ok(ControlFlow::Continue(None));
1182+
}
11751183
let Expr::Cast(Cast {
11761184
span,
11771185
is_try: false,
@@ -1283,6 +1291,9 @@ impl EqVisitor for ShortListVisitor {
12831291
constant: &Constant,
12841292
is_like: bool,
12851293
) -> ResultRewrite {
1294+
if is_like {
1295+
return Ok(ControlFlow::Continue(None));
1296+
}
12861297
let Expr::Cast(Cast {
12871298
is_try: false,
12881299
expr:
@@ -1302,9 +1313,6 @@ impl EqVisitor for ShortListVisitor {
13021313
let Some((i, field)) = Self::found_field(&self.ngram_fields, id) else {
13031314
return Ok(ControlFlow::Break(None));
13041315
};
1305-
if !Xor8Filter::supported_type(src_type) || !is_injective_cast(src_type, dest_type) {
1306-
return Ok(ControlFlow::Break(None));
1307-
}
13081316

13091317
let Some(s) = cast_const(
13101318
&FunctionContext::default(),

0 commit comments

Comments
 (0)