@@ -546,17 +546,24 @@ impl JoinHashTable {
546
546
let probe_indexes = & mut probe_state. probe_indexs ;
547
547
let valids = & probe_state. valids ;
548
548
let mut validity = MutableBitmap :: new ( ) ;
549
+ let mut row_state = std:: collections:: HashMap :: new ( ) ;
549
550
for ( i, key) in keys_iter. enumerate ( ) {
550
551
let probe_result_ptr = Self :: probe_key ( hash_table, key, valids, i) ;
551
552
if let Some ( v) = probe_result_ptr {
552
553
let probe_result_ptrs = v. get_value ( ) ;
553
554
build_indexes. extend_from_slice ( probe_result_ptrs) ;
555
+ for row_ptr in probe_result_ptrs. iter ( ) {
556
+ row_state
557
+ . entry ( row_ptr. clone ( ) )
558
+ . and_modify ( |e| * e += 1 )
559
+ . or_insert ( 1 as usize ) ;
560
+ }
554
561
probe_indexes. extend ( std:: iter:: repeat ( i as u32 ) . take ( probe_result_ptrs. len ( ) ) ) ;
555
562
validity. extend_constant ( probe_result_ptrs. len ( ) , true ) ;
556
563
}
557
564
}
558
565
559
- // For right join, build side will always appear in the joined table
566
+ // For right join, build side will appear at least once in the joined table
560
567
// Find the unmatched rows in build side
561
568
let mut unmatched_build_indexes = vec ! [ ] ;
562
569
for kv in hash_table. iter ( ) {
@@ -591,7 +598,45 @@ impl JoinHashTable {
591
598
) ;
592
599
593
600
nullable_probe_block = DataBlock :: concat_blocks ( & [ nullable_probe_block, null_probe_block] ) ?;
594
- let merged_block = self . merge_eq_block ( & nullable_probe_block, & build_block) ?;
601
+ let mut merged_block = self . merge_eq_block ( & nullable_probe_block, & build_block) ?;
602
+ if !WITH_OTHER_CONJUNCT {
603
+ return Ok ( merged_block) ;
604
+ }
605
+
606
+ let ( bm, all_true, all_false) = self . get_other_filters (
607
+ & merged_block,
608
+ self . hash_join_desc . other_predicate . as_ref ( ) . unwrap ( ) ,
609
+ ) ?;
610
+
611
+ if all_true {
612
+ return Ok ( merged_block) ;
613
+ }
614
+
615
+ let validity = match ( bm, all_false) {
616
+ ( Some ( b) , _) => b,
617
+ ( None , true ) => Bitmap :: new_zeroed ( merged_block. num_rows ( ) ) ,
618
+ // must be one of above
619
+ _ => unreachable ! ( ) ,
620
+ } ;
621
+
622
+ let nullable_columns = nullable_probe_block
623
+ . columns ( )
624
+ . iter ( )
625
+ . map ( |c| Self :: set_validity ( c, & validity) )
626
+ . collect :: < Result < Vec < _ > > > ( ) ?;
627
+ nullable_probe_block = DataBlock :: create ( self . probe_schema . clone ( ) , nullable_columns) ;
628
+ merged_block = self . merge_eq_block ( & nullable_probe_block, & build_block) ?;
629
+
630
+ // If there are only non-equi conditions, the size of build_indexes will be greater than the build table size.
631
+ // This happens because such a join degenerates into a cross join.
632
+ // We need to filter out the redundant rows of the build side.
633
+ if build_indexes. len ( ) > self . row_space . rows_number ( ) {
634
+ let mut bm = validity. into_mut ( ) . right ( ) . unwrap ( ) ;
635
+ Self :: filter_rows_for_right_join ( & mut bm, build_indexes, & mut row_state) ;
636
+ let predicate = BooleanColumn :: from_arrow_data ( bm. into ( ) ) . arc ( ) ;
637
+ return DataBlock :: filter_block ( merged_block, & predicate) ;
638
+ }
639
+
595
640
Ok ( merged_block)
596
641
}
597
642
@@ -644,9 +689,9 @@ impl JoinHashTable {
644
689
}
645
690
646
691
// keep at least one index of the positive state and the null state
647
- // bitmap: [1, 0, 1] with row_state [2, 0 ], probe_index: [0, 0, 1]
692
+ // bitmap: [1, 0, 1] with row_state [2, 1 ], probe_index: [0, 0, 1]
648
693
// bitmap will be [1, 0, 1] -> [1, 0, 1] -> [1, 0, 1] -> [1, 0, 1]
649
- // row_state will be [2, 0 ] -> [2, 0 ] -> [1, 0 ] -> [1, 0 ]
694
+ // row_state will be [2, 1 ] -> [2, 1 ] -> [1, 1 ] -> [1, 1 ]
650
695
fn fill_null_for_left_join (
651
696
bm : & mut MutableBitmap ,
652
697
probe_indexs : & [ u32 ] ,
@@ -672,6 +717,25 @@ impl JoinHashTable {
672
717
}
673
718
}
674
719
720
+ fn filter_rows_for_right_join (
721
+ bm : & mut MutableBitmap ,
722
+ build_indexes : & [ RowPtr ] ,
723
+ row_state : & mut std:: collections:: HashMap < RowPtr , usize > ,
724
+ ) {
725
+ for ( index, row) in build_indexes. iter ( ) . enumerate ( ) {
726
+ if row_state[ row] == 1 {
727
+ if !bm. get ( index) {
728
+ bm. set ( index, true )
729
+ }
730
+ continue ;
731
+ }
732
+
733
+ if !bm. get ( index) {
734
+ row_state. entry ( * row) . and_modify ( |e| * e -= 1 ) ;
735
+ }
736
+ }
737
+ }
738
+
675
739
// Returns a tuple of (optional bitmap, all_true, all_false)
676
740
fn get_other_filters (
677
741
& self ,
0 commit comments