@@ -981,6 +981,7 @@ mod test {
981
981
blockfile_record:: {
982
982
RecordSegmentReader , RecordSegmentReaderCreationError , RecordSegmentWriter ,
983
983
} ,
984
+ test:: TestDistributedSegment ,
984
985
types:: materialize_logs,
985
986
} ;
986
987
use chroma_blockstore:: {
@@ -990,10 +991,15 @@ mod test {
990
991
use chroma_cache:: new_cache_for_test;
991
992
use chroma_storage:: { local:: LocalStorage , Storage } ;
992
993
use chroma_types:: {
993
- Chunk , CollectionUuid , LogRecord , MetadataValue , Operation , OperationRecord , SegmentUuid ,
994
- UpdateMetadataValue ,
994
+ regex:: literal_expr:: { LiteralExpr , NgramLiteralProvider } ,
995
+ strategies:: { ArbitraryChromaRegexTestDocumentsParameters , ChromaRegexTestDocuments } ,
996
+ Chunk , CollectionUuid , LogRecord , MetadataValue , Operation , OperationRecord ,
997
+ ScalarEncoding , SegmentUuid , UpdateMetadataValue ,
995
998
} ;
999
+ use proptest:: prelude:: any_with;
1000
+ use roaring:: RoaringBitmap ;
996
1001
use std:: { collections:: HashMap , str:: FromStr } ;
1002
+ use tokio:: runtime:: Runtime ;
997
1003
998
1004
#[ tokio:: test]
999
1005
async fn empty_blocks ( ) {
@@ -1963,4 +1969,79 @@ mod test {
1963
1969
Some ( String :: from( "bye" ) . as_str( ) )
1964
1970
) ;
1965
1971
}
1972
+
1973
+ async fn run_regex_test ( test_case : ChromaRegexTestDocuments ) {
1974
+ let pattern = String :: from ( test_case. hir . clone ( ) ) ;
1975
+ let regex = regex:: Regex :: new ( & pattern) . unwrap ( ) ;
1976
+ let reference_results = test_case
1977
+ . documents
1978
+ . iter ( )
1979
+ . enumerate ( )
1980
+ . filter_map ( |( id, doc) | regex. is_match ( doc) . then_some ( id as u32 ) )
1981
+ . collect :: < RoaringBitmap > ( ) ;
1982
+ let logs = test_case
1983
+ . documents
1984
+ . into_iter ( )
1985
+ . enumerate ( )
1986
+ . map ( |( id, doc) | LogRecord {
1987
+ log_offset : id as i64 ,
1988
+ record : OperationRecord {
1989
+ id : format ! ( "<{id}>" ) ,
1990
+ embedding : Some ( vec ! [ id as f32 ; 2 ] ) ,
1991
+ encoding : Some ( ScalarEncoding :: FLOAT32 ) ,
1992
+ metadata : None ,
1993
+ document : Some ( doc) ,
1994
+ operation : Operation :: Add ,
1995
+ } ,
1996
+ } )
1997
+ . collect :: < Vec < _ > > ( ) ;
1998
+ let mut segments = TestDistributedSegment :: new_with_dimension ( 2 ) ;
1999
+ segments. compact_log ( Chunk :: new ( logs. into ( ) ) , 0 ) . await ;
2000
+ let metadata_segment_reader = MetadataSegmentReader :: from_segment (
2001
+ & segments. metadata_segment ,
2002
+ & segments. blockfile_provider ,
2003
+ )
2004
+ . await
2005
+ . expect ( "Metadata segment reader should be constructable" ) ;
2006
+ let fts_reader = metadata_segment_reader
2007
+ . full_text_index_reader
2008
+ . as_ref ( )
2009
+ . expect ( "Full text index reader should be present" ) ;
2010
+ let literal_expression = LiteralExpr :: from ( test_case. hir ) ;
2011
+ let regex_results = fts_reader
2012
+ . match_literal_expression ( & literal_expression)
2013
+ . await
2014
+ . expect ( "Literal evaluation should not fail" ) ;
2015
+ if let Some ( res) = regex_results {
2016
+ assert_eq ! ( res, reference_results) ;
2017
+ }
2018
+ }
2019
+
2020
+ proptest:: proptest! {
2021
+ #[ test]
2022
+ fn test_simple_regex(
2023
+ test_case in any_with:: <ChromaRegexTestDocuments >( ArbitraryChromaRegexTestDocumentsParameters {
2024
+ recursive_hir: false ,
2025
+ total_document_count: 10 ,
2026
+ } )
2027
+ ) {
2028
+ let runtime = Runtime :: new( ) . unwrap( ) ;
2029
+ runtime. block_on( async {
2030
+ run_regex_test( test_case) . await
2031
+ } ) ;
2032
+ }
2033
+
2034
+ #[ test]
2035
+ fn test_composite_regex(
2036
+ test_case in any_with:: <ChromaRegexTestDocuments >( ArbitraryChromaRegexTestDocumentsParameters {
2037
+ recursive_hir: true ,
2038
+ total_document_count: 50 ,
2039
+ } )
2040
+ ) {
2041
+ let runtime = Runtime :: new( ) . unwrap( ) ;
2042
+ runtime. block_on( async {
2043
+ run_regex_test( test_case) . await
2044
+ } ) ;
2045
+ }
2046
+ }
1966
2047
}
0 commit comments