Commit 6d63343

add tests to the tokenize() function
1 parent: e8dff6f

src/renderer/html_handlebars/search.rs (89 additions, 0 deletions)

@@ -409,3 +409,92 @@ fn chapter_settings_priority() {
         );
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_tokenize_basic() {
+        assert_eq!(tokenize("hello world"), vec!["hello", "world"]);
+    }
+
+    #[test]
+    fn test_tokenize_with_hyphens() {
+        assert_eq!(
+            tokenize("hello-world test-case"),
+            vec!["hello", "world", "test", "case"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_mixed_whitespace() {
+        assert_eq!(
+            tokenize("hello\tworld\ntest\r\ncase"),
+            vec!["hello", "world", "test", "case"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_empty_string() {
+        assert_eq!(tokenize(""), Vec::<String>::new());
+    }
+
+    #[test]
+    fn test_tokenize_only_whitespace() {
+        assert_eq!(tokenize(" \t\n "), Vec::<String>::new());
+    }
+
+    #[test]
+    fn test_tokenize_case_normalization() {
+        assert_eq!(tokenize("Hello WORLD Test"), vec!["hello", "world", "test"]);
+    }
+
+    #[test]
+    fn test_tokenize_trim_whitespace() {
+        assert_eq!(tokenize(" hello world "), vec!["hello", "world"]);
+    }
+
+    #[test]
+    fn test_tokenize_long_words_filtered() {
+        let long_word = "a".repeat(MAX_WORD_LENGTH_TO_INDEX + 1);
+        let short_word = "a".repeat(MAX_WORD_LENGTH_TO_INDEX);
+        let input = format!("{} hello {}", long_word, short_word);
+        assert_eq!(tokenize(&input), vec!["hello", &short_word]);
+    }
+
+    #[test]
+    fn test_tokenize_max_length_word() {
+        let max_word = "a".repeat(MAX_WORD_LENGTH_TO_INDEX);
+        assert_eq!(tokenize(&max_word), vec![max_word]);
+    }
+
+    #[test]
+    fn test_tokenize_special_characters() {
+        assert_eq!(
+            tokenize("hello,world.test!case?"),
+            vec!["hello,world.test!case?"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_unicode() {
+        assert_eq!(
+            tokenize("café naïve résumé"),
+            vec!["café", "naïve", "résumé"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_unicode_rtl_hebrew() {
+        assert_eq!(tokenize("שלום עולם"), vec!["שלום", "עולם"]);
+    }
+
+    #[test]
+    fn test_tokenize_numbers() {
+        assert_eq!(
+            tokenize("test123 456-789 hello"),
+            vec!["test123", "456", "789", "hello"]
+        );
+    }
+}
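
For reference, the behavior these tests pin down (split on whitespace and hyphens only, lowercase each token, drop empty tokens, and skip words longer than MAX_WORD_LENGTH_TO_INDEX, while leaving punctuation such as commas and periods inside tokens) corresponds to a tokenizer roughly like the sketch below. This is a minimal reconstruction from the assertions, not necessarily the exact code in search.rs, and the constant value 80 is an assumption made so the snippet compiles on its own.

    // Hypothetical value for illustration; the real constant is defined in search.rs.
    const MAX_WORD_LENGTH_TO_INDEX: usize = 80;

    fn tokenize(text: &str) -> Vec<String> {
        text.split(|c: char| c.is_whitespace() || c == '-') // separators: whitespace and '-' only
            .map(|word| word.trim().to_lowercase()) // normalize case (trim is defensive; the split already removes whitespace)
            .filter(|word| !word.is_empty()) // collapse runs of separators and leading/trailing ones
            .filter(|word| word.len() <= MAX_WORD_LENGTH_TO_INDEX) // drop overlong words (byte length)
            .collect()
    }

Note that the length filter compares byte length, not character count, which is why the Hebrew and accented-Latin cases stay well under the limit. The new tests can be run on their own with a name filter, e.g. cargo test tokenize.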
