diff --git a/src/renderer/html_handlebars/search.rs b/src/renderer/html_handlebars/search.rs
index 01ef44ac7a..a96059c1c0 100644
--- a/src/renderer/html_handlebars/search.rs
+++ b/src/renderer/html_handlebars/search.rs
@@ -409,3 +409,92 @@ fn chapter_settings_priority() {
);
}
}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_tokenize_basic() {
+        assert_eq!(tokenize("hello world"), vec!["hello", "world"]);
+    }
+
+    #[test]
+    fn test_tokenize_with_hyphens() {
+        assert_eq!(
+            tokenize("hello-world test-case"),
+            vec!["hello", "world", "test", "case"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_mixed_whitespace() {
+        assert_eq!(
+            tokenize("hello\tworld\ntest\r\ncase"),
+            vec!["hello", "world", "test", "case"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_empty_string() {
+        // `tokenize` returns `Vec<String>`; an explicit turbofish is needed
+        // so the empty expected vector has a concrete element type.
+        assert_eq!(tokenize(""), Vec::<String>::new());
+    }
+
+    #[test]
+    fn test_tokenize_only_whitespace() {
+        assert_eq!(tokenize("  \t\n  "), Vec::<String>::new());
+    }
+
+    #[test]
+    fn test_tokenize_case_normalization() {
+        assert_eq!(tokenize("Hello WORLD Test"), vec!["hello", "world", "test"]);
+    }
+
+    #[test]
+    fn test_tokenize_trim_whitespace() {
+        assert_eq!(tokenize("  hello   world  "), vec!["hello", "world"]);
+    }
+
+    #[test]
+    fn test_tokenize_long_words_filtered() {
+        // Words longer than MAX_WORD_LENGTH_TO_INDEX are dropped from the index.
+        let long_word = "a".repeat(MAX_WORD_LENGTH_TO_INDEX + 1);
+        let short_word = "a".repeat(MAX_WORD_LENGTH_TO_INDEX);
+        let input = format!("{} hello {}", long_word, short_word);
+        assert_eq!(tokenize(&input), vec!["hello", &short_word]);
+    }
+
+    #[test]
+    fn test_tokenize_max_length_word() {
+        // A word of exactly MAX_WORD_LENGTH_TO_INDEX chars is kept (boundary case).
+        let max_word = "a".repeat(MAX_WORD_LENGTH_TO_INDEX);
+        assert_eq!(tokenize(&max_word), vec![max_word]);
+    }
+
+    #[test]
+    fn test_tokenize_special_characters() {
+        // Only whitespace and hyphens split tokens; other punctuation is kept.
+        assert_eq!(
+            tokenize("hello,world.test!case?"),
+            vec!["hello,world.test!case?"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_unicode() {
+        assert_eq!(
+            tokenize("café naïve résumé"),
+            vec!["café", "naïve", "résumé"]
+        );
+    }
+
+    #[test]
+    fn test_tokenize_unicode_hebrew() {
+        assert_eq!(tokenize("שלום עולם"), vec!["שלום", "עולם"]);
+    }
+
+    #[test]
+    fn test_tokenize_numbers() {
+        assert_eq!(
+            tokenize("test123 456-789 hello"),
+            vec!["test123", "456", "789", "hello"]
+        );
+    }
+}