From f0973c69bdaabc157ab984bb075fc594b3b1aeaf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Phil=20H=C3=B6fer?= <mail@phil-hoefer.de>
Date: Mon, 1 Jul 2024 13:56:22 +0200
Subject: [PATCH] Filtering and Sorting for Set Predictor

---
 src/predictors/basic_set.rs | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/predictors/basic_set.rs b/src/predictors/basic_set.rs
index 43d5fd8..43f7c2f 100644
--- a/src/predictors/basic_set.rs
+++ b/src/predictors/basic_set.rs
@@ -42,7 +42,15 @@ impl Predictor for SetPredictor {
 
         //println!("blocklist:{:?}",self.configuration);
 
-        let lowercase_query = query.to_lowercase();
+        let lowercase_query: String = query.to_lowercase().chars()
+        .map(|c| {
+            if c.is_alphanumeric() {
+                c
+            } else {
+                ' '
+            }
+        })
+        .collect();
         let words: Vec<&str> = lowercase_query.split_whitespace().filter(|&x| !blocklist.contains(&x)).collect();
 
         for &word in &words {
@@ -79,12 +87,13 @@ fn get_top_completions(
     top_n: usize,
     min_freq: usize
 ) -> Option<Vec<(String, usize)>> {
-    Some(predictor.set.iter()
+    let mut completions: Vec<(String, usize)> = predictor.set.iter()
     .filter(|(key, &value)| key.starts_with(word) && value >= min_freq)
     .map(|(key, &value)| (key.clone(), value))
-    .take(top_n)
-    .collect())
-
+    .collect();
+    completions.sort_by(|a, b| b.1.cmp(&a.1));
+    completions.truncate(top_n);
+    Some(completions)
 }
 
 
-- 
GitLab