From 5ebce499d474d164e9b1c4b9899fcdef05ad8563 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Phil=20H=C3=B6fer?= <phil.hoefer@suma-ev.de>
Date: Fri, 28 Jun 2024 13:38:16 +0200
Subject: [PATCH] Implement Exclusion List For Suggested Terms

---
 config.toml                    |  3 ++-
 src/predictors/basic_markov.rs | 24 +++++++++++++++++++++---
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/config.toml b/config.toml
index 7bbf4be..9b3d8cf 100644
--- a/config.toml
+++ b/config.toml
@@ -1,3 +1,4 @@
 auth = "12345"
 term_frequency_threshold = 2
-max_predict_count = 5
\ No newline at end of file
+max_predict_count = 5
+blocked_words = ""
\ No newline at end of file
diff --git a/src/predictors/basic_markov.rs b/src/predictors/basic_markov.rs
index adafab4..7839581 100644
--- a/src/predictors/basic_markov.rs
+++ b/src/predictors/basic_markov.rs
@@ -34,8 +34,18 @@ impl Predictor for MarkovChainPredictor {
     }
 
     fn update(&mut self, query: &str) -> Result<(), Box<dyn std::error::Error>> {
+        let blocklist: Vec<&str> = match self.configuration.get("blocked_words") {
+            Some(list) => {
+                list.split_whitespace().collect()
+            },
+            _ => Vec::new()
+            
+        };    
+
+        //println!("blocklist:{:?}",self.configuration);
+
         let lowercase_query = query.to_lowercase();
-        let words: Vec<&str> = lowercase_query.split_whitespace().collect();
+        let words: Vec<&str> = lowercase_query.split_whitespace().filter(|&x| !blocklist.contains(&x)).collect();
         for window in words.windows(2) {
             if let [first, second] = window {
                 self.chain
@@ -107,14 +117,22 @@ pub fn from_file_path(file_path: &str) -> Result<MarkovChainPredictor, std::io::
 
 
 pub fn from_file_path_and_config(file_path: &str, config: HashMap<String, impl Into<String>>) -> Result<MarkovChainPredictor, std::io::Error> {
-    let mut markov_chain: MarkovChainPredictor = from_file_path(file_path)?;
     let mut configuration = HashMap::new();
     for (key, value) in config {
         configuration.insert(key, value.into());
     }
+    let file = File::open(file_path)?;
+    let mut reader = ReaderBuilder::new().from_reader(file);
+    let mut markov_chain: MarkovChainPredictor = MarkovChainPredictor::new();
     markov_chain.configuration = configuration;
 
-    println!("{}",markov_chain.configuration.get("term_frequency_threshold").unwrap());
+    for result in reader.records() {
+        let record = result?;
+        if let Some(query) = record.get(5) {
+            markov_chain.update(query);
+        }
+    }
+
 
     Ok(markov_chain)
 }
\ No newline at end of file
-- 
GitLab