diff --git a/config.toml b/config.toml
index 7bbf4bef306f1e4db181c4f7805daf8cce51bf38..9b3d8cf5dc4abeac8982d0454f700d5a88ecda1f 100644
--- a/config.toml
+++ b/config.toml
@@ -1,3 +1,4 @@
 auth = "12345"
 term_frequency_threshold = 2
-max_predict_count = 5
\ No newline at end of file
+max_predict_count = 5
+blocked_words = ""
\ No newline at end of file
diff --git a/src/predictors/basic_markov.rs b/src/predictors/basic_markov.rs
index adafab457f1435f8b30716fbd677dcc8083d0c57..7839581a15c4f17504f6cef217695567967dee0b 100644
--- a/src/predictors/basic_markov.rs
+++ b/src/predictors/basic_markov.rs
@@ -34,8 +34,21 @@ impl Predictor for MarkovChainPredictor {
     }
 
     fn update(&mut self, query: &str) -> Result<(), Box<dyn std::error::Error>> {
+        // Query words are lowercased below, so lowercase the configured
+        // (whitespace-separated) `blocked_words` as well; otherwise entries
+        // containing uppercase letters could never match.
+        let blocked_words = self
+            .configuration
+            .get("blocked_words")
+            .map(|list| list.to_lowercase())
+            .unwrap_or_default();
+        let blocklist: Vec<&str> = blocked_words.split_whitespace().collect();
+
         let lowercase_query = query.to_lowercase();
-        let words: Vec<&str> = lowercase_query.split_whitespace().collect();
+        let words: Vec<&str> = lowercase_query
+            .split_whitespace()
+            .filter(|word| !blocklist.contains(word))
+            .collect();
         for window in words.windows(2) {
             if let [first, second] = window {
                 self.chain
@@ -107,14 +120,38 @@ pub fn from_file_path(file_path: &str) -> Result<MarkovChainPredictor, std::io::
 
 
+/// Builds a [`MarkovChainPredictor`] from the records of the file at `file_path`,
+/// applying `config` before any query is learned.
+///
+/// The field at index 5 of each record is passed to `update`; records without
+/// that field are skipped.
+///
+/// # Errors
+///
+/// Returns an error if the file cannot be opened, a record cannot be read, or
+/// `update` fails.
 pub fn from_file_path_and_config(file_path: &str, config: HashMap<String, impl Into<String>>) -> Result<MarkovChainPredictor, std::io::Error> {
-    let mut markov_chain: MarkovChainPredictor = from_file_path(file_path)?;
 
     let mut configuration = HashMap::new();
     for (key, value) in config {
         configuration.insert(key, value.into());
     }
 
+    let file = File::open(file_path)?;
+    let mut reader = ReaderBuilder::new().from_reader(file);
+    // The configuration must be in place before any query is fed to `update`,
+    // which reads settings such as `blocked_words` from it.
+    let mut markov_chain: MarkovChainPredictor = MarkovChainPredictor::new();
     markov_chain.configuration = configuration;
-    println!("{}",markov_chain.configuration.get("term_frequency_threshold").unwrap());
+    for result in reader.records() {
+        let record = result?;
+        if let Some(query) = record.get(5) {
+            // Surface `update` failures to the caller instead of silently
+            // discarding the returned `Result`.
+            markov_chain
+                .update(query)
+                .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err.to_string()))?;
+        }
+    }
+
     Ok(markov_chain)
 }
\ No newline at end of file