Skip to content
Snippets Groups Projects
Commit 76c921af authored by Phil Höfer
Browse files

Merge branch 'main' into '35-implement-db-integration'

# Conflicts:
#   src/main.rs
#   src/predictors/basic_set.rs
parents f5ae7d94 ceee8ff5
No related branches found
No related tags found
1 merge request: !8 Resolve "Implement DB Integration"
This commit is part of merge request !8. Comments created here will be created in the context of that merge request.
auth = "12345" auth = "12345"
term_frequency_threshold = 2 term_frequency_threshold = 1
max_predict_count = 5 max_predict_count = 5
blocked_words = "" blocked_words = ""
\ No newline at end of file
...@@ -43,9 +43,10 @@ fn main() -> Result<(), io::Error> { ...@@ -43,9 +43,10 @@ fn main() -> Result<(), io::Error> {
} }
} }
let mut markov_chain = basic_set::from_file_path_and_config("../../data/data.csv",config.clone())
.unwrap_or(basic_set::from_file_path_and_config("data.csv",config.clone()) let mut markov_chain = basic_set::from_file_path_and_config(
.unwrap()); vec!["../../data/data.csv","data/data.csv","data.csv","data_full.csv"],config.clone())
.unwrap_or(basic_set::SetPredictor::new());
markov_chain = read_from_db(config.clone(), markov_chain); markov_chain = read_from_db(config.clone(), markov_chain);
......
...@@ -116,20 +116,32 @@ pub fn from_file_path(file_path: &str) -> Result<MarkovChainPredictor, std::io:: ...@@ -116,20 +116,32 @@ pub fn from_file_path(file_path: &str) -> Result<MarkovChainPredictor, std::io::
} }
pub fn from_file_path_and_config(file_path: &str, config: HashMap<String, impl Into<String>>) -> Result<MarkovChainPredictor, std::io::Error> { pub fn from_file_path_and_config(file_paths: Vec<&str>, config: HashMap<String, impl Into<String>>) -> Result<MarkovChainPredictor, std::io::Error> {
let mut configuration = HashMap::new(); let mut configuration = HashMap::new();
for (key, value) in config { for (key, value) in config {
configuration.insert(key, value.into()); configuration.insert(key, value.into());
} }
let file = File::open(file_path)?;
let mut reader = ReaderBuilder::new().from_reader(file);
let mut markov_chain: MarkovChainPredictor = MarkovChainPredictor::new(); let mut markov_chain: MarkovChainPredictor = MarkovChainPredictor::new();
markov_chain.configuration = configuration; markov_chain.configuration = configuration;
for result in reader.records() { for path in file_paths {
let record = result?; println!("Trying to open data file at {}",path);
if let Some(query) = record.get(5) { match File::open(path) {
markov_chain.update(query); Ok(file) => {
println!("Reading data file...");
let mut reader = ReaderBuilder::new().from_reader(file);
for result in reader.records() {
let record = result?;
if let Some(query) = record.get(5) {
markov_chain.update(query);
}
}
},
Err(e) => {
println!("Error while reading: {}",e);
}
} }
} }
......
use std::{collections::HashMap, fs::File}; use std::{collections::HashMap, f32::consts::E, fs::File};
use csv::ReaderBuilder; use csv::ReaderBuilder;
...@@ -42,7 +42,15 @@ impl Predictor for SetPredictor { ...@@ -42,7 +42,15 @@ impl Predictor for SetPredictor {
//println!("blocklist:{:?}",self.configuration); //println!("blocklist:{:?}",self.configuration);
let lowercase_query = query.to_lowercase(); let lowercase_query: String = query.to_lowercase().chars()
.map(|c| {
if c.is_alphanumeric() {
c
} else {
' '
}
})
.collect();
let words: Vec<&str> = lowercase_query.split_whitespace().filter(|&x| !blocklist.contains(&x)).collect(); let words: Vec<&str> = lowercase_query.split_whitespace().filter(|&x| !blocklist.contains(&x)).collect();
for &word in &words { for &word in &words {
...@@ -65,7 +73,7 @@ impl Predictor for SetPredictor { ...@@ -65,7 +73,7 @@ impl Predictor for SetPredictor {
let query_prefix = query.rsplit_once(' ').map_or("", |(head, _)| head).to_string(); let query_prefix = query.rsplit_once(' ').map_or("", |(head, _)| head).to_string();
let predictions: Vec<String> = top_words let predictions: Vec<String> = top_words
.into_iter() .into_iter()
.map(|(word, _)| format!("\"{} {}\"", query_prefix, word)) .map(|(word, _)| format!("\"{}\"", format!("{} {}",query_prefix, word).trim()))
.collect(); .collect();
return format!("[\"{}\",[{}]]", query, predictions.join(",")); return format!("[\"{}\",[{}]]", query, predictions.join(","));
} }
...@@ -79,12 +87,13 @@ fn get_top_completions( ...@@ -79,12 +87,13 @@ fn get_top_completions(
top_n: usize, top_n: usize,
min_freq: usize min_freq: usize
) -> Option<Vec<(String, usize)>> { ) -> Option<Vec<(String, usize)>> {
Some(predictor.set.iter() let mut completions: Vec<(String, usize)> = predictor.set.iter()
.filter(|(key, &value)| key.starts_with(word) && value >= min_freq) .filter(|(key, &value)| key.starts_with(word) && value >= min_freq)
.map(|(key, &value)| (key.clone(), value)) .map(|(key, &value)| (key.clone(), value))
.take(top_n) .collect();
.collect()) completions.sort_by(|a, b| b.1.cmp(&a.1));
completions.truncate(top_n);
Some(completions)
} }
...@@ -107,25 +116,37 @@ pub fn from_file_path(file_path: &str) -> Result<SetPredictor, std::io::Error> { ...@@ -107,25 +116,37 @@ pub fn from_file_path(file_path: &str) -> Result<SetPredictor, std::io::Error> {
} }
pub fn from_file_path_and_config(file_path: &str, config: HashMap<String, impl Into<String>>) -> Result<SetPredictor, std::io::Error> { pub fn from_file_path_and_config(file_paths: Vec<&str>, config: HashMap<String, impl Into<String>>) -> Result<SetPredictor, std::io::Error> {
let mut configuration = HashMap::new(); let mut configuration = HashMap::new();
for (key, value) in config { for (key, value) in config {
configuration.insert(key, value.into()); configuration.insert(key, value.into());
} }
let file = File::open(file_path)?;
let mut reader = ReaderBuilder::new().from_reader(file);
let mut markov_chain: SetPredictor = SetPredictor::new(); let mut markov_chain: SetPredictor = SetPredictor::new();
markov_chain.configuration = configuration; markov_chain.configuration = configuration;
let mut count = 0; for path in file_paths {
for result in reader.records() { println!("Trying to open data file at {}",path);
let record = result?; match File::open(path) {
if let Some(query) = record.get(5) { Ok(file) => {
markov_chain.update(query); println!("Reading data file...");
count += 1; let mut count = 0;
let mut reader = ReaderBuilder::new().from_reader(file);
for result in reader.records() {
let record = result?;
if let Some(query) = record.get(5) {
markov_chain.update(query);
count += 1;
}
}
println!("{} queries read from file", count);
},
Err(e) => {
println!("Error while reading: {}",e);
}
} }
} }
println!("{} queries read from file", count);
Ok(markov_chain) Ok(markov_chain)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment