Skip to content
Snippets Groups Projects
Commit 76c921af authored by Phil Höfer
Browse files

Merge branch 'main' into '35-implement-db-integration'

# Conflicts:
#   src/main.rs
#   src/predictors/basic_set.rs
parents f5ae7d94 ceee8ff5
No related branches found
No related tags found
1 merge request: !8 Resolve "Implement DB Integration"
This commit is part of merge request !8. Comments created here will be created in the context of that merge request.
auth = "12345"
term_frequency_threshold = 2
term_frequency_threshold = 1
max_predict_count = 5
blocked_words = ""
\ No newline at end of file
......@@ -43,9 +43,10 @@ fn main() -> Result<(), io::Error> {
}
}
let mut markov_chain = basic_set::from_file_path_and_config("../../data/data.csv",config.clone())
.unwrap_or(basic_set::from_file_path_and_config("data.csv",config.clone())
.unwrap());
let mut markov_chain = basic_set::from_file_path_and_config(
vec!["../../data/data.csv","data/data.csv","data.csv","data_full.csv"],config.clone())
.unwrap_or(basic_set::SetPredictor::new());
markov_chain = read_from_db(config.clone(), markov_chain);
......
......@@ -116,20 +116,32 @@ pub fn from_file_path(file_path: &str) -> Result<MarkovChainPredictor, std::io::
}
pub fn from_file_path_and_config(file_path: &str, config: HashMap<String, impl Into<String>>) -> Result<MarkovChainPredictor, std::io::Error> {
pub fn from_file_path_and_config(file_paths: Vec<&str>, config: HashMap<String, impl Into<String>>) -> Result<MarkovChainPredictor, std::io::Error> {
let mut configuration = HashMap::new();
for (key, value) in config {
configuration.insert(key, value.into());
}
let file = File::open(file_path)?;
let mut reader = ReaderBuilder::new().from_reader(file);
let mut markov_chain: MarkovChainPredictor = MarkovChainPredictor::new();
markov_chain.configuration = configuration;
for result in reader.records() {
let record = result?;
if let Some(query) = record.get(5) {
markov_chain.update(query);
for path in file_paths {
println!("Trying to open data file at {}",path);
match File::open(path) {
Ok(file) => {
println!("Reading data file...");
let mut reader = ReaderBuilder::new().from_reader(file);
for result in reader.records() {
let record = result?;
if let Some(query) = record.get(5) {
markov_chain.update(query);
}
}
},
Err(e) => {
println!("Error while reading: {}",e);
}
}
}
......
use std::{collections::HashMap, fs::File};
use std::{collections::HashMap, f32::consts::E, fs::File};
use csv::ReaderBuilder;
......@@ -42,7 +42,15 @@ impl Predictor for SetPredictor {
//println!("blocklist:{:?}",self.configuration);
let lowercase_query = query.to_lowercase();
let lowercase_query: String = query.to_lowercase().chars()
.map(|c| {
if c.is_alphanumeric() {
c
} else {
' '
}
})
.collect();
let words: Vec<&str> = lowercase_query.split_whitespace().filter(|&x| !blocklist.contains(&x)).collect();
for &word in &words {
......@@ -65,7 +73,7 @@ impl Predictor for SetPredictor {
let query_prefix = query.rsplit_once(' ').map_or("", |(head, _)| head).to_string();
let predictions: Vec<String> = top_words
.into_iter()
.map(|(word, _)| format!("\"{} {}\"", query_prefix, word))
.map(|(word, _)| format!("\"{}\"", format!("{} {}",query_prefix, word).trim()))
.collect();
return format!("[\"{}\",[{}]]", query, predictions.join(","));
}
......@@ -79,12 +87,13 @@ fn get_top_completions(
top_n: usize,
min_freq: usize
) -> Option<Vec<(String, usize)>> {
Some(predictor.set.iter()
let mut completions: Vec<(String, usize)> = predictor.set.iter()
.filter(|(key, &value)| key.starts_with(word) && value >= min_freq)
.map(|(key, &value)| (key.clone(), value))
.take(top_n)
.collect())
.collect();
completions.sort_by(|a, b| b.1.cmp(&a.1));
completions.truncate(top_n);
Some(completions)
}
......@@ -107,25 +116,37 @@ pub fn from_file_path(file_path: &str) -> Result<SetPredictor, std::io::Error> {
}
pub fn from_file_path_and_config(file_path: &str, config: HashMap<String, impl Into<String>>) -> Result<SetPredictor, std::io::Error> {
pub fn from_file_path_and_config(file_paths: Vec<&str>, config: HashMap<String, impl Into<String>>) -> Result<SetPredictor, std::io::Error> {
let mut configuration = HashMap::new();
for (key, value) in config {
configuration.insert(key, value.into());
}
let file = File::open(file_path)?;
let mut reader = ReaderBuilder::new().from_reader(file);
let mut markov_chain: SetPredictor = SetPredictor::new();
markov_chain.configuration = configuration;
let mut count = 0;
for result in reader.records() {
let record = result?;
if let Some(query) = record.get(5) {
markov_chain.update(query);
count += 1;
for path in file_paths {
println!("Trying to open data file at {}",path);
match File::open(path) {
Ok(file) => {
println!("Reading data file...");
let mut count = 0;
let mut reader = ReaderBuilder::new().from_reader(file);
for result in reader.records() {
let record = result?;
if let Some(query) = record.get(5) {
markov_chain.update(query);
count += 1;
}
}
println!("{} queries read from file", count);
},
Err(e) => {
println!("Error while reading: {}",e);
}
}
}
println!("{} queries read from file", count);
Ok(markov_chain)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment