From 86934523e0821bc194d87232a844d10fa31dc867 Mon Sep 17 00:00:00 2001 From: Scott Munro Date: Thu, 28 Jan 2021 21:35:37 -0800 Subject: [PATCH] Improve the word generation logic (#9) Prior to this change, words were generated randomly from the entire dictionary of available words. For harder difficulties that often led to the vast majority of words only matching by 0-2 characters of like the total 12. This change adds a better word generation algorithm which uses a predefined heuristic of "fun" distributions of word differences across each of the difficulties i.e. on difficulty Foo we specify that N words should differ by 1 letter, M words should differ by 3 letters, and so on and so forth generally following a pattern of "more words match for less characters" --- src/dict.rs | 91 ++++++++++++++++++ src/game.rs | 227 ++++++++++++++++++++++++++++++++++++--------- src/randwrapper.rs | 25 ----- src/utils.rs | 7 ++ 4 files changed, 280 insertions(+), 70 deletions(-) diff --git a/src/dict.rs b/src/dict.rs index bf6a722..a83f25a 100644 --- a/src/dict.rs +++ b/src/dict.rs @@ -1,4 +1,5 @@ use crate::randwrapper::{select_rand, RangeRng}; +use crate::utils::hamming_dist_ignore_case; // Each dict chunk represents all words of the same length from our src dict. This partitioning is a // quick optimization since the cracker game will only concern itself with words of the same length. @@ -7,7 +8,23 @@ pub struct EnglishDictChunk { word_set: Vec, } +pub struct HammingDistanceIterator<'a> { + cmp_word: String, + dict_chunk: &'a EnglishDictChunk, + next_candidate_distance: usize, + next_item_candidate_index: usize, +} + impl EnglishDictChunk { + #[cfg(test)] + pub fn new_mock(word_len: usize, word_set: &[&str]) -> Self { + assert!(word_set.iter().all(|w| w.len() == word_len)); + EnglishDictChunk { + word_len, + word_set: word_set.iter().map(|s| String::from(*s)).collect(), + } + } + pub fn load(word_len: usize) -> Self { let dict_file_name = format!("src/dict/{}_char_words_alpha.txt", word_len); let word_set = snm_simple_file::read_lines(&dict_file_name).collect(); @@ -22,4 +39,78 @@ impl EnglishDictChunk { pub fn get_random_word(&self, rng: &mut dyn RangeRng) -> String { select_rand(&self.word_set, rng).clone() } + + pub fn get_hamming_distance_sorted_words(&self, word: &str) -> HammingDistanceIterator { + HammingDistanceIterator { + cmp_word: String::from(word), + dict_chunk: self, + next_candidate_distance: 1, + next_item_candidate_index: 0, + } + } +} + +impl<'a> Iterator for HammingDistanceIterator<'a> { + type Item = (&'a str, usize); + + fn next(&mut self) -> Option { + while self.next_candidate_distance <= self.dict_chunk.word_len { + let candidate_index = self.next_item_candidate_index; + let current_candidate_distance = self.next_candidate_distance; + + // if we've made it to the end of the list, start over at the beginning and look for the next hamming distance + self.next_item_candidate_index += 1; + if self.next_item_candidate_index >= self.dict_chunk.word_set.len() { + self.next_item_candidate_index = 0; + self.next_candidate_distance += 1; + } + + let candidate = &self.dict_chunk.word_set[candidate_index]; + let candidate_hamming_distance = hamming_dist_ignore_case(&candidate, &self.cmp_word); + if candidate_hamming_distance == current_candidate_distance { + return Some((candidate, candidate_hamming_distance)); + } + } + + None + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_hamming_distance_iterator() { + let word = "pens"; + + let word_set = [ + // 3, 1, 2, 4, 0, 1, 1, 3 + "adds", "pans", "pils", "dull", "pens", "pins", "pent", "miss", + ]; + + let expected_words_sorted_by_hamming_distance = [ + ("pans", 1), + ("pins", 1), + ("pent", 1), + ("pils", 2), + ("adds", 3), + ("miss", 3), + ("dull", 4), + ]; + // 1 less because we shouldn't match our own word + assert_eq!( + word_set.len() - 1, + expected_words_sorted_by_hamming_distance.len() + ); + + let dict_chunk = EnglishDictChunk::new_mock(4, &word_set); + let words_sorted_by_haming_distance: Vec<(&str, usize)> = dict_chunk + .get_hamming_distance_sorted_words(&word) + .collect(); + assert_eq!( + words_sorted_by_haming_distance, + expected_words_sorted_by_hamming_distance + ); + } } diff --git a/src/game.rs b/src/game.rs index a44b062..7c8c96d 100644 --- a/src/game.rs +++ b/src/game.rs @@ -1,14 +1,18 @@ -// Work breakdown -// - setup a better word selection algorithm which results in more common letters - -// extensions/flavor +// Extended work breakdown +// - add start screen +// - make game and solver modes launchable from start screen +// - add timed mode +// - add extra game rules for handling selecting brackets? // - use appropriate font to give it a "fallout feel" // - use appropriate animations to give it a "fallout feel" // - SFX // - refactor out tui utils into its own module +// - improve TUI navigation logic to be more intuitive +// - refactor different components into modules +// - address all cleanup/refactoring todos use crate::dict; -use crate::randwrapper::{select_rand, RangeRng, ThreadRangeRng}; +use crate::randwrapper::{RangeRng, ThreadRangeRng}; use crate::utils::{matching_char_count_ignore_case, Rect}; const TITLE: &str = "FONV: Terminal Cracker"; @@ -115,21 +119,111 @@ struct SelectedChunk { len: usize, } -fn generate_words(difficulty: Difficulty, rng: &mut dyn RangeRng) -> Vec { - let word_len = match difficulty { +// H_amming D_istance D_istribution Entry +#[derive(Clone, Copy)] +struct HDDEntry { + num_words: usize, // the number of words to look for with this hamming distance + hamming_distance: usize, // the hamming distance to look for +} + +fn get_hamming_distance_distribution(difficulty: Difficulty) -> [HDDEntry; 4] { + let distances = match difficulty { + Difficulty::VeryEasy => [1, 2, 3, 4], + Difficulty::Easy => [1, 3, 4, 5], + Difficulty::Average => [1, 3, 5, 7], + Difficulty::Hard => [1, 4, 6, 9], + Difficulty::VeryHard => [1, 3, 7, 10], + }; + + [ + HDDEntry { + num_words: 1, + hamming_distance: distances[0], + }, + HDDEntry { + num_words: 2, + hamming_distance: distances[1], + }, + HDDEntry { + num_words: 3, + hamming_distance: distances[2], + }, + HDDEntry { + num_words: 5, + hamming_distance: distances[3], + }, + ] +} + +fn get_word_len_for_difficulty(difficulty: Difficulty) -> usize { + match difficulty { Difficulty::VeryEasy => 4, Difficulty::Easy => 6, Difficulty::Average => 8, Difficulty::Hard => 10, Difficulty::VeryHard => 12, - }; + } +} + +fn generate_words( + dict_chunk: &dict::EnglishDictChunk, + hd_distribution: &[HDDEntry; 4], + rng: &mut dyn RangeRng, +) -> (Vec, String) { + let total_words_in_distribution = hd_distribution.iter().fold(0, |acc, e| acc + e.num_words); + + let mut words = Vec::with_capacity(total_words_in_distribution + 1); + let goal_word = dict_chunk.get_random_word(rng); + words.push(goal_word.clone()); + + let mut current_hd_distribution_index = 0; + let mut hd_distribution_tracker: [HDDEntry; 4] = hd_distribution.clone(); + let mut hamming_distance_sorted_iter = dict_chunk.get_hamming_distance_sorted_words(&goal_word); + + while current_hd_distribution_index < hd_distribution_tracker.len() { + let current_hd_distribution_entry = + &mut hd_distribution_tracker[current_hd_distribution_index]; + assert_ne!(current_hd_distribution_entry.num_words, 0); + + let next_sorted_word_pair = hamming_distance_sorted_iter.next(); + let (word, hamming_distance) = match next_sorted_word_pair { + None => break, // we are out of words! + Some(sorted_word_pair) => sorted_word_pair, + }; + + if hamming_distance >= current_hd_distribution_entry.hamming_distance { + current_hd_distribution_entry.num_words -= 1; + words.push(String::from(word)); + + if current_hd_distribution_entry.num_words == 0 { + current_hd_distribution_index += 1; + } + } + } + + // the code can manage finding fewer words, but this represents a bug + assert_eq!(words.len(), total_words_in_distribution + 1); + (words, goal_word) +} - const WORDS_TO_GENERATE_COUNT: usize = 12; +fn simple_shuffle(mut v: Vec, rng: &mut dyn RangeRng) -> Vec { + const NUM_SWAPS: usize = 100; // a good-enough heuristic for shuffling the words in place - let dict_chunk = dict::EnglishDictChunk::load(word_len); - (0..WORDS_TO_GENERATE_COUNT) - .map(|_| dict_chunk.get_random_word(rng)) - .collect() + for _ in 0..NUM_SWAPS { + let index = rng.gen_range(0, v.len()); + v.swap(0, index); + } + + v +} + +fn generate_words_from_difficulty( + difficulty: Difficulty, + rng: &mut dyn RangeRng, +) -> (Vec, String) { + let dict_chunk = dict::EnglishDictChunk::load(get_word_len_for_difficulty(difficulty)); + let hd_distribution = get_hamming_distance_distribution(difficulty); + generate_words(&dict_chunk, &hd_distribution, rng) } fn move_selection( @@ -467,8 +561,9 @@ pub fn run_game(difficulty: Difficulty) { // Generate a random set of words based on the provided difficulty setting let mut rng = ThreadRangeRng::new(); - let words = generate_words(difficulty, &mut rng); - let solution = select_rand(&words, &mut rng); + let (unshuffled_words, solution) = generate_words_from_difficulty(difficulty, &mut rng); + assert_eq!(unshuffled_words.len(), 12); // the game isn't broken if we don't have 12 words but it represents a bug + let words = simple_shuffle(unshuffled_words, &mut rng); let mut denied_selections = Vec::new(); let mut accepted_selection = None; @@ -597,38 +692,60 @@ mod tests { #[test] fn test_word_generation() { - let mut rng = randwrapper::mocks::SequenceRangeRng::new(&[0, 2, 4, 7]); - let tests = [ - (Difficulty::VeryEasy, ["aahs", "aani", "abac", "abba"]), - (Difficulty::Easy, ["aahing", "aarrgh", "abacay", "abacot"]), - ( - Difficulty::Average, - ["aardvark", "aaronite", "abacisci", "abacuses"], - ), - ( - Difficulty::Hard, - ["aardwolves", "abalienate", "abandoning", "abaptistum"], - ), - ( - Difficulty::VeryHard, - [ - "abalienating", - "abandonments", - "abbreviately", - "abbreviatory", - ], - ), + // use a single-value rng for value 0. This will make sure the goal_word is the first word in the original word list + let mut rng = randwrapper::mocks::SingleValueRangeRng::new(0); + + let test_hd_distribution = [ + HDDEntry { + num_words: 1, + hamming_distance: 1, + }, + HDDEntry { + num_words: 2, + hamming_distance: 2, + }, + HDDEntry { + num_words: 3, + hamming_distance: 3, + }, + HDDEntry { + num_words: 4, + hamming_distance: 4, + }, ]; - for (difficulty, expected_words) in &tests { - let generated_words = generate_words(*difficulty, &mut rng); - let expected_word_cnt = 12; - for i in 0..expected_word_cnt { - let generated_word = &generated_words[i]; - let expected_word = expected_words[i % expected_words.len()]; - assert_eq!(generated_word, expected_word); - } - } + let goal_word = "dude"; + let words = [ + goal_word, // 0 + "dede", // 1 + "door", // 3 + "dodo", // 2 + "doom", // 3 + "abba", // 4 + "rude", // 1 + "duds", // 1 + "rube", // 2 + "cube", // 2 + "sick", // 4 + "stop", // 4 + "soil", // 4 + "roll", // 4 + ]; + + let expected_generated_words = [ + goal_word, // goal + "dede", // hd 1 + "dodo", "rube", // hd 2 + "door", "doom", "abba", // hd 3 + "sick", "stop", "soil", "roll", // hd 4 + ]; + + let test_dict = dict::EnglishDictChunk::new_mock(4, &words); + let (generated_words, solution) = + generate_words(&test_dict, &test_hd_distribution, &mut rng); + + assert_eq!(solution, goal_word); + assert_eq!(generated_words, expected_generated_words); } #[test] @@ -1062,4 +1179,24 @@ mod tests { assert_eq!(end_selection, expected_end_selection); } + + #[test] + fn ensure_word_len_for_difficulty_matches_hamming_distance_distribution_for_difficulty() { + let difficulties = [ + Difficulty::VeryEasy, + Difficulty::Easy, + Difficulty::Average, + Difficulty::Hard, + Difficulty::VeryHard, + ]; + + for d in &difficulties { + let word_len = get_word_len_for_difficulty(*d); + let hamming_distance_distribution = get_hamming_distance_distribution(*d); + + for hdd_entry in &hamming_distance_distribution { + assert!(hdd_entry.hamming_distance <= word_len); + } + } + } } diff --git a/src/randwrapper.rs b/src/randwrapper.rs index 01529c7..7ff3e4d 100644 --- a/src/randwrapper.rs +++ b/src/randwrapper.rs @@ -36,11 +36,6 @@ pub mod mocks { value: T, } - pub struct SequenceRangeRng { - next: usize, - seq: Vec, - } - pub struct SeededRng { rng: rand::rngs::SmallRng, } @@ -59,26 +54,6 @@ pub mod mocks { } } - impl SequenceRangeRng { - pub fn new(value: &[T]) -> Self { - SequenceRangeRng { - next: 0, - seq: Vec::from(value), - } - } - } - - impl RangeRng for SequenceRangeRng { - fn gen_range(&mut self, lower: T, upper: T) -> T { - let value = self.seq[self.next]; - self.next = (self.next + 1) % self.seq.len(); - - assert!(lower <= value); - assert!(upper > value); - value - } - } - impl SeededRng { pub fn new(seed: u64) -> Self { SeededRng { diff --git a/src/utils.rs b/src/utils.rs index 5bcc7dd..0b37c7c 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,4 +1,6 @@ pub fn matching_char_count_ignore_case(a: &str, b: &str) -> usize { + assert_eq!(a.len(), b.len()); + fn chars_eq_ignore_case((a, b): &(char, char)) -> bool { a.to_ascii_lowercase() == b.to_ascii_lowercase() } @@ -9,6 +11,11 @@ pub fn matching_char_count_ignore_case(a: &str, b: &str) -> usize { .count() } +pub fn hamming_dist_ignore_case(a: &str, b: &str) -> usize { + assert_eq!(a.len(), b.len()); + a.len() - matching_char_count_ignore_case(a, b) +} + pub struct Rect { pub left: i32, pub top: i32,