Improve the word generation logic (#9)
Prior to this change, words were generated randomly from the entire dictionary of available words.
For harder difficulties, that often led to the vast majority of words matching the goal word on only 0-2 characters out of the total 12.

This change adds a better word generation algorithm that uses a predefined heuristic of "fun" distributions of word differences for each difficulty.

e.g. for difficulty Foo we specify that N words should differ from the goal word by 1 letter, M words should differ by 3 letters, and so on, generally following the pattern of "more words match on fewer characters".
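As a rough sketch of the idea (illustration only, not code from this commit; `hamming_distance` here is a case-sensitive stand-in for the helper the commit actually uses, and the table mirrors the Hard entry added in src/game.rs):

fn hamming_distance(a: &str, b: &str) -> usize {
    // number of positions at which two equal-length words disagree
    a.chars().zip(b.chars()).filter(|(x, y)| x != y).count()
}

fn main() {
    // hypothetical "Hard" table: each entry reads
    // "pick `count` words whose hamming distance from the goal word is `distance`"
    let distribution: [(usize, usize); 4] = [(1, 1), (2, 4), (3, 6), (5, 9)];
    assert_eq!(hamming_distance("pens", "pans"), 1); // a distance-1 near-miss
    for (count, distance) in distribution {
        println!("pick {} word(s) at hamming distance {}", count, distance);
    }
}

Each entry is consumed greedily while walking the dictionary chunk in order of increasing distance from the goal word, which is what the new code below implements.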
scottnm committed Jan 29, 2021
1 parent df0e5b1 commit 8693452
Showing 4 changed files with 280 additions and 70 deletions.
91 changes: 91 additions & 0 deletions src/dict.rs
@@ -1,4 +1,5 @@
use crate::randwrapper::{select_rand, RangeRng};
use crate::utils::hamming_dist_ignore_case;

// Each dict chunk represents all words of the same length from our src dict. This partitioning is a
// quick optimization since the cracker game will only concern itself with words of the same length.
@@ -7,7 +8,23 @@ pub struct EnglishDictChunk {
word_set: Vec<String>,
}

pub struct HammingDistanceIterator<'a> {
cmp_word: String,
dict_chunk: &'a EnglishDictChunk,
next_candidate_distance: usize,
next_item_candidate_index: usize,
}

impl EnglishDictChunk {
#[cfg(test)]
pub fn new_mock(word_len: usize, word_set: &[&str]) -> Self {
assert!(word_set.iter().all(|w| w.len() == word_len));
EnglishDictChunk {
word_len,
word_set: word_set.iter().map(|s| String::from(*s)).collect(),
}
}

pub fn load(word_len: usize) -> Self {
let dict_file_name = format!("src/dict/{}_char_words_alpha.txt", word_len);
let word_set = snm_simple_file::read_lines(&dict_file_name).collect();
@@ -22,4 +39,78 @@ impl EnglishDictChunk {
pub fn get_random_word(&self, rng: &mut dyn RangeRng<usize>) -> String {
select_rand(&self.word_set, rng).clone()
}

pub fn get_hamming_distance_sorted_words(&self, word: &str) -> HammingDistanceIterator {
HammingDistanceIterator {
cmp_word: String::from(word),
dict_chunk: self,
next_candidate_distance: 1,
next_item_candidate_index: 0,
}
}
}

impl<'a> Iterator for HammingDistanceIterator<'a> {
type Item = (&'a str, usize);

fn next(&mut self) -> Option<Self::Item> {
while self.next_candidate_distance <= self.dict_chunk.word_len {
let candidate_index = self.next_item_candidate_index;
let current_candidate_distance = self.next_candidate_distance;

// if we've made it to the end of the list, start over at the beginning and look for the next hamming distance
self.next_item_candidate_index += 1;
if self.next_item_candidate_index >= self.dict_chunk.word_set.len() {
self.next_item_candidate_index = 0;
self.next_candidate_distance += 1;
}

let candidate = &self.dict_chunk.word_set[candidate_index];
let candidate_hamming_distance = hamming_dist_ignore_case(&candidate, &self.cmp_word);
if candidate_hamming_distance == current_candidate_distance {
return Some((candidate, candidate_hamming_distance));
}
}

None
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_hamming_distance_iterator() {
let word = "pens";

let word_set = [
// 3, 1, 2, 4, 0, 1, 1, 3
"adds", "pans", "pils", "dull", "pens", "pins", "pent", "miss",
];

let expected_words_sorted_by_hamming_distance = [
("pans", 1),
("pins", 1),
("pent", 1),
("pils", 2),
("adds", 3),
("miss", 3),
("dull", 4),
];
// 1 less because we shouldn't match our own word
assert_eq!(
word_set.len() - 1,
expected_words_sorted_by_hamming_distance.len()
);

let dict_chunk = EnglishDictChunk::new_mock(4, &word_set);
let words_sorted_by_hamming_distance: Vec<(&str, usize)> = dict_chunk
.get_hamming_distance_sorted_words(&word)
.collect();
assert_eq!(
words_sorted_by_hamming_distance,
expected_words_sorted_by_hamming_distance
);
}
}
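For reference, a minimal usage sketch of the new iterator (not part of the diff), written against the test-only `new_mock` constructor above; the test name is made up:

#[test]
fn hamming_distance_iterator_usage_sketch() {
    let chunk = EnglishDictChunk::new_mock(4, &["pens", "pans", "dull"]);
    // words come back ordered by hamming distance from the comparison word;
    // the comparison word itself (distance 0) is never yielded
    let sorted: Vec<(&str, usize)> = chunk.get_hamming_distance_sorted_words("pens").collect();
    assert_eq!(sorted, [("pans", 1), ("dull", 4)]);
}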
227 changes: 182 additions & 45 deletions src/game.rs
@@ -1,14 +1,18 @@
// Work breakdown
// - setup a better word selection algorithm which results in more common letters

// extensions/flavor
// Extended work breakdown
// - add start screen
// - make game and solver modes launchable from start screen
// - add timed mode
// - add extra game rules for handling selecting brackets?
// - use appropriate font to give it a "fallout feel"
// - use appropriate animations to give it a "fallout feel"
// - SFX
// - refactor out tui utils into its own module
// - improve TUI navigation logic to be more intuitive
// - refactor different components into modules
// - address all cleanup/refactoring todos

use crate::dict;
use crate::randwrapper::{select_rand, RangeRng, ThreadRangeRng};
use crate::randwrapper::{RangeRng, ThreadRangeRng};
use crate::utils::{matching_char_count_ignore_case, Rect};

const TITLE: &str = "FONV: Terminal Cracker";
@@ -115,21 +119,111 @@ struct SelectedChunk {
len: usize,
}

fn generate_words(difficulty: Difficulty, rng: &mut dyn RangeRng<usize>) -> Vec<String> {
let word_len = match difficulty {
// H_amming D_istance D_istribution Entry
#[derive(Clone, Copy)]
struct HDDEntry {
num_words: usize, // the number of words to look for with this hamming distance
hamming_distance: usize, // the hamming distance to look for
}

fn get_hamming_distance_distribution(difficulty: Difficulty) -> [HDDEntry; 4] {
let distances = match difficulty {
Difficulty::VeryEasy => [1, 2, 3, 4],
Difficulty::Easy => [1, 3, 4, 5],
Difficulty::Average => [1, 3, 5, 7],
Difficulty::Hard => [1, 4, 6, 9],
Difficulty::VeryHard => [1, 3, 7, 10],
};

[
HDDEntry {
num_words: 1,
hamming_distance: distances[0],
},
HDDEntry {
num_words: 2,
hamming_distance: distances[1],
},
HDDEntry {
num_words: 3,
hamming_distance: distances[2],
},
HDDEntry {
num_words: 5,
hamming_distance: distances[3],
},
]
}

fn get_word_len_for_difficulty(difficulty: Difficulty) -> usize {
match difficulty {
Difficulty::VeryEasy => 4,
Difficulty::Easy => 6,
Difficulty::Average => 8,
Difficulty::Hard => 10,
Difficulty::VeryHard => 12,
};
}
}

fn generate_words(
dict_chunk: &dict::EnglishDictChunk,
hd_distribution: &[HDDEntry; 4],
rng: &mut dyn RangeRng<usize>,
) -> (Vec<String>, String) {
let total_words_in_distribution = hd_distribution.iter().fold(0, |acc, e| acc + e.num_words);

let mut words = Vec::with_capacity(total_words_in_distribution + 1);
let goal_word = dict_chunk.get_random_word(rng);
words.push(goal_word.clone());

let mut current_hd_distribution_index = 0;
let mut hd_distribution_tracker: [HDDEntry; 4] = hd_distribution.clone();
let mut hamming_distance_sorted_iter = dict_chunk.get_hamming_distance_sorted_words(&goal_word);

while current_hd_distribution_index < hd_distribution_tracker.len() {
let current_hd_distribution_entry =
&mut hd_distribution_tracker[current_hd_distribution_index];
assert_ne!(current_hd_distribution_entry.num_words, 0);

let next_sorted_word_pair = hamming_distance_sorted_iter.next();
let (word, hamming_distance) = match next_sorted_word_pair {
None => break, // we are out of words!
Some(sorted_word_pair) => sorted_word_pair,
};

if hamming_distance >= current_hd_distribution_entry.hamming_distance {
current_hd_distribution_entry.num_words -= 1;
words.push(String::from(word));

if current_hd_distribution_entry.num_words == 0 {
current_hd_distribution_index += 1;
}
}
}

// the code can manage finding fewer words, but this represents a bug
assert_eq!(words.len(), total_words_in_distribution + 1);
(words, goal_word)
}

const WORDS_TO_GENERATE_COUNT: usize = 12;

    let dict_chunk = dict::EnglishDictChunk::load(word_len);
    (0..WORDS_TO_GENERATE_COUNT)
        .map(|_| dict_chunk.get_random_word(rng))
        .collect()

fn simple_shuffle<T>(mut v: Vec<T>, rng: &mut dyn RangeRng<usize>) -> Vec<T> {
    const NUM_SWAPS: usize = 100; // a good-enough heuristic for shuffling the words in place

    for _ in 0..NUM_SWAPS {
        let index = rng.gen_range(0, v.len());
        v.swap(0, index);
    }

    v
}

fn generate_words_from_difficulty(
difficulty: Difficulty,
rng: &mut dyn RangeRng<usize>,
) -> (Vec<String>, String) {
let dict_chunk = dict::EnglishDictChunk::load(get_word_len_for_difficulty(difficulty));
let hd_distribution = get_hamming_distance_distribution(difficulty);
generate_words(&dict_chunk, &hd_distribution, rng)
}

fn move_selection(
@@ -467,8 +561,9 @@ pub fn run_game(difficulty: Difficulty) {

// Generate a random set of words based on the provided difficulty setting
let mut rng = ThreadRangeRng::new();
let words = generate_words(difficulty, &mut rng);
let solution = select_rand(&words, &mut rng);
let (unshuffled_words, solution) = generate_words_from_difficulty(difficulty, &mut rng);
assert_eq!(unshuffled_words.len(), 12); // the game isn't broken if we don't have 12 words but it represents a bug
let words = simple_shuffle(unshuffled_words, &mut rng);

let mut denied_selections = Vec::new();
let mut accepted_selection = None;
@@ -597,38 +692,60 @@ mod tests {

#[test]
fn test_word_generation() {
let mut rng = randwrapper::mocks::SequenceRangeRng::new(&[0, 2, 4, 7]);
let tests = [
(Difficulty::VeryEasy, ["aahs", "aani", "abac", "abba"]),
(Difficulty::Easy, ["aahing", "aarrgh", "abacay", "abacot"]),
(
Difficulty::Average,
["aardvark", "aaronite", "abacisci", "abacuses"],
),
(
Difficulty::Hard,
["aardwolves", "abalienate", "abandoning", "abaptistum"],
),
(
Difficulty::VeryHard,
[
"abalienating",
"abandonments",
"abbreviately",
"abbreviatory",
],
),
        for (difficulty, expected_words) in &tests {
            let generated_words = generate_words(*difficulty, &mut rng);
            let expected_word_cnt = 12;
            for i in 0..expected_word_cnt {
                let generated_word = &generated_words[i];
                let expected_word = expected_words[i % expected_words.len()];
                assert_eq!(generated_word, expected_word);
            }
        }

        // use a single-value rng for value 0. This will make sure the goal_word is the first word in the original word list
        let mut rng = randwrapper::mocks::SingleValueRangeRng::new(0);

        let test_hd_distribution = [
            HDDEntry {
                num_words: 1,
                hamming_distance: 1,
            },
            HDDEntry {
                num_words: 2,
                hamming_distance: 2,
            },
            HDDEntry {
                num_words: 3,
                hamming_distance: 3,
            },
            HDDEntry {
                num_words: 4,
                hamming_distance: 4,
            },
        ];
let goal_word = "dude";
let words = [
goal_word, // 0
"dede", // 1
"door", // 3
"dodo", // 2
"doom", // 3
"abba", // 4
"rude", // 1
"duds", // 1
"rube", // 2
"cube", // 2
"sick", // 4
"stop", // 4
"soil", // 4
"roll", // 4
];

let expected_generated_words = [
goal_word, // goal
"dede", // hd 1
"dodo", "rube", // hd 2
"door", "doom", "abba", // hd 3
"sick", "stop", "soil", "roll", // hd 4
];

let test_dict = dict::EnglishDictChunk::new_mock(4, &words);
let (generated_words, solution) =
generate_words(&test_dict, &test_hd_distribution, &mut rng);

assert_eq!(solution, goal_word);
assert_eq!(generated_words, expected_generated_words);
}

#[test]
@@ -1062,4 +1179,24 @@ mod tests {

assert_eq!(end_selection, expected_end_selection);
}

#[test]
fn ensure_word_len_for_difficulty_matches_hamming_distance_distribution_for_difficulty() {
let difficulties = [
Difficulty::VeryEasy,
Difficulty::Easy,
Difficulty::Average,
Difficulty::Hard,
Difficulty::VeryHard,
];

for d in &difficulties {
let word_len = get_word_len_for_difficulty(*d);
let hamming_distance_distribution = get_hamming_distance_distribution(*d);

for hdd_entry in &hamming_distance_distribution {
assert!(hdd_entry.hamming_distance <= word_len);
}
}
}
}
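To tie the new pieces together, a hedged sketch (not code from this diff) of the flow run_game now follows: one goal word plus the 1 + 2 + 3 + 5 words from the difficulty's hamming-distance distribution gives the 12 words asserted above, which are then shuffled before display. The wrapper name and the `Difficulty::Hard` argument are just examples.

fn generate_shuffled_word_list(rng: &mut dyn RangeRng<usize>) -> (Vec<String>, String) {
    // 1 goal word + (1 + 2 + 3 + 5) distribution words = 12 words total
    let (unshuffled_words, goal_word) = generate_words_from_difficulty(Difficulty::Hard, rng);
    (simple_shuffle(unshuffled_words, rng), goal_word)
}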