diff --git a/devel/alt_search.py b/devel/alt_search.py
new file mode 100644
index 000000000000..332704a48c54
--- /dev/null
+++ b/devel/alt_search.py
@@ -0,0 +1,135 @@
+"""Alternative implementation of the search function."""
+
+import fnmatch
+import gzip
+import pickle
+import re
+import time
+from pprint import pprint
+
+import psutil
+
+from ansys.fluent.core.generated.solver.settings_252 import root
+from ansys.fluent.core.solver.flobject import NamedObject
+
+
+def get_name_components(name: str):
+    """Given a name like 'abc_def' return ['abc', 'def']."""
+    return name.split("_")
+
+
+PathCache = {}  # caching name -> list of (path, rank) tuples
+NameCache = {}  # caching name component -> set of names containing it
+
+
+def build_cache(root_cls):
+    """
+    Populate PathCache and NameCache from the settings class hierarchy.
+
+    A depth-first traversal is used for the following reasons:
+    1. Show the search results in a depth-first order of the settings API.
+    2. Can support a `depth` parameter in the search function to limit the
+       depth of the search.
+    """
+    print(f"Memory usage before building cache: {get_memory_usage():.2f} MB")
+    start_time = time.time()
+
+    # Explicit stack for an iterative depth-first (pre-order) traversal.
+    # `rank` records the visit order so search results can be sorted later.
+    stack = [("", root_cls, ".settings")]
+    rank = 0
+
+    while stack:
+        current_name, current_cls, current_path = stack.pop()
+        PathCache.setdefault(current_name, []).append((current_path, rank))
+        rank += 1
+        NameCache.setdefault(current_name, set()).add(current_name)
+        for name_component in get_name_components(current_name):
+            NameCache.setdefault(name_component, set()).add(current_name)
+
+        if not hasattr(current_cls, "_child_classes"):
+            continue
+
+        children = []
+        for k, v in current_cls._child_classes.items():
+            if not issubclass(v, NamedObject):
+                next_cls = v
+                next_path = f"{current_path}.{k}"
+            else:
+                # Named objects are containers; descend into the item type.
+                next_cls = getattr(v, "child_object_type")
+                next_path = f'{current_path}.{k}["_name_"]'
+            children.append((k, next_cls, next_path))
+        # Push in reverse so children are visited in declaration order.
+        stack.extend(reversed(children))
+
+    print(f"Cache built in {time.time() - start_time:.2f} seconds")
+    print(f"Memory usage after building cache: {get_memory_usage():.2f} MB")
+
+
+def search(
+    search_string: str,
+    wildcard: bool = False,
+    match_whole_word: bool = False,
+):
+    """
+    Basic string-based search over the settings API paths.
+
+    Parameters
+    ----------
+    search_string : str
+        Name, substring, or glob pattern (when ``wildcard=True``) to look for.
+    wildcard : bool
+        Interpret ``search_string`` as a glob pattern, e.g. ``"viscous*"``.
+    match_whole_word : bool
+        Match complete names or name components only.
+
+    Returns
+    -------
+    list[str]
+        Matching settings paths in traversal order.
+    """
+    if not PathCache:
+        build_cache(root)
+    if match_whole_word:
+        names = NameCache.get(search_string, set())
+        results = [item for name in names for item in PathCache[name]]
+    elif wildcard:
+        # Translate the glob into a regex; fnmatch anchors the whole name,
+        # so "viscous*" behaves as a glob rather than a raw regex.
+        r = re.compile(fnmatch.translate(search_string))
+        results = [item for k, v in PathCache.items() if r.match(k) for item in v]
+    else:
+        results = [
+            item for k, v in PathCache.items() if search_string in k for item in v
+        ]
+    results.sort(key=lambda x: x[1])
+    return [x[0] for x in results]
+
+
+def get_memory_usage():
+    """Return the memory usage of the current process in MB."""
+    process = psutil.Process()
+    memory_info = process.memory_info()
+    return memory_info.rss / (1024 * 1024)  # Convert bytes to MB
+
+
+def save_compressed_cache():
+    """Save both caches to a gzip-compressed pickle file."""
+    with gzip.open("search_cache.pkl.gz", "wb") as f:
+        pickle.dump((PathCache, NameCache), f)
+
+
+if __name__ == "__main__":
+    # Example usage
+    pprint(search("viscous", match_whole_word=True))
+    pprint(len(search("viscous", match_whole_word=True)))
+    pprint(search("read_case", match_whole_word=True))
+    pprint(len(search("read_case", match_whole_word=True)))
+    pprint(search("viscous"))
+    pprint(len(search("viscous")))
+    pprint(search("viscous*", wildcard=True))
+    pprint(len(search("viscous*", wildcard=True)))
+    save_compressed_cache()
+    with open("alt_search.log", "w") as f:
+        pprint(PathCache, stream=f)
+        pprint(NameCache, stream=f)
diff --git a/devel/alt_search_trie.py b/devel/alt_search_trie.py
new file mode 100644
index 000000000000..5922f6d9a906
--- /dev/null
+++ b/devel/alt_search_trie.py
@@ -0,0 +1,132 @@
+"""Alternative implementation of the search function."""
+
+from pprint import pprint
+
+import psutil
+
+from ansys.fluent.core.generated.solver.settings_252 import root
+from ansys.fluent.core.solver.flobject import NamedObject
+
+
+class TrieNode:
+    """A node in the Trie data structure."""
+
+    def __init__(self):
+        self.children = {}  # char -> TrieNode
+        self.results = []  # results for words ending at this node
+
+
+class Trie:
+    """A Trie (prefix tree) for storing and searching search results."""
+
+    def __init__(self):
+        self._root = TrieNode()
+
+    def insert(self, word, result):
+        """Insert a word into the Trie and associate it with the given result."""
+        node = self._root
+        for char in word:
+            if char not in node.children:
+                node.children[char] = TrieNode()
+            node = node.children[char]
+        node.results.append(result)
+
+    def search(self, prefix):
+        """
+        Search the Trie for the given prefix, collecting results from all
+        nodes in the matching subtree.
+        """
+
+        def collect_results(node):
+            results = list(node.results)
+            for child in node.children.values():
+                results.extend(collect_results(child))
+            return results
+
+        node = self._root
+        for char in prefix:
+            if char not in node.children:
+                return []
+            node = node.children[char]
+
+        return collect_results(node)
+
+
+def get_name_components(name: str):
+    """Given a name like 'abc_def' return ['abc', 'def']."""
+    return name.split("_")
+
+
+def get_all_ending_substrings(name_component: str):
+    """
+    Given a name component like 'abc' return all ending substrings of
+    length > 1: ['abc', 'bc']
+    """
+    return [
+        name_component[i:]
+        for i in range(len(name_component))
+        if len(name_component[i:]) > 1
+    ]
+
+
+def build_trie(root_cls):
+    """
+    Build a trie from the settings module.
+
+    A depth-first traversal is used for the following reasons:
+    1. Show the search results in a depth-first order of the settings API.
+    2. Can support a `depth` parameter in the search function to limit the
+       depth of the search.
+    """
+    print(f"Memory usage before building trie: {get_memory_usage():.2f} MB")
+
+    # Explicit stack for an iterative depth-first (pre-order) traversal.
+    stack = [("", root_cls, ".settings")]
+
+    while stack:
+        current_name, current_cls, current_path = stack.pop()
+        for component in get_name_components(current_name):
+            for substring in get_all_ending_substrings(component):
+                SettingsTrie.insert(substring, current_path)
+
+        if not hasattr(current_cls, "_child_classes"):
+            continue
+
+        children = []
+        for k, v in current_cls._child_classes.items():
+            if not issubclass(v, NamedObject):
+                next_cls = v
+                next_path = f"{current_path}.{k}"
+            else:
+                # Named objects are containers; descend into the item type.
+                next_cls = getattr(v, "child_object_type")
+                next_path = f'{current_path}.{k}["_name_"]'
+            children.append((k, next_cls, next_path))
+        # Push in reverse so children are visited in declaration order.
+        stack.extend(reversed(children))
+
+    print(f"Memory usage after building trie: {get_memory_usage():.2f} MB")
+
+
+SettingsTrie = Trie()
+
+
+def search(search_term):
+    """Basic substring search over the settings API paths."""
+    return SettingsTrie.search(search_term)
+
+
+def get_memory_usage():
+    """Return the memory usage of the current process in MB."""
+    process = psutil.Process()
+    memory_info = process.memory_info()
+    return memory_info.rss / (1024 * 1024)  # Convert bytes to MB
+
+
+if __name__ == "__main__":
+    build_trie(root)
+    # Example usage
+    pprint(search("viscous"))
+    pprint(search("isco"))
+    pprint(len(search("viscous")))
+    pprint(len(search("isco")))
diff --git a/doc/changelog.d/3909.added.md b/doc/changelog.d/3909.added.md
new file mode 100644
index 000000000000..36d0a0fac100
--- /dev/null
+++ b/doc/changelog.d/3909.added.md
@@ -0,0 +1 @@
+Basic substring search within settings API based on a cache