From 158e29dfd5a7965b0dc411401982c59aff1ff42d Mon Sep 17 00:00:00 2001 From: Mainak Kundu Date: Tue, 8 Apr 2025 12:17:45 -0400 Subject: [PATCH 01/10] feat: alt_search - basic substring search --- devel/alt_search.py | 134 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 134 insertions(+) create mode 100644 devel/alt_search.py diff --git a/devel/alt_search.py b/devel/alt_search.py new file mode 100644 index 000000000000..fcc9b2c7a0b9 --- /dev/null +++ b/devel/alt_search.py @@ -0,0 +1,134 @@ +"""Alternative implementation of the search function.""" + +from collections import deque +from pprint import pprint + +import psutil + +from ansys.fluent.core.generated.solver.settings_252 import root +from ansys.fluent.core.solver.flobject import NamedObject + + +class TrieNode: + """ + A node in the Trie data structure. + """ + + def __init__(self): + self.children = {} + self.results = [] + + +class Trie: + """ + A Trie (prefix tree) data structure for storing and searching search results." + """ + + def __init__(self): + self._root = TrieNode() + + def insert(self, word, result): + """ + Inserts a word into the Trie and associates it with the given results. + """ + node = self._root + for char in word: + if char not in node.children: + node.children[char] = TrieNode() + node = node.children[char] + node.results.append(result) + + def search(self, word): + """ + Searches all results in the Trie for the given word. + """ + node = self._root + for char in word: + if char not in node.children: + return [] + node = node.children[char] + + return node.results + + +def get_name_components(name: str): + """ + Given a name like 'abc_def' returns ['abc', 'def'] + """ + return name.split("_") + + +def build_trie(root_cls): + """ + Build a trie from the settings module + """ + print(f"Memory usage before building trie: {get_memory_usage():.2f} MB") + + # A depth-first algorithm is chosen for the following reasons: + # 1. Show the search results in a depth-first order of the settings API. + # 2. Can support a `depth` parameter in the search function to limit the depth of the search. + queue = deque([("", root_cls, ".settings")]) + + while queue: + current_name, current_cls, current_path = queue.popleft() + for component in get_name_components(current_name): + for substring in get_all_substrings(component): + SettingsTrie.insert(substring, current_path) + + if not hasattr(current_cls, "_child_classes"): + continue + + for k, v in current_cls._child_classes.items(): + if not issubclass(v, NamedObject): + next_cls = v + next_path = f"{current_path}.{k}" + else: + next_cls = getattr(v, "child_object_type") + next_path = f'{current_path}.{k}["_name_"]' + # with open("alt_search.log", "a") as f: + # f.write(f"{next_path}\n") + queue.append((k, next_cls, next_path)) + + print(f"Memory usage after building trie: {get_memory_usage():.2f} MB") + + +def get_all_substrings(name_component: str): + """ + Given a name component like 'abc' returns all substrings of length > 1 + """ + if len(name_component) < 2: + return [] + return [ + name_component[i:j] + for i in range(len(name_component)) + for j in range(i + 2, len(name_component) + 1) + ] + + +SettingsTrie = Trie() + + +def search(search_term): + """ + Basic substring search + """ + results = SettingsTrie.search(search_term) + return results + + +def get_memory_usage(): + """ + Print the memory usage of the current process. + """ + process = psutil.Process() + memory_info = process.memory_info() + return memory_info.rss / (1024 * 1024) # Convert bytes to MB + + +if __name__ == "__main__": + build_trie(root) + # Example usage + pprint(search("viscous")) + pprint(search("isco")) + pprint(len(search("viscous"))) + pprint(len(search("isco"))) From f43e84a805c939d4090ad44983c489758b72bb28 Mon Sep 17 00:00:00 2001 From: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Date: Tue, 8 Apr 2025 17:16:47 +0000 Subject: [PATCH 02/10] chore: adding changelog file 3909.added.md [dependabot-skip] --- doc/changelog.d/3909.added.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc/changelog.d/3909.added.md diff --git a/doc/changelog.d/3909.added.md b/doc/changelog.d/3909.added.md new file mode 100644 index 000000000000..83648a037402 --- /dev/null +++ b/doc/changelog.d/3909.added.md @@ -0,0 +1 @@ +Basic substring search based on a trie \ No newline at end of file From adad79ea4de4735df1bc11af76bd60be11870a70 Mon Sep 17 00:00:00 2001 From: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Date: Tue, 8 Apr 2025 17:21:50 +0000 Subject: [PATCH 03/10] chore: adding changelog file 3909.added.md [dependabot-skip] --- doc/changelog.d/3909.added.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/changelog.d/3909.added.md b/doc/changelog.d/3909.added.md index 83648a037402..4ad280080cc9 100644 --- a/doc/changelog.d/3909.added.md +++ b/doc/changelog.d/3909.added.md @@ -1 +1 @@ -Basic substring search based on a trie \ No newline at end of file +Basic substring search within settings API based on a trie \ No newline at end of file From 526c2463b3d1e84fb1657fdc76723fff65370f30 Mon Sep 17 00:00:00 2001 From: Mainak Kundu Date: Tue, 8 Apr 2025 16:36:13 -0400 Subject: [PATCH 04/10] feat: alt_search - basic substring search --- devel/alt_search.py | 41 +++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/devel/alt_search.py b/devel/alt_search.py index fcc9b2c7a0b9..5922f6d9a906 100644 --- a/devel/alt_search.py +++ b/devel/alt_search.py @@ -38,17 +38,24 @@ def insert(self, word, result): node = node.children[char] node.results.append(result) - def search(self, word): + def search(self, prefix): """ - Searches all results in the Trie for the given word. + Searches all results in the Trie for the given word and collects results from all child nodes. """ + + def collect_results(node): + results = list(node.results) + for child in node.children.values(): + results.extend(collect_results(child)) + return results + node = self._root - for char in word: + for char in prefix: if char not in node.children: return [] node = node.children[char] - return node.results + return collect_results(node) def get_name_components(name: str): @@ -58,6 +65,17 @@ def get_name_components(name: str): return name.split("_") +def get_all_ending_substrings(name_component: str): + """ + Given a name component like 'abc' returns all ending substrings of length > 1: ['abc', 'bc'] + """ + return [ + name_component[i:] + for i in range(len(name_component)) + if len(name_component[i:]) > 1 + ] + + def build_trie(root_cls): """ Build a trie from the settings module @@ -72,7 +90,7 @@ def build_trie(root_cls): while queue: current_name, current_cls, current_path = queue.popleft() for component in get_name_components(current_name): - for substring in get_all_substrings(component): + for substring in get_all_ending_substrings(component): SettingsTrie.insert(substring, current_path) if not hasattr(current_cls, "_child_classes"): @@ -92,19 +110,6 @@ def build_trie(root_cls): print(f"Memory usage after building trie: {get_memory_usage():.2f} MB") -def get_all_substrings(name_component: str): - """ - Given a name component like 'abc' returns all substrings of length > 1 - """ - if len(name_component) < 2: - return [] - return [ - name_component[i:j] - for i in range(len(name_component)) - for j in range(i + 2, len(name_component) + 1) - ] - - SettingsTrie = Trie() From 6b9dd72583c4a362e5b168b7d61a8d94687acd53 Mon Sep 17 00:00:00 2001 From: Mainak Kundu Date: Tue, 8 Apr 2025 17:04:31 -0400 Subject: [PATCH 05/10] feat: alt_search - basic substring search --- devel/alt_search.py | 107 +++++++++--------------------- devel/alt_search_trie.py | 139 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 169 insertions(+), 77 deletions(-) create mode 100644 devel/alt_search_trie.py diff --git a/devel/alt_search.py b/devel/alt_search.py index 5922f6d9a906..1297def09e5a 100644 --- a/devel/alt_search.py +++ b/devel/alt_search.py @@ -9,55 +9,6 @@ from ansys.fluent.core.solver.flobject import NamedObject -class TrieNode: - """ - A node in the Trie data structure. - """ - - def __init__(self): - self.children = {} - self.results = [] - - -class Trie: - """ - A Trie (prefix tree) data structure for storing and searching search results." - """ - - def __init__(self): - self._root = TrieNode() - - def insert(self, word, result): - """ - Inserts a word into the Trie and associates it with the given results. - """ - node = self._root - for char in word: - if char not in node.children: - node.children[char] = TrieNode() - node = node.children[char] - node.results.append(result) - - def search(self, prefix): - """ - Searches all results in the Trie for the given word and collects results from all child nodes. - """ - - def collect_results(node): - results = list(node.results) - for child in node.children.values(): - results.extend(collect_results(child)) - return results - - node = self._root - for char in prefix: - if char not in node.children: - return [] - node = node.children[char] - - return collect_results(node) - - def get_name_components(name: str): """ Given a name like 'abc_def' returns ['abc', 'def'] @@ -65,33 +16,26 @@ def get_name_components(name: str): return name.split("_") -def get_all_ending_substrings(name_component: str): - """ - Given a name component like 'abc' returns all ending substrings of length > 1: ['abc', 'bc'] - """ - return [ - name_component[i:] - for i in range(len(name_component)) - if len(name_component[i:]) > 1 - ] +SearchCache = {} -def build_trie(root_cls): +def build_cache(root_cls): """ Build a trie from the settings module """ - print(f"Memory usage before building trie: {get_memory_usage():.2f} MB") + print(f"Memory usage before building cache: {get_memory_usage():.2f} MB") # A depth-first algorithm is chosen for the following reasons: # 1. Show the search results in a depth-first order of the settings API. # 2. Can support a `depth` parameter in the search function to limit the depth of the search. - queue = deque([("", root_cls, ".settings")]) + queue_order = 0 + queue = deque([("", root_cls, ".settings", queue_order)]) while queue: - current_name, current_cls, current_path = queue.popleft() - for component in get_name_components(current_name): - for substring in get_all_ending_substrings(component): - SettingsTrie.insert(substring, current_path) + current_name, current_cls, current_path, rank = queue.popleft() + SearchCache.setdefault(current_name, []).append((current_path, rank)) + for name_component in get_name_components(current_name): + SearchCache.setdefault(name_component, []).append((current_path, rank)) if not hasattr(current_cls, "_child_classes"): continue @@ -105,20 +49,28 @@ def build_trie(root_cls): next_path = f'{current_path}.{k}["_name_"]' # with open("alt_search.log", "a") as f: # f.write(f"{next_path}\n") - queue.append((k, next_cls, next_path)) - - print(f"Memory usage after building trie: {get_memory_usage():.2f} MB") - + queue_order += 1 + queue.append((k, next_cls, next_path, queue_order)) -SettingsTrie = Trie() + print(f"Memory usage after building cache: {get_memory_usage():.2f} MB") -def search(search_term): +def search(search_string: str, match_whole_word: bool = False): """ - Basic substring search + Basic string-based search """ - results = SettingsTrie.search(search_term) - return results + if not SearchCache: + build_cache(root) + # with open("alt_search.log", "w") as f: + # pprint(SearchCache, stream=f) + if match_whole_word: + results = SearchCache.get(search_string, []) + else: + results = [ + item for k, v in SearchCache.items() if search_string in k for item in v + ] + results.sort(key=lambda x: x[1]) + return [x[0] for x in results] def get_memory_usage(): @@ -131,9 +83,10 @@ def get_memory_usage(): if __name__ == "__main__": - build_trie(root) # Example usage + pprint(search("viscous", match_whole_word=True)) + pprint(len(search("viscous", match_whole_word=True))) + pprint(search("read_case", match_whole_word=True)) + pprint(len(search("read_case", match_whole_word=True))) pprint(search("viscous")) - pprint(search("isco")) pprint(len(search("viscous"))) - pprint(len(search("isco"))) diff --git a/devel/alt_search_trie.py b/devel/alt_search_trie.py new file mode 100644 index 000000000000..5922f6d9a906 --- /dev/null +++ b/devel/alt_search_trie.py @@ -0,0 +1,139 @@ +"""Alternative implementation of the search function.""" + +from collections import deque +from pprint import pprint + +import psutil + +from ansys.fluent.core.generated.solver.settings_252 import root +from ansys.fluent.core.solver.flobject import NamedObject + + +class TrieNode: + """ + A node in the Trie data structure. + """ + + def __init__(self): + self.children = {} + self.results = [] + + +class Trie: + """ + A Trie (prefix tree) data structure for storing and searching search results." + """ + + def __init__(self): + self._root = TrieNode() + + def insert(self, word, result): + """ + Inserts a word into the Trie and associates it with the given results. + """ + node = self._root + for char in word: + if char not in node.children: + node.children[char] = TrieNode() + node = node.children[char] + node.results.append(result) + + def search(self, prefix): + """ + Searches all results in the Trie for the given word and collects results from all child nodes. + """ + + def collect_results(node): + results = list(node.results) + for child in node.children.values(): + results.extend(collect_results(child)) + return results + + node = self._root + for char in prefix: + if char not in node.children: + return [] + node = node.children[char] + + return collect_results(node) + + +def get_name_components(name: str): + """ + Given a name like 'abc_def' returns ['abc', 'def'] + """ + return name.split("_") + + +def get_all_ending_substrings(name_component: str): + """ + Given a name component like 'abc' returns all ending substrings of length > 1: ['abc', 'bc'] + """ + return [ + name_component[i:] + for i in range(len(name_component)) + if len(name_component[i:]) > 1 + ] + + +def build_trie(root_cls): + """ + Build a trie from the settings module + """ + print(f"Memory usage before building trie: {get_memory_usage():.2f} MB") + + # A depth-first algorithm is chosen for the following reasons: + # 1. Show the search results in a depth-first order of the settings API. + # 2. Can support a `depth` parameter in the search function to limit the depth of the search. + queue = deque([("", root_cls, ".settings")]) + + while queue: + current_name, current_cls, current_path = queue.popleft() + for component in get_name_components(current_name): + for substring in get_all_ending_substrings(component): + SettingsTrie.insert(substring, current_path) + + if not hasattr(current_cls, "_child_classes"): + continue + + for k, v in current_cls._child_classes.items(): + if not issubclass(v, NamedObject): + next_cls = v + next_path = f"{current_path}.{k}" + else: + next_cls = getattr(v, "child_object_type") + next_path = f'{current_path}.{k}["_name_"]' + # with open("alt_search.log", "a") as f: + # f.write(f"{next_path}\n") + queue.append((k, next_cls, next_path)) + + print(f"Memory usage after building trie: {get_memory_usage():.2f} MB") + + +SettingsTrie = Trie() + + +def search(search_term): + """ + Basic substring search + """ + results = SettingsTrie.search(search_term) + return results + + +def get_memory_usage(): + """ + Print the memory usage of the current process. + """ + process = psutil.Process() + memory_info = process.memory_info() + return memory_info.rss / (1024 * 1024) # Convert bytes to MB + + +if __name__ == "__main__": + build_trie(root) + # Example usage + pprint(search("viscous")) + pprint(search("isco")) + pprint(len(search("viscous"))) + pprint(len(search("isco"))) From 738dadfe876bca0d69b6d18b046b3edd4b2efc4a Mon Sep 17 00:00:00 2001 From: Mainak Kundu Date: Tue, 8 Apr 2025 17:08:03 -0400 Subject: [PATCH 06/10] feat: alt_search - basic substring search --- devel/alt_search.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/devel/alt_search.py b/devel/alt_search.py index 1297def09e5a..4622a1307898 100644 --- a/devel/alt_search.py +++ b/devel/alt_search.py @@ -1,6 +1,8 @@ """Alternative implementation of the search function.""" from collections import deque +import gzip +import pickle from pprint import pprint import psutil @@ -82,6 +84,14 @@ def get_memory_usage(): return memory_info.rss / (1024 * 1024) # Convert bytes to MB +def save_compressed_cache(): + """ + Save the cache to a compressed file. + """ + with gzip.open("search_cache.pkl.gz", "wb") as f: + pickle.dump(SearchCache, f) + + if __name__ == "__main__": # Example usage pprint(search("viscous", match_whole_word=True)) @@ -90,3 +100,4 @@ def get_memory_usage(): pprint(len(search("read_case", match_whole_word=True))) pprint(search("viscous")) pprint(len(search("viscous"))) + save_compressed_cache() From 4cfd582e4b1d966691447b38e74fc77b150017c6 Mon Sep 17 00:00:00 2001 From: Mainak Kundu Date: Tue, 8 Apr 2025 17:13:10 -0400 Subject: [PATCH 07/10] feat: alt_search - basic substring search --- devel/alt_search.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/devel/alt_search.py b/devel/alt_search.py index 4622a1307898..b8fcbcd74f55 100644 --- a/devel/alt_search.py +++ b/devel/alt_search.py @@ -4,6 +4,7 @@ import gzip import pickle from pprint import pprint +import re import psutil @@ -57,7 +58,11 @@ def build_cache(root_cls): print(f"Memory usage after building cache: {get_memory_usage():.2f} MB") -def search(search_string: str, match_whole_word: bool = False): +def search( + search_string: str, + wildcard: bool | None = False, + match_whole_word: bool = False, +): """ Basic string-based search """ @@ -67,6 +72,9 @@ def search(search_string: str, match_whole_word: bool = False): # pprint(SearchCache, stream=f) if match_whole_word: results = SearchCache.get(search_string, []) + elif wildcard: + r = re.compile(search_string) + results = [item for k, v in SearchCache.items() if r.match(k) for item in v] else: results = [ item for k, v in SearchCache.items() if search_string in k for item in v @@ -100,4 +108,6 @@ def save_compressed_cache(): pprint(len(search("read_case", match_whole_word=True))) pprint(search("viscous")) pprint(len(search("viscous"))) + pprint(search("viscous*", wildcard=True)) + pprint(len(search("viscous", wildcard=True))) save_compressed_cache() From aa31194d0948160978368420a764e2554f718fac Mon Sep 17 00:00:00 2001 From: pyansys-ci-bot <92810346+pyansys-ci-bot@users.noreply.github.com> Date: Tue, 8 Apr 2025 21:16:51 +0000 Subject: [PATCH 08/10] chore: adding changelog file 3909.added.md [dependabot-skip] --- doc/changelog.d/3909.added.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/changelog.d/3909.added.md b/doc/changelog.d/3909.added.md index 4ad280080cc9..36d0a0fac100 100644 --- a/doc/changelog.d/3909.added.md +++ b/doc/changelog.d/3909.added.md @@ -1 +1 @@ -Basic substring search within settings API based on a trie \ No newline at end of file +Basic substring search within settings API based on a cache \ No newline at end of file From 35d7bb4b7d14440b3584a322e5e4d3560bef5da5 Mon Sep 17 00:00:00 2001 From: Mainak Kundu Date: Tue, 8 Apr 2025 17:51:56 -0400 Subject: [PATCH 09/10] feat: alt_search - basic substring search --- devel/alt_search.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/devel/alt_search.py b/devel/alt_search.py index b8fcbcd74f55..e02748fd1c5f 100644 --- a/devel/alt_search.py +++ b/devel/alt_search.py @@ -37,8 +37,10 @@ def build_cache(root_cls): while queue: current_name, current_cls, current_path, rank = queue.popleft() SearchCache.setdefault(current_name, []).append((current_path, rank)) - for name_component in get_name_components(current_name): - SearchCache.setdefault(name_component, []).append((current_path, rank)) + name_components = get_name_components(current_name) + if len(name_components) > 1: + for name_component in name_components: + SearchCache.setdefault(name_component, []).append((current_path, rank)) if not hasattr(current_cls, "_child_classes"): continue @@ -68,8 +70,6 @@ def search( """ if not SearchCache: build_cache(root) - # with open("alt_search.log", "w") as f: - # pprint(SearchCache, stream=f) if match_whole_word: results = SearchCache.get(search_string, []) elif wildcard: @@ -111,3 +111,5 @@ def save_compressed_cache(): pprint(search("viscous*", wildcard=True)) pprint(len(search("viscous", wildcard=True))) save_compressed_cache() + with open("alt_search.log", "w") as f: + pprint(SearchCache, stream=f) From ffd7e79b26d8f744d257bee57983ccd91a3c9aaf Mon Sep 17 00:00:00 2001 From: Mainak Kundu Date: Tue, 8 Apr 2025 18:10:20 -0400 Subject: [PATCH 10/10] feat: alt_search - basic substring search --- devel/alt_search.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/devel/alt_search.py b/devel/alt_search.py index e02748fd1c5f..332704a48c54 100644 --- a/devel/alt_search.py +++ b/devel/alt_search.py @@ -5,6 +5,7 @@ import pickle from pprint import pprint import re +import time import psutil @@ -19,7 +20,8 @@ def get_name_components(name: str): return name.split("_") -SearchCache = {} +PathCache = {} # caching name -> paths +NameCache = {} # caching name_component -> names def build_cache(root_cls): @@ -27,6 +29,7 @@ def build_cache(root_cls): Build a trie from the settings module """ print(f"Memory usage before building cache: {get_memory_usage():.2f} MB") + start_time = time.time() # A depth-first algorithm is chosen for the following reasons: # 1. Show the search results in a depth-first order of the settings API. @@ -36,11 +39,10 @@ def build_cache(root_cls): while queue: current_name, current_cls, current_path, rank = queue.popleft() - SearchCache.setdefault(current_name, []).append((current_path, rank)) - name_components = get_name_components(current_name) - if len(name_components) > 1: - for name_component in name_components: - SearchCache.setdefault(name_component, []).append((current_path, rank)) + PathCache.setdefault(current_name, []).append((current_path, rank)) + NameCache.setdefault(current_name, set()).add(current_name) + for name_component in get_name_components(current_name): + NameCache.setdefault(name_component, set()).add(current_name) if not hasattr(current_cls, "_child_classes"): continue @@ -57,6 +59,7 @@ def build_cache(root_cls): queue_order += 1 queue.append((k, next_cls, next_path, queue_order)) + print(f"Cache built in {time.time() - start_time:.2f} seconds") print(f"Memory usage after building cache: {get_memory_usage():.2f} MB") @@ -68,16 +71,17 @@ def search( """ Basic string-based search """ - if not SearchCache: + if not PathCache: build_cache(root) if match_whole_word: - results = SearchCache.get(search_string, []) + names = NameCache.get(search_string, set()) + results = [item for name in names for item in PathCache[name]] elif wildcard: r = re.compile(search_string) - results = [item for k, v in SearchCache.items() if r.match(k) for item in v] + results = [item for k, v in PathCache.items() if r.match(k) for item in v] else: results = [ - item for k, v in SearchCache.items() if search_string in k for item in v + item for k, v in PathCache.items() if search_string in k for item in v ] results.sort(key=lambda x: x[1]) return [x[0] for x in results] @@ -97,7 +101,7 @@ def save_compressed_cache(): Save the cache to a compressed file. """ with gzip.open("search_cache.pkl.gz", "wb") as f: - pickle.dump(SearchCache, f) + pickle.dump((PathCache, NameCache), f) if __name__ == "__main__": @@ -109,7 +113,8 @@ def save_compressed_cache(): pprint(search("viscous")) pprint(len(search("viscous"))) pprint(search("viscous*", wildcard=True)) - pprint(len(search("viscous", wildcard=True))) + pprint(len(search("viscous*", wildcard=True))) save_compressed_cache() with open("alt_search.log", "w") as f: - pprint(SearchCache, stream=f) + pprint(PathCache, stream=f) + pprint(NameCache, stream=f)