From 52b015e3ef54cfb2f06fc0b6836a86eaad066300 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Fri, 3 Jan 2025 00:26:06 +0200 Subject: [PATCH 1/5] Add support for include_directive in C Fixes #46 Add support for processing `include_directive` in C files. * **api/analyzers/c/analyzer.py** - Add `process_include_directive` method to handle `include_directive` nodes and create edges between files. - Modify `first_pass` method to process `include_directive` nodes and create edges between files. * **tests/test_c_analyzer.py** - Add test case to verify the creation of edges between files for `include_directive`. --- For more details, open the [Copilot Workspace session](https://copilot-workspace.githubnext.com/FalkorDB/code-graph-backend/issues/46?shareId=XXXX-XXXX-XXXX-XXXX). --- api/analyzers/c/analyzer.py | 39 +++++++++++++++++++++++++++++++++++++ tests/test_c_analyzer.py | 7 +++++++ 2 files changed, 46 insertions(+) diff --git a/api/analyzers/c/analyzer.py b/api/analyzers/c/analyzer.py index 09a5e26..7a04811 100644 --- a/api/analyzers/c/analyzer.py +++ b/api/analyzers/c/analyzer.py @@ -315,6 +315,36 @@ def process_struct_specifier(self, parent: File, node: Node, path: Path, # Connect parent to entity graph.connect_entities('DEFINES', parent.id, entity.id) + def process_include_directive(self, parent: File, node: Node, path: Path, graph: Graph) -> None: + """ + Processes an include directive node to create an edge between files. + + Args: + parent (File): The parent File object. + node (Node): The AST node representing the include directive. + path (Path): The file path where the include directive is found. + graph (Graph): The Graph object to which the file entities and edges will be added. 
+ + Returns: + None + """ + + assert(node.type == 'include_directive') + + # Extract the included file path + included_file_node = node.child_by_field_name('path') + if included_file_node is None: + return + + included_file_path = included_file_node.text.decode('utf-8').strip('"<>') + + # Create file entity for the included file + included_file = File(os.path.dirname(path), included_file_path, os.path.splitext(included_file_path)[1]) + graph.add_file(included_file) + + # Connect the parent file to the included file + graph.connect_entities('INCLUDES', parent.id, included_file.id) + def first_pass(self, path: Path, f: io.TextIOWrapper, graph:Graph) -> None: """ Perform the first pass processing of a C source file or header file. @@ -388,6 +418,15 @@ def first_pass(self, path: Path, f: io.TextIOWrapper, graph:Graph) -> None: for node in structs: self.process_struct_specifier(file, node, path, graph) + # Process include directives + query = C_LANGUAGE.query("(preprocessor_directive (include_directive) @include)") + captures = query.captures(tree.root_node) + + if 'include' in captures: + includes = captures['include'] + for node in includes: + self.process_include_directive(file, node, path, graph) + def second_pass(self, path: Path, f: io.TextIOWrapper, graph: Graph) -> None: """ Perform the second pass processing of a C source file or header file to establish function call relationships. 
diff --git a/tests/test_c_analyzer.py b/tests/test_c_analyzer.py index 76b19bf..c7f42bd 100644 --- a/tests/test_c_analyzer.py +++ b/tests/test_c_analyzer.py @@ -60,3 +60,10 @@ def test_analyzer(self): self.assertIn('add', callers) self.assertIn('main', callers) + # Test for include_directive edge creation + included_file = g.get_file('', 'myheader.h', '.h') + self.assertIsNotNone(included_file) + + includes = g.get_neighbors([f.id], rel='INCLUDES') + included_files = [node['properties']['name'] for node in includes['nodes']] + self.assertIn('myheader.h', included_files) From 5c41fa0011792a2e7f5b321e982b717ed075d338 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Fri, 3 Jan 2025 00:43:39 +0200 Subject: [PATCH 2/5] --- api/analyzers/c/analyzer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/analyzers/c/analyzer.py b/api/analyzers/c/analyzer.py index 7a04811..3671b4a 100644 --- a/api/analyzers/c/analyzer.py +++ b/api/analyzers/c/analyzer.py @@ -419,7 +419,7 @@ def first_pass(self, path: Path, f: io.TextIOWrapper, graph:Graph) -> None: self.process_struct_specifier(file, node, path, graph) # Process include directives - query = C_LANGUAGE.query("(preprocessor_directive (include_directive) @include)") + query = C_LANGUAGE.query("(preproc_include (string_literal) @include)") captures = query.captures(tree.root_node) if 'include' in captures: From 07dd3bfe7d552327fd61dffa993f750c49a949cf Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Fri, 3 Jan 2025 14:58:35 +0200 Subject: [PATCH 3/5] fix include path --- api/analyzers/c/analyzer.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/api/analyzers/c/analyzer.py b/api/analyzers/c/analyzer.py index 3671b4a..0e81d46 100644 --- a/api/analyzers/c/analyzer.py +++ b/api/analyzers/c/analyzer.py @@ -329,14 +329,9 @@ def process_include_directive(self, parent: File, node: Node, path: Path, graph: None """ - assert(node.type == 'include_directive') + assert(node.type == 
'system_lib_string' or node.type == 'string_literal') - # Extract the included file path - included_file_node = node.child_by_field_name('path') - if included_file_node is None: - return - - included_file_path = included_file_node.text.decode('utf-8').strip('"<>') + included_file_path = node.text.decode('utf-8').strip('"<>') # Create file entity for the included file included_file = File(os.path.dirname(path), included_file_path, os.path.splitext(included_file_path)[1]) @@ -419,7 +414,7 @@ def first_pass(self, path: Path, f: io.TextIOWrapper, graph:Graph) -> None: self.process_struct_specifier(file, node, path, graph) # Process include directives - query = C_LANGUAGE.query("(preproc_include (string_literal) @include)") + query = C_LANGUAGE.query("(preproc_include [(string_literal) (system_lib_string)] @include)") captures = query.captures(tree.root_node) if 'include' in captures: From a5198f6a0b9e04d0994e91a50428e45d96601de0 Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Fri, 3 Jan 2025 15:09:22 +0200 Subject: [PATCH 4/5] handle wrong include --- api/analyzers/c/analyzer.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/api/analyzers/c/analyzer.py b/api/analyzers/c/analyzer.py index 0e81d46..3c8d1e3 100644 --- a/api/analyzers/c/analyzer.py +++ b/api/analyzers/c/analyzer.py @@ -332,9 +332,17 @@ def process_include_directive(self, parent: File, node: Node, path: Path, graph: assert(node.type == 'system_lib_string' or node.type == 'string_literal') included_file_path = node.text.decode('utf-8').strip('"<>') + if not included_file_path: + logger.warning("Empty include path found in %s", path) + return + + splitted = os.path.splitext(included_file_path) + if len(splitted) < 2: + logger.warning("Include path has no extension: %s", included_file_path) + return # Create file entity for the included file - included_file = File(os.path.dirname(path), included_file_path, os.path.splitext(included_file_path)[1]) + included_file = 
File(os.path.dirname(path), included_file_path, splitted[1]) graph.add_file(included_file) # Connect the parent file to the included file From bdbde2c7d108a26275e249d34afe87411e3ab21f Mon Sep 17 00:00:00 2001 From: Guy Korland Date: Sun, 5 Jan 2025 19:39:11 +0200 Subject: [PATCH 5/5] fix review comments --- api/analyzers/c/analyzer.py | 23 +++++++++++++++++------ tests/test_c_analyzer.py | 1 + 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/api/analyzers/c/analyzer.py b/api/analyzers/c/analyzer.py index 3c8d1e3..35746a5 100644 --- a/api/analyzers/c/analyzer.py +++ b/api/analyzers/c/analyzer.py @@ -330,19 +330,30 @@ def process_include_directive(self, parent: File, node: Node, path: Path, graph: """ assert(node.type == 'system_lib_string' or node.type == 'string_literal') - - included_file_path = node.text.decode('utf-8').strip('"<>') - if not included_file_path: - logger.warning("Empty include path found in %s", path) + + + try: + included_file_path = node.text.decode('utf-8').strip('"<>') + if not included_file_path: + logger.warning("Empty include path found in %s", path) + return + + # Normalize and validate path + normalized_path = os.path.normpath(included_file_path) + except UnicodeDecodeError as e: + logger.error("Failed to decode include path in %s: %s", path, e) return - splitted = os.path.splitext(included_file_path) + splitted = os.path.splitext(normalized_path) if len(splitted) < 2: logger.warning("Include path has no extension: %s", included_file_path) return # Create file entity for the included file - included_file = File(os.path.dirname(path), included_file_path, splitted[1]) + path = os.path.dirname(normalized_path) + name = os.path.basename(normalized_path) + ext = splitted[1] + included_file = File(path, name, ext) graph.add_file(included_file) # Connect the parent file to the included file diff --git a/tests/test_c_analyzer.py b/tests/test_c_analyzer.py index c7f42bd..fd0c4d0 100644 --- a/tests/test_c_analyzer.py +++ 
b/tests/test_c_analyzer.py @@ -65,5 +65,6 @@ def test_analyzer(self): self.assertIsNotNone(included_file) includes = g.get_neighbors([f.id], rel='INCLUDES') + self.assertEqual(len(includes), 3) included_files = [node['properties']['name'] for node in includes['nodes']] self.assertIn('myheader.h', included_files)