diff --git a/CHANGELOG.md b/CHANGELOG.md index 290de619..36aa47d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.24.1](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.24.0...v1.24.1) (2024-09-26) + + +### Bug Fixes + +* script creator multi ([9905be8](https://github.com/ScrapeGraphAI/Scrapegraph-ai/commit/9905be8a37dc1ff4b90fe9b8be987887253be8bd)) + ## [1.24.0](https://github.com/ScrapeGraphAI/Scrapegraph-ai/compare/v1.23.1...v1.24.0) (2024-09-26) diff --git a/examples/fireworks/script_multi_generator_fireworks.py b/examples/fireworks/script_multi_generator_fireworks.py index 98671768..669f187d 100644 --- a/examples/fireworks/script_multi_generator_fireworks.py +++ b/examples/fireworks/script_multi_generator_fireworks.py @@ -26,10 +26,9 @@ # ************************************************ # Create the ScriptCreatorGraph instance and run it # ************************************************ - urls=[ - "https://perinim.github.io/", - "https://perinim.github.io/cv/" + "https://schultzbergagency.com/emil-raste-karlsen/", + "https://schultzbergagency.com/johanna-hedberg/", ] # ************************************************ @@ -37,7 +36,8 @@ # ************************************************ script_creator_graph = ScriptCreatorMultiGraph( - prompt="Who is Marco Perini?", + prompt="Find information about actors", + # also accepts a string with the already downloaded HTML code source=urls, config=graph_config ) diff --git a/examples/mistral/script_multi_generator_mistral.py b/examples/mistral/script_multi_generator_mistral.py index f4d5d5b5..142b5140 100644 --- a/examples/mistral/script_multi_generator_mistral.py +++ b/examples/mistral/script_multi_generator_mistral.py @@ -29,8 +29,8 @@ # ************************************************ urls=[ - "https://perinim.github.io/", - "https://perinim.github.io/cv/" + "https://schultzbergagency.com/emil-raste-karlsen/", + "https://schultzbergagency.com/johanna-hedberg/", ] # ************************************************ @@ -38,7 +38,8 @@ # ************************************************ script_creator_graph = ScriptCreatorMultiGraph( - prompt="Who is Marco Perini?", + prompt="Find information about actors", + # also accepts a string with the already downloaded HTML code source=urls, config=graph_config ) diff --git a/examples/nemotron/script_multi_generator_nemotron.py b/examples/nemotron/script_multi_generator_nemotron.py index ad0b4b48..c1426e85 100644 --- a/examples/nemotron/script_multi_generator_nemotron.py +++ b/examples/nemotron/script_multi_generator_nemotron.py @@ -29,8 +29,8 @@ # ************************************************ urls=[ - "https://perinim.github.io/", - "https://perinim.github.io/cv/" + "https://schultzbergagency.com/emil-raste-karlsen/", + "https://schultzbergagency.com/johanna-hedberg/", ] # ************************************************ @@ -38,7 +38,8 @@ # ************************************************ script_creator_graph = ScriptCreatorMultiGraph( - prompt="Who is Marco Perini?", + prompt="Find information about actors", + # also accepts a string with the already downloaded HTML code source=urls, config=graph_config ) diff --git a/examples/openai/script_multi_generator_openai.py b/examples/openai/script_multi_generator_openai.py index 3fdd029f..6693ac0f 100644 --- a/examples/openai/script_multi_generator_openai.py +++ b/examples/openai/script_multi_generator_openai.py @@ -29,8 +29,8 @@ # ************************************************ urls=[ - "https://perinim.github.io/", - "https://perinim.github.io/cv/" + "https://schultzbergagency.com/emil-raste-karlsen/", + "https://schultzbergagency.com/johanna-hedberg/", ] # ************************************************ @@ -38,7 +38,8 @@ # ************************************************ script_creator_graph = ScriptCreatorMultiGraph( - prompt="Who is Marco Perini?", + prompt="Find information about actors", + # also accepts a string with the already downloaded HTML code source=urls, config=graph_config ) diff --git a/pyproject.toml b/pyproject.toml index 0dadc5d7..b7a83040 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "scrapegraphai" -version = "1.24.0" +version = "1.24.1" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines." diff --git a/scrapegraphai/graphs/json_scraper_multi_graph.py b/scrapegraphai/graphs/json_scraper_multi_graph.py index 2a78094f..6e5434f0 100644 --- a/scrapegraphai/graphs/json_scraper_multi_graph.py +++ b/scrapegraphai/graphs/json_scraper_multi_graph.py @@ -45,9 +45,7 @@ def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None): self.max_results = config.get("max_results", 3) - self.copy_config = safe_deepcopy(config) - self.copy_schema = deepcopy(schema) super().__init__(prompt, config, source, schema) diff --git a/scrapegraphai/graphs/markdown_scraper_multi_graph.py b/scrapegraphai/graphs/markdown_scraper_multi_graph.py index fb7d6863..1857f872 100644 --- a/scrapegraphai/graphs/markdown_scraper_multi_graph.py +++ b/scrapegraphai/graphs/markdown_scraper_multi_graph.py @@ -1,7 +1,7 @@ """ MDScraperMultiGraph Module """ -from copy import copy, deepcopy +from copy import deepcopy from typing import List, Optional from pydantic import BaseModel from .base_graph import BaseGraph diff --git a/scrapegraphai/graphs/script_creator_multi_graph.py b/scrapegraphai/graphs/script_creator_multi_graph.py index 83ed8592..de1ab6f7 100644 --- a/scrapegraphai/graphs/script_creator_multi_graph.py +++ b/scrapegraphai/graphs/script_creator_multi_graph.py @@ -1,6 +1,7 @@ """ ScriptCreatorMultiGraph Module """ +from copy import deepcopy from typing import List, Optional from pydantic import BaseModel from .base_graph import BaseGraph @@ -45,7 +46,7 @@ def __init__(self, prompt: str, source: List[str], self.max_results = config.get("max_results", 3) self.copy_config = safe_deepcopy(config) - + self.copy_schema = deepcopy(schema) super().__init__(prompt, config, source, schema) def _create_graph(self) -> BaseGraph: @@ -55,19 +56,14 @@ def _create_graph(self) -> BaseGraph: BaseGraph: A graph instance representing the web scraping and searching workflow. """ - script_generator_instance = ScriptCreatorGraph( - prompt="", - source="", - config=self.copy_config, - schema=self.schema - ) - graph_iterator_node = GraphIteratorNode( input="user_prompt & urls", output=["scripts"], node_config={ - "graph_instance": script_generator_instance, - } + "graph_instance": ScriptCreatorGraph, + "scraper_config": self.copy_config, + }, + schema=self.copy_schema ) merge_scripts_node = MergeGeneratedScriptsNode( diff --git a/scrapegraphai/graphs/smart_scraper_multi_graph.py b/scrapegraphai/graphs/smart_scraper_multi_graph.py index 7a86d3cd..183d8144 100644 --- a/scrapegraphai/graphs/smart_scraper_multi_graph.py +++ b/scrapegraphai/graphs/smart_scraper_multi_graph.py @@ -60,13 +60,6 @@ def _create_graph(self) -> BaseGraph: BaseGraph: A graph instance representing the web scraping and searching workflow. """ - # smart_scraper_instance = SmartScraperGraph( - # prompt="", - # source="", - # config=self.copy_config, - # schema=self.copy_schema - # ) - graph_iterator_node = GraphIteratorNode( input="user_prompt & urls", output=["results"], diff --git a/scrapegraphai/graphs/xml_scraper_multi_graph.py b/scrapegraphai/graphs/xml_scraper_multi_graph.py index fd26d279..10887c6b 100644 --- a/scrapegraphai/graphs/xml_scraper_multi_graph.py +++ b/scrapegraphai/graphs/xml_scraper_multi_graph.py @@ -45,9 +45,7 @@ def __init__(self, prompt: str, source: List[str], config: dict, schema: Optional[BaseModel] = None): self.copy_config = safe_deepcopy(config) - self.copy_schema = deepcopy(schema) - super().__init__(prompt, config, source, schema) def _create_graph(self) -> BaseGraph: @@ -57,14 +55,6 @@ def _create_graph(self) -> BaseGraph: Returns: BaseGraph: A graph instance representing the web scraping and searching workflow. """ - - # smart_scraper_instance = XMLScraperGraph( - # prompt="", - # source="", - # config=self.copy_config, - # schema=self.copy_schema - # ) - graph_iterator_node = GraphIteratorNode( input="user_prompt & jsons", output=["results"],