Skip to content

Commit

Permalink
feat: ⏰added graph timeout and fixed model_tokens param (#810 #856 #853)
Browse files Browse the repository at this point in the history
  • Loading branch information
PeriniM committed Jan 6, 2025
1 parent 5f2df70 commit 01a331a
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 8 deletions.
11 changes: 7 additions & 4 deletions examples/local_models/smart_scraper_ollama.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
"""
"""
Basic example of scraping pipeline using SmartScraper
"""

from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
"llm": {
-"model": "ollama/llama3.1",
+"model": "ollama/llama3.2:3b",
"temperature": 0,
"format": "json", # Ollama needs the format to be specified explicitly
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
"model_tokens": 1024,
},
"verbose": True,
-"headless": False
+"headless": False,
}

# ************************************************
Expand All @@ -24,7 +27,7 @@
smart_scraper_graph = SmartScraperGraph(
prompt="Find some information about what does the company do, the name and a contact email.",
source="https://scrapegraphai.com/",
-config=graph_config
+config=graph_config,
)

result = smart_scraper_graph.run()
Expand Down
2 changes: 1 addition & 1 deletion examples/openai/smart_scraper_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
graph_config = {
"llm": {
"api_key": os.getenv("OPENAI_API_KEY"),
-"model": "openai/gpt-4o00",
+"model": "openai/gpt-4o-mini",
},
"verbose": True,
"headless": False,
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies = [
"async-timeout>=4.0.3",
"simpleeval>=1.0.0",
"jsonschema>=4.23.0",
"transformers>=4.46.3",
]

readme = "README.md"
Expand Down
4 changes: 3 additions & 1 deletion scrapegraphai/graphs/abstract_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def __init__(
self.browser_base = self.config.get("browser_base")
self.scrape_do = self.config.get("scrape_do")
self.storage_state = self.config.get("storage_state")
self.timeout = self.config.get("timeout", 480)

self.graph = self._create_graph()
self.final_state = None
Expand All @@ -86,6 +87,7 @@ def __init__(
"loader_kwargs": self.loader_kwargs,
"llm_model": self.llm_model,
"cache_path": self.cache_path,
"timeout": self.timeout,
}

self.set_common_params(common_params, overwrite=True)
Expand Down Expand Up @@ -194,7 +196,7 @@ def _create_llm(self, llm_config: dict) -> object:
If possible, try to use a model instance instead."""
)

-if "model_tokens" not in llm_params:
+if llm_params.get("model_tokens", None) is None:
try:
self.model_token = models_tokens[llm_params["model_provider"]][
llm_params["model"]
Expand Down
2 changes: 1 addition & 1 deletion scrapegraphai/nodes/generate_answer_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def __init__(
self.script_creator = node_config.get("script_creator", False)
self.is_md_scraper = node_config.get("is_md_scraper", False)
self.additional_info = node_config.get("additional_info")
-self.timeout = node_config.get("timeout", 120)
+self.timeout = node_config.get("timeout", 480)

def invoke_with_timeout(self, chain, inputs, timeout):
"""Helper method to invoke chain with timeout"""
Expand Down
4 changes: 3 additions & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 01a331a

Please sign in to comment.