Skip to content

Commit

Permalink
feat: ⏰added graph timeout and fixed model_tokens param (#810 #856 #853)
Browse files Browse the repository at this point in the history
  • Loading branch information
PeriniM committed Jan 6, 2025
1 parent 5f2df70 commit 01a331a
Show file tree
Hide file tree
Showing 6 changed files with 16 additions and 8 deletions.
11 changes: 7 additions & 4 deletions examples/local_models/smart_scraper_ollama.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
"""
"""
Basic example of scraping pipeline using SmartScraper
"""

from scrapegraphai.graphs import SmartScraperGraph
from scrapegraphai.utils import prettify_exec_info

# ************************************************
# Define the configuration for the graph
# ************************************************

graph_config = {
"llm": {
-"model": "ollama/llama3.1",
+"model": "ollama/llama3.2:3b",
"temperature": 0,
"format": "json", # Ollama needs the format to be specified explicitly
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
"model_tokens": 1024,
},
"verbose": True,
-"headless": False
+"headless": False,
}

# ************************************************
Expand All @@ -24,7 +27,7 @@
smart_scraper_graph = SmartScraperGraph(
prompt="Find some information about what does the company do, the name and a contact email.",
source="https://scrapegraphai.com/",
-config=graph_config
+config=graph_config,
)

result = smart_scraper_graph.run()
Expand Down
2 changes: 1 addition & 1 deletion examples/openai/smart_scraper_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
graph_config = {
"llm": {
"api_key": os.getenv("OPENAI_API_KEY"),
-"model": "openai/gpt-4o00",
+"model": "openai/gpt-4o-mini",
},
"verbose": True,
"headless": False,
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies = [
"async-timeout>=4.0.3",
"simpleeval>=1.0.0",
"jsonschema>=4.23.0",
"transformers>=4.46.3",
]

readme = "README.md"
Expand Down
4 changes: 3 additions & 1 deletion scrapegraphai/graphs/abstract_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def __init__(
self.browser_base = self.config.get("browser_base")
self.scrape_do = self.config.get("scrape_do")
self.storage_state = self.config.get("storage_state")
self.timeout = self.config.get("timeout", 480)

self.graph = self._create_graph()
self.final_state = None
Expand All @@ -86,6 +87,7 @@ def __init__(
"loader_kwargs": self.loader_kwargs,
"llm_model": self.llm_model,
"cache_path": self.cache_path,
"timeout": self.timeout,
}

self.set_common_params(common_params, overwrite=True)
Expand Down Expand Up @@ -194,7 +196,7 @@ def _create_llm(self, llm_config: dict) -> object:
If possible, try to use a model instance instead."""
)

-if "model_tokens" not in llm_params:
+if llm_params.get("model_tokens", None) is None:
try:
self.model_token = models_tokens[llm_params["model_provider"]][
llm_params["model"]
Expand Down
2 changes: 1 addition & 1 deletion scrapegraphai/nodes/generate_answer_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def __init__(
self.script_creator = node_config.get("script_creator", False)
self.is_md_scraper = node_config.get("is_md_scraper", False)
self.additional_info = node_config.get("additional_info")
-self.timeout = node_config.get("timeout", 120)
+self.timeout = node_config.get("timeout", 480)

def invoke_with_timeout(self, chain, inputs, timeout):
"""Helper method to invoke chain with timeout"""
Expand Down
4 changes: 3 additions & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 01a331a

Please sign in to comment.