chore: made some libs optional
PeriniM committed Jan 6, 2025
1 parent 54c69a2 commit 5cdf055
Showing 17 changed files with 65 additions and 1,393 deletions.
26 changes: 1 addition & 25 deletions README.md
@@ -32,36 +32,12 @@ The reference page for Scrapegraph-ai is available on the official page of PyPI:
```bash
pip install scrapegraphai

# IMPORTANT (to fetch webpage content)
# IMPORTANT (to fetch websites content)
playwright install
```

**Note**: it is recommended to install the library in a virtual environment to avoid conflicts with other libraries 🐱

<details>
<summary><b>Optional Dependencies</b></summary>
Additional dependencies can be added while installing the library:

- <b>More Language Models</b>: additional language models are installed, such as Fireworks, Groq, Anthropic, Hugging Face, and Nvidia AI Endpoints.

This group allows you to use additional language models like Fireworks, Groq, Anthropic, Together AI, Hugging Face, and Nvidia AI Endpoints.
```bash
pip install scrapegraphai[other-language-models]
```
- <b>Semantic Options</b>: this group includes tools for advanced semantic processing, such as Graphviz.

```bash
pip install scrapegraphai[more-semantic-options]
```

- <b>Browsers Options</b>: this group includes additional browser management tools/services, such as Browserbase.

```bash
pip install scrapegraphai[more-browser-options]
```

</details>


## 💻 Usage
There are multiple standard scraping pipelines that can be used to extract information from a website (or local file).
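For readers following along, the virtual-environment note kept in this README section amounts to something like the following (a minimal sketch; the environment name `.venv` is an arbitrary choice):

```bash
python -m venv .venv          # create an isolated environment
source .venv/bin/activate     # activate it (on Windows: .venv\Scripts\activate)
pip install scrapegraphai     # base install
playwright install            # download the browsers needed to fetch websites
```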
25 changes: 0 additions & 25 deletions docs/turkish.md
@@ -31,31 +31,6 @@ playwright install

**Not**: Diğer kütüphanelerle çakışmaları önlemek için kütüphaneyi sanal bir ortamda kurmanız önerilir 🐱

<details>
<summary><b>Opsiyonel Bağımlılıklar</b></summary>
Kütüphaneyi kurarken ek bağımlılıklar ekleyebilirsiniz:

- **Daha Fazla Dil Modeli**: Fireworks, Groq, Anthropic, Hugging Face ve Nvidia AI Endpoints gibi ek dil modelleri kurulur.

Bu grup, Fireworks, Groq, Anthropic, Together AI, Hugging Face ve Nvidia AI Endpoints gibi ek dil modellerini kullanmanızı sağlar.

```bash
pip install scrapegraphai[other-language-models]
```

- **Semantik Seçenekler**: Graphviz gibi gelişmiş semantik işleme araçlarını içerir.

```bash
pip install scrapegraphai[more-semantic-options]
```

- **Tarayıcı Seçenekleri**: Browserbase gibi ek tarayıcı yönetim araçları/hizmetlerini içerir.

```bash
pip install scrapegraphai[more-browser-options]
```

</details>

## 💻 Kullanım

2 changes: 1 addition & 1 deletion examples/openai/depth_search_graph_openai.py
@@ -7,7 +7,7 @@

load_dotenv()

openai_key = os.getenv("OPENAI_APIKEY")
openai_key = os.getenv("OPENAI_API_KEY")

graph_config = {
"llm": {
2 changes: 1 addition & 1 deletion examples/openai/search_graph_openai.py
@@ -11,7 +11,7 @@
# Define the configuration for the graph
# ************************************************

openai_key = os.getenv("OPENAI_APIKEY")
openai_key = os.getenv("OPENAI_API_KEY")

graph_config = {
"llm": {
2 changes: 1 addition & 1 deletion examples/openai/speech_graph_openai.py
@@ -20,7 +20,7 @@
# Define the configuration for the graph
# ************************************************

openai_key = os.getenv("OPENAI_APIKEY")
openai_key = os.getenv("OPENAI_API_KEY")

graph_config = {
"llm": {
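The three OpenAI examples above now read the standard OPENAI_API_KEY variable (previously OPENAI_APIKEY) via python-dotenv; a matching `.env` entry would look roughly like this, with a placeholder value:

```bash
# .env (placeholder value, substitute your own key)
OPENAI_API_KEY=sk-your-key-here
```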
83 changes: 0 additions & 83 deletions funding.json

This file was deleted.

49 changes: 6 additions & 43 deletions pyproject.toml
@@ -11,12 +11,11 @@ authors = [

dependencies = [
"langchain>=0.3.0",
"langchain-google-genai>=1.0.7",
"langchain-openai>=0.1.22",
"langchain-mistralai>=0.1.12",
"langchain_community>=0.2.9",
"langchain-aws>=0.1.3",
"mistral-common>=1.4.0",
"langchain-ollama>=0.1.3",
"html2text>=2024.2.26",
"beautifulsoup4>=4.12.3",
"python-dotenv>=1.0.1",
@@ -26,16 +25,11 @@ dependencies = [
"free-proxy>=1.1.1",
"playwright>=1.43.0",
"undetected-playwright>=0.3.0",
"langchain-ollama>=0.1.3",
"semchunk>=2.2.0",
"qdrant-client>=1.11.3",
"fastembed>=0.3.6",

"transformers>=4.44.2",
"googlesearch-python>=1.2.5",
"async-timeout>=4.0.3",
"simpleeval>=1.0.0",
"scrapegraph-py>=1.7.0"
"jsonschema>=4.23.0",
]

readme = "README.md"
@@ -73,30 +67,7 @@ requires-python = ">=3.10,<4.0"
[project.optional-dependencies]
burr = ["burr[start]==0.22.1"]
docs = ["sphinx==6.0", "furo==2024.5.6"]

# Group 1: Other Language Models
other-language-models = [
"langchain-google-vertexai>=1.0.7",
"langchain-fireworks>=0.1.3",
"langchain-groq>=0.1.3",
"langchain-anthropic>=0.1.11",
"langchain-huggingface>=0.0.3",
"langchain-nvidia-ai-endpoints>=0.1.6",
"langchain_together>=0.2.0"
]

# Group 2: More Semantic Options
more-semantic-options = [
"graphviz>=0.20.3",
]

# Group 3: More Browser Options
more-browser-options = [
"browserbase>=0.3.0",
]

# Group 4: Surya Library
screenshot_scraper = [
ocr = [
"surya-ocr>=0.5.0",
"matplotlib>=3.7.2",
"ipywidgets>=8.1.0",
@@ -105,21 +76,13 @@ screenshot_scraper = [

[build-system]
requires = ["hatchling==1.26.3"]

build-backend = "hatchling.build"

[dependency-groups]
dev = [
"burr[start]==0.22.1",
"sphinx==6.0",
"furo==2024.5.6",
]

[tool.uv]
dev-dependencies = [
"poethepoet>=0.31.1",
"pytest==8.0.0",
"pytest-mock==3.14.0",
"pytest>=8.0.0",
"pytest-mock>=3.14.0",
"pytest-asyncio>=0.25.0",
"pylint>=3.2.5",
]

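With the trimmed `[project.optional-dependencies]` table above, the extras that survive this commit can still be installed explicitly, for example:

```bash
pip install scrapegraphai[burr]   # Burr integration
pip install scrapegraphai[docs]   # Sphinx documentation toolchain
pip install scrapegraphai[ocr]    # Surya-based OCR group (formerly screenshot_scraper)
```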
5 changes: 4 additions & 1 deletion scrapegraphai/builders/graph_builder.py
@@ -4,7 +4,6 @@
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_extraction_chain
from langchain_community.chat_models import ErnieBotChat
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_openai import ChatOpenAI
from ..helpers import nodes_metadata, graph_schema

@@ -70,6 +69,10 @@ def _create_llm(self, llm_config: dict):
if "gpt-" in llm_params["model"]:
return ChatOpenAI(llm_params)
elif "gemini" in llm_params["model"]:
try:
from langchain_google_genai import ChatGoogleGenerativeAI
except ImportError:
raise ImportError("langchain_google_genai is not installed. Please install it using 'pip install langchain-google-genai'.")
return ChatGoogleGenerativeAI(llm_params)
elif "ernie" in llm_params["model"]:
return ErnieBotChat(llm_params)
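The guarded import added above is the pattern this commit applies across the codebase; a stripped-down sketch of the idea (illustrative only, the function name and `**` unpacking are not the library's exact code):

```python
def load_gemini_chat_model(llm_params: dict):
    """Build a Gemini chat model, importing the optional dependency lazily."""
    try:
        # Imported inside the function so the base install works without the package.
        from langchain_google_genai import ChatGoogleGenerativeAI
    except ImportError as exc:
        raise ImportError(
            "langchain_google_genai is not installed. "
            "Please install it using 'pip install langchain-google-genai'."
        ) from exc
    return ChatGoogleGenerativeAI(**llm_params)
```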
4 changes: 2 additions & 2 deletions scrapegraphai/graphs/abstract_graph.py
@@ -234,15 +234,15 @@ def _create_llm(self, llm_config: dict) -> object:
                    from langchain_together import ChatTogether
                except ImportError:
                    raise ImportError("""The langchain_together module is not installed.
                                      Please install it using `pip install scrapegraphai[other-language-models]`.""")
                                      Please install it using `pip install langchain-together`.""")
                return ChatTogether(**llm_params)

            elif model_provider == "nvidia":
                try:
                    from langchain_nvidia_ai_endpoints import ChatNVIDIA
                except ImportError:
                    raise ImportError("""The langchain_nvidia_ai_endpoints module is not installed.
                                      Please install it using `pip install scrapegraphai[other-language-models]`.""")
                                      Please install it using `pip install langchain-nvidia-ai-endpoints`.""")
                return ChatNVIDIA(**llm_params)

except Exception as e:
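After this change the ImportError messages in abstract_graph.py point at the individual provider packages instead of the removed `other-language-models` extra, i.e. roughly:

```bash
pip install langchain-together                # Together AI models
pip install langchain-nvidia-ai-endpoints     # NVIDIA AI Endpoints models
```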
9 changes: 6 additions & 3 deletions scrapegraphai/graphs/smart_scraper_graph.py
@@ -3,8 +3,6 @@
"""
from typing import Optional
from pydantic import BaseModel
from scrapegraph_py import Client
from scrapegraph_py.logger import sgai_logger
from .base_graph import BaseGraph
from .abstract_graph import AbstractGraph
from ..nodes import (
@@ -67,7 +65,12 @@ def _create_graph(self) -> BaseGraph:
BaseGraph: A graph instance representing the web scraping workflow.
"""
        if self.llm_model == "scrapegraphai/smart-scraper":

            try:
                from scrapegraph_py import Client
                from scrapegraph_py.logger import sgai_logger
            except ImportError:
                raise ImportError("scrapegraph_py is not installed. Please install it using 'pip install scrapegraph-py'.")

            sgai_logger.set_logging(level="INFO")

            # Initialize the client with explicit API key
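Likewise, the hosted smart-scraper client is now only required when `llm_model` is set to "scrapegraphai/smart-scraper"; installing it on demand is a one-liner:

```bash
pip install scrapegraph-py
```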
24 changes: 1 addition & 23 deletions scrapegraphai/nodes/fetch_node.py
@@ -10,7 +10,6 @@
from ..utils.cleanup_html import cleanup_html
from ..docloaders import ChromiumLoader
from ..utils.convert_to_md import convert_to_md
from ..utils.logging import get_logger
from .base_node import BaseNode

class FetchNode(BaseNode):
@@ -79,24 +78,6 @@ def __init__(
            None if node_config is None else node_config.get("storage_state", None)
        )

    def is_valid_url(self, source: str) -> bool:
        """
        Validates if the source string is a valid URL using regex.
        Parameters:
            source (str): The URL string to validate
        Raises:
            ValueError: If the URL is invalid
        """
        import re

        url_pattern = r"^https?://[^\s/$.?#].[^\s]*$"
        if not bool(re.match(url_pattern, source)):
            raise ValueError(
                f"Invalid URL format: {source}. URL must start with http(s):// and contain a valid domain."
            )
        return True

    def execute(self, state):
        """
@@ -129,12 +110,9 @@ def execute(self, state):
            elif self.input == "pdf_dir":
                return state

            # For web sources, validate URL before proceeding
            try:
                if self.is_valid_url(source):
                    return self.handle_web_source(state, source)
                return self.handle_web_source(state, source)
            except ValueError as e:
                # Re-raise the exception from is_valid_url
                raise

            return self.handle_local_source(state, source)
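Since FetchNode no longer pre-validates URLs, callers that relied on the removed `is_valid_url` check can reproduce it on their side; a minimal standalone sketch reusing the deleted regex (the helper name is hypothetical):

```python
import re

# Regex taken from the removed FetchNode.is_valid_url helper.
URL_PATTERN = r"^https?://[^\s/$.?#].[^\s]*$"

def looks_like_url(source: str) -> bool:
    """Best-effort check that `source` is an http(s) URL."""
    return bool(re.match(URL_PATTERN, source))

# Example: looks_like_url("https://example.com") -> True
```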
