Skip to content

Commit

Permalink
docs: improved readme + fix csv scraper imports
Browse files Browse the repository at this point in the history
  • Loading branch information
PeriniM committed Jan 8, 2025
1 parent 0b582be commit 14b4b19
Show file tree
Hide file tree
Showing 19 changed files with 150 additions and 112 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
"""

import os
import pandas as pd

from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
from scrapegraphai.utils import prettify_exec_info

# ************************************************
# Read the CSV file
Expand All @@ -15,7 +15,8 @@
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)
with open(file_path, "r") as file:
text = file.read()

# ************************************************
# Define the configuration for the graph
Expand Down Expand Up @@ -44,7 +45,7 @@
csv_scraper_graph = CSVScraperMultiGraph(
prompt="List me all the last names",
source=[str(text), str(text)],
config=graph_config
config=graph_config,
)

result = csv_scraper_graph.run()
Expand All @@ -56,7 +57,3 @@

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
13 changes: 5 additions & 8 deletions examples/csv_scraper_graph/ollama/csv_scraper_ollama.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
"""

import os
import pandas as pd

from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
from scrapegraphai.utils import prettify_exec_info

# ************************************************
# Read the CSV file
Expand All @@ -15,7 +15,8 @@
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)
with open(file_path, "r") as file:
text = file.read()

# ************************************************
# Define the configuration for the graph
Expand Down Expand Up @@ -44,7 +45,7 @@
csv_scraper_graph = CSVScraperGraph(
prompt="List me all the last names",
source=str(text), # Pass the content of the file, not the file object
config=graph_config
config=graph_config,
)

result = csv_scraper_graph.run()
Expand All @@ -56,7 +57,3 @@

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
"""

import os

from dotenv import load_dotenv
import pandas as pd

from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
from scrapegraphai.utils import prettify_exec_info

load_dotenv()
# ************************************************
Expand All @@ -16,15 +18,16 @@
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)
with open(file_path, "r") as file:
text = file.read()

# ************************************************
# Define the configuration for the graph
# ************************************************
openai_key = os.getenv("OPENAI_APIKEY")

graph_config = {
"llm": {
"llm": {
"api_key": openai_key,
"model": "openai/gpt-4o",
},
Expand All @@ -37,7 +40,7 @@
csv_scraper_graph = CSVScraperMultiGraph(
prompt="List me all the last names",
source=[str(text), str(text)],
config=graph_config
config=graph_config,
)

result = csv_scraper_graph.run()
Expand All @@ -49,7 +52,3 @@

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
15 changes: 7 additions & 8 deletions examples/csv_scraper_graph/openai/csv_scraper_openai.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
"""
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
"""

import os

from dotenv import load_dotenv
import pandas as pd

from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

Expand All @@ -17,7 +19,8 @@
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)
with open(file_path, "r") as file:
text = file.read()

# ************************************************
# Define the configuration for the graph
Expand All @@ -39,7 +42,7 @@
csv_scraper_graph = CSVScraperGraph(
prompt="List me all the last names",
source=str(text), # Pass the content of the file, not the file object
config=graph_config
config=graph_config,
)

result = csv_scraper_graph.run()
Expand All @@ -51,7 +54,3 @@

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
68 changes: 49 additions & 19 deletions examples/readme.md
Original file line number Diff line number Diff line change
@@ -1,32 +1,62 @@
# Scrapegraph-ai Examples
# 🕷️ Scrapegraph-ai Examples

This directory contains various example implementations of Scrapegraph-ai for different use cases.
This directory contains various example implementations of Scrapegraph-ai for different use cases. Each example demonstrates how to leverage the power of Scrapegraph-ai for specific scenarios.

If you want more specific examples, visit [this](https://github.com/ScrapeGraphAI/ScrapegraphLib-Examples).
> **Note:** While these examples showcase implementations using OpenAI and Ollama, Scrapegraph-ai supports many other LLM providers! Check out our [documentation](https://docs-oss.scrapegraphai.com/examples) for the full list of supported providers.
## Available Examples
## 📚 Available Examples

- `smart_scraper/` - Advanced web scraping with intelligent content extraction
- `depth_search_graph/` - Deep web crawling and content exploration
- `csv_scraper_graph/` - Scraping and processing data into CSV format
- `xml_scraper_graph/` - XML data extraction and processing
- `speech_graph/` - Speech processing and analysis
- `omni_scraper_graph/` - Universal web scraping for multiple data types
- `omni_search_graph/` - Comprehensive search across multiple sources
- `document_scraper_graph/` - Document parsing and data extraction
- `script_generator_graph/` - Automated script generation
- `custom_graph/` - Custom graph implementation examples
- `code_generator_graph/` - Code generation utilities
- `json_scraper_graph/` - JSON data extraction and processing
- `search_graph/` - Web search and data retrieval
- 🧠 `smart_scraper_graph/` - Advanced web scraping with intelligent content extraction
- 🔎 `search_graph/` - Web search and data retrieval
- ⚙️ `script_generator_graph/` - Automated script generation
- 🌐 `depth_search_graph/` - Deep web crawling and content exploration
- 📊 `csv_scraper_graph/` - CSV data extraction and processing
- 📑 `xml_scraper_graph/` - XML data extraction and processing
- 🎤 `speech_graph/` - Speech processing and analysis
- 🔄 `omni_scraper_graph/` - Universal web scraping for multiple data types
- 🔍 `omni_search_graph/` - Comprehensive search across multiple sources
- 📄 `document_scraper_graph/` - Document parsing and data extraction
- 🛠️ `custom_graph/` - Custom graph implementation examples
- 💻 `code_generator_graph/` - Code generation utilities
- 📋 `json_scraper_graph/` - JSON data extraction and processing

## Getting Started
## 🚀 Getting Started

1. Choose the example that best fits your use case
2. Navigate to the corresponding directory
3. Follow the README instructions in each directory
4. Configure any required environment variables using the provided `.env.example` files

## Requirements
## ⚡ Quick Setup

```bash
pip install scrapegraphai

playwright install

# choose an example
cd examples/smart_scraper_graph/openai

# run the example
python smart_scraper_openai.py
```

## 📋 Requirements

Each example may have its own specific requirements. Please refer to the individual README files in each directory for detailed setup instructions.

## 📚 Additional Resources

- 📖 [Full Documentation](https://docs-oss.scrapegraphai.com/examples)
- 💡 [Examples Repository](https://github.com/ScrapeGraphAI/ScrapegraphLib-Examples)
- 🤝 [Community Support](https://github.com/ScrapeGraphAI/scrapegraph-ai/discussions)

## 🤔 Need Help?

- Check out our [documentation](https://docs-oss.scrapegraphai.com)
- Join our [Discord community](https://discord.gg/scrapegraphai)
- Open an [issue](https://github.com/ScrapeGraphAI/scrapegraph-ai/issues)

---

⭐ Don't forget to star our repository if you find these examples helpful!
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ OPENAI_API_KEY=your-openai-api-key-here
# Optional Configurations
MAX_TOKENS=4000
MODEL_NAME=gpt-4-1106-preview
TEMPERATURE=0.7
TEMPERATURE=0.7
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ results = graph.scrape("https://example.com")
## Environment Variables

Required environment variables:
- `OPENAI_API_KEY`: Your OpenAI API key
- `OPENAI_API_KEY`: Your OpenAI API key
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
"""
"""
Basic example of scraping pipeline using SmartScraper
"""

import json

from scrapegraphai.graphs import SmartScraperLiteGraph
from scrapegraphai.utils import prettify_exec_info

Expand All @@ -14,13 +16,13 @@
"base_url": "http://localhost:11434",
},
"verbose": True,
"headless": False
"headless": False,
}

smart_scraper_lite_graph = SmartScraperLiteGraph(
prompt="Who is Marco Perini?",
source="https://perinim.github.io/",
config=graph_config
config=graph_config,
)

result = smart_scraper_lite_graph.run()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
"""
"""
Basic example of scraping pipeline using SmartScraper
"""

import os
import json

from dotenv import load_dotenv

from scrapegraphai.graphs import SmartScraperMultiConcatGraph

load_dotenv()
Expand All @@ -18,10 +19,10 @@
"model": "ollama/llama3.1",
"temperature": 0,
"format": "json", # Ollama needs the format to be specified explicitly
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
},
"verbose": True,
"headless": False
"headless": False,
}

# *******************************************************
Expand All @@ -30,12 +31,9 @@

multiple_search_graph = SmartScraperMultiConcatGraph(
prompt="Who is Marco Perini?",
source= [
"https://perinim.github.io/",
"https://perinim.github.io/cv/"
],
source=["https://perinim.github.io/", "https://perinim.github.io/cv/"],
schema=None,
config=graph_config
config=graph_config,
)

result = multiple_search_graph.run()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
"""
"""
Basic example of scraping pipeline using SmartScraper
"""

import json

from scrapegraphai.graphs import SmartScraperMultiLiteGraph
from scrapegraphai.utils import prettify_exec_info

Expand All @@ -17,7 +19,7 @@
"base_url": "http://localhost:11434", # set ollama URL arbitrarily
},
"verbose": True,
"headless": False
"headless": False,
}

# ************************************************
Expand All @@ -26,11 +28,8 @@

smart_scraper_multi_lite_graph = SmartScraperMultiLiteGraph(
prompt="Who is Marco Perini?",
source= [
"https://perinim.github.io/",
"https://perinim.github.io/cv/"
],
config=graph_config
source=["https://perinim.github.io/", "https://perinim.github.io/cv/"],
config=graph_config,
)

result = smart_scraper_multi_lite_graph.run()
Expand All @@ -42,4 +41,3 @@

graph_exec_info = smart_scraper_multi_lite_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

Loading

0 comments on commit 14b4b19

Please sign in to comment.