Skip to content

Commit

Permalink
fix: local html handling
Browse files Browse the repository at this point in the history
  • Loading branch information
PeriniM committed Jan 6, 2025
1 parent 013a196 commit 2a15581
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 13 deletions.
7 changes: 2 additions & 5 deletions examples/openai/xml_scraper_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import os
from dotenv import load_dotenv
from scrapegraphai.graphs import XMLScraperGraph
-from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+from scrapegraphai.utils import prettify_exec_info

load_dotenv()

Expand All @@ -23,7 +23,7 @@
# Define the configuration for the graph
# ************************************************

-openai_key = os.getenv("OPENAI_APIKEY")
+openai_key = os.getenv("OPENAI_API_KEY")

graph_config = {
"llm": {
Expand Down Expand Up @@ -53,6 +53,3 @@
graph_exec_info = xml_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

-# Save to json or csv
-convert_to_csv(result, "result")
-convert_to_json(result, "result")
2 changes: 1 addition & 1 deletion scrapegraphai/graphs/smart_scraper_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ def _create_graph(self) -> BaseGraph:
return response

fetch_node = FetchNode(
-input="url| local_dir",
+input="url | local_dir",
output=["doc"],
node_config={
"llm_model": self.llm_model,
Expand Down
12 changes: 5 additions & 7 deletions scrapegraphai/nodes/fetch_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,13 @@ def execute(self, state):

if input_type in handlers:
return handlers[input_type](state, input_type, source)
-elif self.input == "pdf_dir":
-return state

-try:
+elif input_type == "local_dir":
+return self.handle_local_source(state, source)
+elif input_type == "url":
return self.handle_web_source(state, source)
-except ValueError as e:
-raise
+else:
+raise ValueError(f"Invalid input type: {input_type}")

-return self.handle_local_source(state, source)

def handle_directory(self, state, input_type, source):
"""
Expand Down

0 comments on commit 2a15581

Please sign in to comment.