Skip to content

Commit

Permalink
fix unstructured API, remove unused parameters (#3056)
Browse files Browse the repository at this point in the history
  • Loading branch information
vikeychen authored Apr 3, 2024
1 parent d241d66 commit e4f686d
Show file tree
Hide file tree
Showing 7 changed files with 8 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(

def extract(self) -> list[Document]:
from unstructured.partition.email import partition_email
elements = partition_email(filename=self._file_path, api_url=self._api_url)
elements = partition_email(filename=self._file_path)

# noinspection PyBroadException
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def __init__(
def extract(self) -> list[Document]:
from unstructured.partition.md import partition_md

elements = partition_md(filename=self._file_path, api_url=self._api_url)
elements = partition_md(filename=self._file_path)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(
def extract(self) -> list[Document]:
from unstructured.partition.msg import partition_msg

elements = partition_msg(filename=self._file_path, api_url=self._api_url)
elements = partition_msg(filename=self._file_path)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ def __init__(
self._api_url = api_url

def extract(self) -> list[Document]:
from unstructured.partition.ppt import partition_ppt
from unstructured.partition.api import partition_via_api

elements = partition_ppt(filename=self._file_path, api_url=self._api_url)
elements = partition_via_api(filename=self._file_path, api_url=self._api_url)
text_by_page = {}
for element in elements:
page = element.metadata.page_number
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(
def extract(self) -> list[Document]:
from unstructured.partition.pptx import partition_pptx

elements = partition_pptx(filename=self._file_path, api_url=self._api_url)
elements = partition_pptx(filename=self._file_path)
text_by_page = {}
for element in elements:
page = element.metadata.page_number
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(
def extract(self) -> list[Document]:
from unstructured.partition.text import partition_text

elements = partition_text(filename=self._file_path, api_url=self._api_url)
elements = partition_text(filename=self._file_path)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(
def extract(self) -> list[Document]:
from unstructured.partition.xml import partition_xml

elements = partition_xml(filename=self._file_path, xml_keep_tags=True, api_url=self._api_url)
elements = partition_xml(filename=self._file_path, xml_keep_tags=True)
from unstructured.chunking.title import chunk_by_title
chunks = chunk_by_title(elements, max_characters=2000, combine_text_under_n_chars=2000)
documents = []
Expand Down

0 comments on commit e4f686d

Please sign in to comment.