From a27745795fb516bceee671c75b2b46b63a937b2a Mon Sep 17 00:00:00 2001
From: Christophe Bornet
Date: Fri, 20 Sep 2024 16:19:22 +0200
Subject: [PATCH] Add GLiNERLinkExtractorComponent

---
Review notes (ignored by `git am`): the added file was revised after the
original submission, so the blob id on the `index` line below is not
recomputed for the revised content.

 .../link_extractors/GliNERLinkExtractor.py    | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 src/backend/base/langflow/components/link_extractors/GliNERLinkExtractor.py

diff --git a/src/backend/base/langflow/components/link_extractors/GliNERLinkExtractor.py b/src/backend/base/langflow/components/link_extractors/GliNERLinkExtractor.py
new file mode 100644
index 000000000000..be6e0db03cd4
--- /dev/null
+++ b/src/backend/base/langflow/components/link_extractors/GliNERLinkExtractor.py
@@ -0,0 +1,48 @@
+from typing import Any
+
+from langchain_community.graph_vectorstores.extractors import GLiNERLinkExtractor, LinkExtractorTransformer
+from langchain_core.documents import BaseDocumentTransformer
+
+from langflow.base.document_transformers.model import LCDocumentTransformerComponent
+from langflow.inputs import DataInput, DictInput, StrInput
+
+
+class GLiNERLinkExtractorComponent(LCDocumentTransformerComponent):
+    """Langflow component that wraps LangChain's GLiNERLinkExtractor in a LinkExtractorTransformer."""
+
+    display_name = "GLiNER Link Extractor"
+    description = "Extract named entities links from documents using GLiNER"
+    documentation = "https://python.langchain.com/api_reference/community/graph_vectorstores/langchain_community.graph_vectorstores.extractors.gliner_link_extractor.GLiNERLinkExtractor.html"
+    name = "GLiNERLinkExtractor"
+
+    inputs = [
+        StrInput(name="labels", display_name="List of kinds of entities to extract", required=True, is_list=True),
+        StrInput(name="kind", display_name="Kind of edge", value="entity"),
+        StrInput(name="model", display_name="GLiNER model to use", value="urchade/gliner_mediumv2.1"),
+        DictInput(
+            name="extract_kwargs",
+            display_name="Arguments to pass to GLiNER.",
+            is_list=True,
+            advanced=True,
+        ),
+        DataInput(
+            name="data_input",
+            display_name="Input",
+            info="The texts from which to extract links.",
+            input_types=["Document", "Data"],
+        ),
+    ]
+
+    def get_data_input(self) -> Any:
+        """Return the value of the ``data_input`` field."""
+        return self.data_input
+
+    def build_document_transformer(self) -> BaseDocumentTransformer:
+        """Build a LinkExtractorTransformer holding one GLiNERLinkExtractor configured from the inputs."""
+        extractor = GLiNERLinkExtractor(
+            self.labels,
+            kind=self.kind,
+            model=self.model,
+            extract_kwargs=self.extract_kwargs,
+        )
+        return LinkExtractorTransformer([extractor])