
Commit

Merge branch 'feat-jina-rerank' of https://github.com/JoanFM/langchain into feat-jina-rerank
JoanFM committed Apr 8, 2024
2 parents 41bdb01 + 455d320 commit d50d611
Showing 28 changed files with 825 additions and 594 deletions.
2 changes: 1 addition & 1 deletion cookbook/rewrite.ipynb
@@ -245,7 +245,7 @@
"\n",
"\n",
"def _parse(text):\n",
" return text.strip(\"**\")"
" return text.strip('\"').strip(\"**\")"
]
},
{
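The new `_parse` strips a surrounding double quote before stripping the asterisks, so a rewritten query that the model wraps in quotes is cleaned as well. A minimal illustration of the behavior change (the sample string is illustrative):

    def _parse_old(text):
        return text.strip("**")

    def _parse_new(text):
        return text.strip('"').strip("**")

    raw = '"**rewritten query**"'
    # Old: the leading quote blocks the asterisk strip entirely.
    print(_parse_old(raw))  # "**rewritten query**"
    # New: quotes come off first, then the asterisks.
    print(_parse_new(raw))  # rewritten query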
5 changes: 2 additions & 3 deletions docs/docs/integrations/retrievers/jina-reranker.ipynb
@@ -41,6 +41,7 @@
"source": [
"# Helper function for printing docs\n",
"\n",
"\n",
"def pretty_print_docs(docs):\n",
" print(\n",
" f\"\\n{'-' * 100}\\n\".join(\n",
@@ -125,9 +126,7 @@
"text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)\n",
"texts = text_splitter.split_documents(documents)\n",
"\n",
"embedding = JinaEmbeddings(\n",
" model_name=\"jina-embeddings-v2-base-en\"\n",
")\n",
"embedding = JinaEmbeddings(model_name=\"jina-embeddings-v2-base-en\")\n",
"retriever = FAISS.from_documents(texts, embedding).as_retriever(search_kwargs={\"k\": 20})\n",
"\n",
"query = \"What did the president say about Ketanji Brown Jackson\"\n",
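Elsewhere in this notebook, the FAISS retriever built above is presumably paired with the Jina reranker this branch introduces. A sketch of that pairing, assuming the `JinaRerank` compressor added by this branch and a JINA_API_KEY in the environment (names inferred from the branch, not shown in this hunk):

    from langchain.retrievers import ContextualCompressionRetriever
    from langchain_community.document_compressors import JinaRerank

    # Rerank the 20 retrieved chunks and keep only the most relevant ones.
    compressor = JinaRerank()
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor, base_retriever=retriever
    )
    compressed_docs = compression_retriever.invoke(query)
    pretty_print_docs(compressed_docs)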
12 changes: 7 additions & 5 deletions docs/docs/integrations/toolkits/pandas.ipynb
@@ -34,7 +34,9 @@
"import pandas as pd\n",
"from langchain_openai import OpenAI\n",
"\n",
"df = pd.read_csv(\"titanic.csv\")"
"df = pd.read_csv(\n",
" \"https://raw.githubusercontent.com/pandas-dev/pandas/main/doc/data/titanic.csv\"\n",
")"
]
},
{
@@ -116,7 +118,7 @@
}
],
"source": [
"agent.run(\"how many rows are there?\")"
"agent.invoke(\"how many rows are there?\")"
]
},
{
@@ -154,7 +156,7 @@
}
],
"source": [
"agent.run(\"how many people have more than 3 siblings\")"
"agent.invoke(\"how many people have more than 3 siblings\")"
]
},
{
@@ -204,7 +206,7 @@
}
],
"source": [
"agent.run(\"whats the square root of the average age?\")"
"agent.invoke(\"whats the square root of the average age?\")"
]
},
{
@@ -264,7 +266,7 @@
],
"source": [
"agent = create_pandas_dataframe_agent(OpenAI(temperature=0), [df, df1], verbose=True)\n",
"agent.run(\"how many rows in the age column are different?\")"
"agent.invoke(\"how many rows in the age column are different?\")"
]
},
{
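This notebook swaps the deprecated `Chain.run` for `invoke`. Note the return type differs: `run` returned the agent's final answer as a bare string, while `invoke` returns a dict of the chain's inputs and outputs. A short sketch, assuming the agent's output key is the standard "output":

    result = agent.invoke("how many rows are there?")
    # invoke returns something like {"input": ..., "output": ...};
    # take "output" to recover what run() used to return.
    print(result["output"])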
2 changes: 1 addition & 1 deletion libs/cli/langchain_cli/utils/git.py
@@ -155,7 +155,7 @@ def _get_repo_path(gitstring: str, ref: Optional[str], repo_dir: Path) -> Path:
removed_protocol = gitstring.split("://")[-1]
removed_basename = re.split(r"[/:]", removed_protocol, 1)[-1]
removed_extras = removed_basename.split("#")[0]
foldername = re.sub(r"[^a-zA-Z0-9_]", "_", removed_extras)
foldername = re.sub(r"\W", "_", removed_extras)

directory_name = f"{foldername}_{hashed}"
return repo_dir / directory_name
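The replacement pattern is behavior-preserving for ASCII input: `\W` is the complement of `\w`, which matches `[a-zA-Z0-9_]` plus, in Python 3, non-ASCII word characters. The only difference is that Unicode letters in a repo path now survive sanitization. A quick check (illustrative strings):

    import re

    for s in ["github.com/JoanFM/langchain", "repo-name#ref"]:
        assert re.sub(r"[^a-zA-Z0-9_]", "_", s) == re.sub(r"\W", "_", s)

    # Unicode is where the two diverge: \w and \W are Unicode-aware by default.
    print(re.sub(r"[^a-zA-Z0-9_]", "_", "café"))  # caf_
    print(re.sub(r"\W", "_", "café"))             # café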
2 changes: 1 addition & 1 deletion libs/community/langchain_community/chat_models/cohere.py
@@ -244,4 +244,4 @@ async def _agenerate(

def get_num_tokens(self, text: str) -> int:
"""Calculate number of tokens."""
- return len(self.client.tokenize(text).tokens)
+ return len(self.client.tokenize(text=text).tokens)
libs/community/langchain_community/document_loaders/recursive_url_loader.py
@@ -94,6 +94,8 @@ def __init__(
headers: Optional[dict] = None,
check_response_status: bool = False,
continue_on_failure: bool = True,
+ *,
+ base_url: Optional[str] = None,
) -> None:
"""Initialize with URL to crawl and any subdirectories to exclude.
@@ -120,6 +122,7 @@ def __init__(
URLs with error responses (400-599).
continue_on_failure: If True, continue if getting or parsing a link raises
an exception. Otherwise, raise the exception.
+ base_url: The base url to check for outside links against.
"""

self.url = url
@@ -146,6 +149,7 @@ def __init__(
self.headers = headers
self.check_response_status = check_response_status
self.continue_on_failure = continue_on_failure
+ self.base_url = base_url if base_url is not None else url

def _get_child_links_recursive(
self, url: str, visited: Set[str], *, depth: int = 0
@@ -187,7 +191,7 @@ def _get_child_links_recursive(
sub_links = extract_sub_links(
response.text,
url,
- base_url=self.url,
+ base_url=self.base_url,
pattern=self.link_regex,
prevent_outside=self.prevent_outside,
exclude_prefixes=self.exclude_dirs,
@@ -273,7 +277,7 @@ async def _async_get_child_links_recursive(
sub_links = extract_sub_links(
text,
url,
- base_url=self.url,
+ base_url=self.base_url,
pattern=self.link_regex,
prevent_outside=self.prevent_outside,
exclude_prefixes=self.exclude_dirs,
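The `base_url` option added above makes the outside-link check configurable: sub-links used to be filtered against the starting `url`, and are now filtered against `base_url`, which defaults to `url`, so existing behavior is unchanged. That lets a crawl start on a deep page while still following links anywhere under a broader prefix. A sketch, assuming this is `RecursiveUrlLoader` from `langchain_community` (the file header is inferred; URLs are illustrative):

    from langchain_community.document_loaders import RecursiveUrlLoader

    # Start from a deep page, but treat everything under /3.9/ as inside
    # the crawl instead of pruning links that leave the start page's subtree.
    loader = RecursiveUrlLoader(
        "https://docs.python.org/3.9/library/index.html",
        prevent_outside=True,
        base_url="https://docs.python.org/3.9/",
    )
    docs = loader.load()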
2 changes: 1 addition & 1 deletion libs/community/langchain_community/llms/llamacpp.py
@@ -344,11 +344,11 @@ def _stream(
text=part["choices"][0]["text"],
generation_info={"logprobs": logprobs},
)
- yield chunk
if run_manager:
run_manager.on_llm_new_token(
token=chunk.text, verbose=self.verbose, log_probs=logprobs
)
+ yield chunk

def get_num_tokens(self, text: str) -> int:
tokenized_text = self.client.tokenize(text.encode("utf-8"))
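The llama.cpp fix reorders streaming so the `on_llm_new_token` callback fires before the chunk is yielded; previously a consumer could receive a token before callback handlers had seen it. The same ordering concern in miniature (hypothetical names):

    def stream_tokens(tokens, on_new_token=None):
        for token in tokens:
            # Notify the callback before handing the token to the consumer,
            # so handlers have seen it even if the consumer stops early.
            if on_new_token is not None:
                on_new_token(token)
            yield token

    for t in stream_tokens(["Hello", ",", " world"], on_new_token=print):
        if t == ",":
            break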
libs/community/langchain_community/vectorstores/docarray/hnsw.py
@@ -14,7 +14,7 @@ class DocArrayHnswSearch(DocArrayIndex):
"""`HnswLib` storage using `DocArray` package.
To use it, you should have the ``docarray`` package with version >=0.32.0 installed.
You can install it with `pip install "langchain[docarray]"`.
You can install it with `pip install "docarray[hnswlib]"`.
"""

@classmethod
74 changes: 73 additions & 1 deletion libs/core/langchain_core/prompts/chat.py
@@ -551,7 +551,10 @@ def pretty_print(self) -> None:

MessageLikeRepresentation = Union[
MessageLike,
- Tuple[Union[str, Type], Union[str, List[dict], List[object]]],
+ Tuple[
+     Union[str, Type],
+     Union[str, List[dict], List[object]],
+ ],
str,
]

@@ -590,6 +593,45 @@ class ChatPromptTemplate(BaseChatPromptTemplate):
# ]
#)
Messages Placeholder:
.. code-block:: python
# In addition to Human/AI/Tool/Function messages,
# you can initialize the template with a MessagesPlaceholder
# either using the class directly or with the shorthand tuple syntax:
template = ChatPromptTemplate.from_messages([
("system", "You are a helpful AI bot."),
# Means the template will receive an optional list of messages under
# the "conversation" key
("placeholder", "{conversation}")
# Equivalently:
# MessagesPlaceholder(variable_name="conversation", optional=True)
])
prompt_value = template.invoke(
{
"conversation": [
("human", "Hi!"),
("ai", "How can I assist you today?"),
("human", "Can you make me an ice cream sundae?"),
("ai", "No.")
]
}
)
# Output:
# ChatPromptValue(
# messages=[
# SystemMessage(content='You are a helpful AI bot.'),
# HumanMessage(content='Hi!'),
# AIMessage(content='How can I assist you today?'),
# HumanMessage(content='Can you make me an ice cream sundae?'),
# AIMessage(content='No.'),
# ]
#)
Single-variable template:
If your prompt has only a single input variable (i.e., 1 instance of "{variable_name}"),
@@ -949,6 +991,36 @@ def _create_template_from_message_type(
message = AIMessagePromptTemplate.from_template(cast(str, template))
elif message_type == "system":
message = SystemMessagePromptTemplate.from_template(cast(str, template))
elif message_type == "placeholder":
if isinstance(template, str):
if template[0] != "{" or template[-1] != "}":
raise ValueError(
f"Invalid placeholder template: {template}."
" Expected a variable name surrounded by curly braces."
)
var_name = template[1:-1]
message = MessagesPlaceholder(variable_name=var_name, optional=True)
elif len(template) == 2 and isinstance(template[1], bool):
var_name_wrapped, is_optional = template
if not isinstance(var_name_wrapped, str):
raise ValueError(
"Expected variable name to be a string." f" Got: {var_name_wrapped}"
)
if var_name_wrapped[0] != "{" or var_name_wrapped[-1] != "}":
raise ValueError(
f"Invalid placeholder template: {var_name_wrapped}."
" Expected a variable name surrounded by curly braces."
)
var_name = var_name_wrapped[1:-1]

message = MessagesPlaceholder(variable_name=var_name, optional=is_optional)
else:
raise ValueError(
"Unexpected arguments for placeholder message type."
" Expected either a single string variable name"
" or a list of [variable_name: str, is_optional: bool]."
f" Got: {template}"
)
else:
raise ValueError(
f"Unexpected message type: {message_type}. Use one of 'human',"
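Taken together, the docstring and parsing changes mean ("placeholder", "{var}") creates an optional MessagesPlaceholder, while the two-element list form controls optionality explicitly. A short sketch mirroring the unit test added below:

    from langchain_core.prompts import ChatPromptTemplate

    # String form: optional, so formatting without "history" yields no messages.
    optional = ChatPromptTemplate.from_messages([("placeholder", "{history}")])
    print(optional.format_messages())  # []

    # List form with is_optional=False: "history" is now required, and
    # formatting without it raises a KeyError.
    required = ChatPromptTemplate.from_messages(
        [("placeholder", ["{history}", False])]
    )
    print(required.format_messages(history=[("human", "hi")]))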
100 changes: 100 additions & 0 deletions libs/core/langchain_core/utils/function_calling.py
@@ -3,6 +3,7 @@
from __future__ import annotations

import inspect
+ import uuid
from typing import (
TYPE_CHECKING,
Any,
@@ -20,6 +21,12 @@
from typing_extensions import TypedDict

from langchain_core._api import deprecated
+ from langchain_core.messages import (
+     AIMessage,
+     BaseMessage,
+     HumanMessage,
+     ToolMessage,
+ )
from langchain_core.pydantic_v1 import BaseModel
from langchain_core.utils.json_schema import dereference_refs

@@ -332,3 +339,96 @@ def convert_to_openai_tool(
return tool
function = convert_to_openai_function(tool)
return {"type": "function", "function": function}


def tool_example_to_messages(
input: str, tool_calls: List[BaseModel], tool_outputs: Optional[List[str]] = None
) -> List[BaseMessage]:
"""Convert an example into a list of messages that can be fed into an LLM.
This code is an adapter that converts a single example to a list of messages
that can be fed into a chat model.
The list of messages per example corresponds to:
1) HumanMessage: contains the content from which information should be extracted.
2) AIMessage: contains the extracted information from the model
3) ToolMessage: contains confirmation to the model that the tool was
requested correctly.
The ToolMessage is required because some chat models are hyper-optimized for agents
rather than for an extraction use case.
Arguments:
input: string, the user input
tool_calls: List[BaseModel], a list of tool calls represented as Pydantic
BaseModels
tool_outputs: Optional[List[str]], a list of tool call outputs.
Does not need to be provided. If not provided, a placeholder value
will be inserted.
Returns:
A list of messages
Examples:
.. code-block:: python
from typing import List, Optional
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
class Person(BaseModel):
'''Information about a person.'''
name: Optional[str] = Field(..., description="The name of the person")
hair_color: Optional[str] = Field(
..., description="The color of the peron's eyes if known"
)
height_in_meters: Optional[str] = Field(
..., description="Height in METERs"
)
examples = [
(
"The ocean is vast and blue. It's more than 20,000 feet deep.",
Person(name=None, height_in_meters=None, hair_color=None),
),
(
"Fiona traveled far from France to Spain.",
Person(name="Fiona", height_in_meters=None, hair_color=None),
),
]
messages = []
for txt, tool_call in examples:
messages.extend(
tool_example_to_messages(txt, [tool_call])
)
"""
messages: List[BaseMessage] = [HumanMessage(content=input)]
openai_tool_calls = []
for tool_call in tool_calls:
openai_tool_calls.append(
{
"id": str(uuid.uuid4()),
"type": "function",
"function": {
# The name of the function right now corresponds to the name
# of the pydantic model. This is implicit in the API right now,
# and will be improved over time.
"name": tool_call.__class__.__name__,
"arguments": tool_call.json(),
},
}
)
messages.append(
AIMessage(content="", additional_kwargs={"tool_calls": openai_tool_calls})
)
tool_outputs = tool_outputs or ["You have correctly called this tool."] * len(
openai_tool_calls
)
for output, tool_call_dict in zip(tool_outputs, openai_tool_calls):
messages.append(ToolMessage(content=output, tool_call_id=tool_call_dict["id"])) # type: ignore
return messages
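When real tool outputs are available, pass them as `tool_outputs` so each `ToolMessage` carries actual content instead of the placeholder acknowledgement. A sketch continuing the docstring's `Person` example (the output string is hypothetical):

    msgs = tool_example_to_messages(
        "Fiona traveled far from France to Spain.",
        [Person(name="Fiona", height_in_meters=None, hair_color=None)],
        tool_outputs=["Recorded: Person(name='Fiona')."],
    )
    # msgs == [HumanMessage, AIMessage(with tool_calls), ToolMessage], ready
    # to prepend to a chat history as a few-shot extraction example.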
19 changes: 19 additions & 0 deletions libs/core/tests/unit_tests/prompts/test_chat.py
@@ -535,6 +535,25 @@ def test_chat_prompt_message_placeholder_partial() -> None:
assert prompt.format_messages() == [SystemMessage(content="foo")]


def test_chat_prompt_message_placeholder_tuple() -> None:
prompt = ChatPromptTemplate.from_messages([("placeholder", "{convo}")])
assert prompt.format_messages(convo=[("user", "foo")]) == [
HumanMessage(content="foo")
]

assert prompt.format_messages() == []

# Is optional = False
optional_prompt = ChatPromptTemplate.from_messages(
[("placeholder", ["{convo}", False])]
)
assert optional_prompt.format_messages(convo=[("user", "foo")]) == [
HumanMessage(content="foo")
]
with pytest.raises(KeyError):
assert optional_prompt.format_messages() == []


def test_messages_prompt_accepts_list() -> None:
prompt = ChatPromptTemplate.from_messages([MessagesPlaceholder("history")])
value = prompt.invoke([("user", "Hi there")]) # type: ignore
(The remaining changed files were not loaded in this view.)
