diff --git a/examples/extensions/huggingface/huggingface_agent.py b/examples/extensions/huggingface/huggingface_agent.py index f8a1bf06..4259bd89 100644 --- a/examples/extensions/huggingface/huggingface_agent.py +++ b/examples/extensions/huggingface/huggingface_agent.py @@ -49,7 +49,7 @@ def __call__(self): agent = IBMGenAIAgent( client=client, - model="meta-llama/llama-2-70b-chat", + model="meta-llama/llama-3-70b-instruct", parameters=TextGenerationParameters(min_new_tokens=10, max_new_tokens=200, random_seed=777, temperature=0), additional_tools=[BitcoinPriceFetcher()], ) diff --git a/examples/extensions/langchain/langchain_agent.py b/examples/extensions/langchain/langchain_agent.py index 1c21e98c..1bd79e98 100644 --- a/examples/extensions/langchain/langchain_agent.py +++ b/examples/extensions/langchain/langchain_agent.py @@ -78,7 +78,7 @@ def _run(self, word: str, run_manager: Optional[CallbackManagerForToolRun] = Non client = Client(credentials=Credentials.from_env()) llm = LangChainChatInterface( client=client, - model_id="meta-llama/llama-2-70b-chat", + model_id="meta-llama/llama-3-70b-instruct", parameters=TextGenerationParameters( max_new_tokens=250, min_new_tokens=20, temperature=0, stop_sequences=["\nObservation"] ), diff --git a/examples/extensions/langchain/langchain_chat_generate.py b/examples/extensions/langchain/langchain_chat_generate.py index 086172ee..c7d07076 100644 --- a/examples/extensions/langchain/langchain_chat_generate.py +++ b/examples/extensions/langchain/langchain_chat_generate.py @@ -27,7 +27,7 @@ def heading(text: str) -> str: llm = LangChainChatInterface( client=Client(credentials=Credentials.from_env()), - model_id="meta-llama/llama-2-70b-chat", + model_id="meta-llama/llama-3-70b-instruct", parameters=TextGenerationParameters( decoding_method=DecodingMethod.SAMPLE, max_new_tokens=100, diff --git a/examples/extensions/langchain/langchain_chat_stream.py b/examples/extensions/langchain/langchain_chat_stream.py index e9794083..169336cd 100644 --- a/examples/extensions/langchain/langchain_chat_stream.py +++ b/examples/extensions/langchain/langchain_chat_stream.py @@ -21,7 +21,7 @@ def heading(text: str) -> str: print(heading("Stream chat with langchain")) llm = LangChainChatInterface( - model_id="meta-llama/llama-2-70b-chat", + model_id="meta-llama/llama-3-70b-instruct", client=Client(credentials=Credentials.from_env()), parameters=TextGenerationParameters( decoding_method=DecodingMethod.SAMPLE, diff --git a/examples/extensions/langchain/langchain_sql_agent.py b/examples/extensions/langchain/langchain_sql_agent.py index d4601795..4e212372 100644 --- a/examples/extensions/langchain/langchain_sql_agent.py +++ b/examples/extensions/langchain/langchain_sql_agent.py @@ -75,7 +75,7 @@ def create_llm(): client = Client(credentials=Credentials.from_env()) return LangChainChatInterface( client=client, - model_id="meta-llama/llama-2-70b-chat", + model_id="meta-llama/llama-3-70b-instruct", parameters=TextGenerationParameters( max_new_tokens=250, min_new_tokens=20, temperature=0, stop_sequences=["\nObservation"] ), diff --git a/examples/extensions/llama_index/llama_index_llm.py b/examples/extensions/llama_index/llama_index_llm.py index b9e5c552..8703ae7f 100644 --- a/examples/extensions/llama_index/llama_index_llm.py +++ b/examples/extensions/llama_index/llama_index_llm.py @@ -22,7 +22,7 @@ def heading(text: str) -> str: llm = IBMGenAILlamaIndex( client=client, - model_id="meta-llama/llama-2-70b-chat", + model_id="meta-llama/llama-3-70b-instruct", parameters=TextGenerationParameters( decoding_method=DecodingMethod.SAMPLE, max_new_tokens=100, diff --git a/examples/text/chat.py b/examples/text/chat.py index 16633c59..d14de0d9 100644 --- a/examples/text/chat.py +++ b/examples/text/chat.py @@ -31,7 +31,7 @@ def heading(text: str) -> str: ) client = Client(credentials=Credentials.from_env()) -model_id = "meta-llama/llama-2-70b-chat" +model_id = "meta-llama/llama-3-70b-instruct" prompt = "What is NLP and how it has evolved over the years?" print(heading("Generating a chat response")) diff --git a/src/genai/extensions/langchain/chat_llm.py b/src/genai/extensions/langchain/chat_llm.py index f3606567..c6242818 100644 --- a/src/genai/extensions/langchain/chat_llm.py +++ b/src/genai/extensions/langchain/chat_llm.py @@ -93,7 +93,7 @@ class LangChainChatInterface(BaseChatModel): client = Client(credentials=Credentials.from_env()) llm = LangChainChatInterface( client=client, - model_id="meta-llama/llama-2-70b-chat", + model_id="meta-llama/llama-3-70b-instruct", parameters=TextGenerationParameters( max_new_tokens=250, ) diff --git a/src/genai/extensions/langchain/llm.py b/src/genai/extensions/langchain/llm.py index 866ab2f8..1619f53e 100644 --- a/src/genai/extensions/langchain/llm.py +++ b/src/genai/extensions/langchain/llm.py @@ -62,7 +62,7 @@ class LangChainInterface(LLM): client = Client(credentials=Credentials.from_env()) llm = LangChainInterface( client=client, - model_id="meta-llama/llama-2-70b-chat", + model_id="meta-llama/llama-3-70b-instruct", parameters=TextGenerationParameters(max_new_tokens=50) ) diff --git a/src/genai/text/chat/chat_generation_service.py b/src/genai/text/chat/chat_generation_service.py index 2e824471..56a35a28 100644 --- a/src/genai/text/chat/chat_generation_service.py +++ b/src/genai/text/chat/chat_generation_service.py @@ -79,7 +79,7 @@ def create( # Create a new conversation response = client.text.chat.create( - model_id="meta-llama/llama-2-70b-chat", + model_id="meta-llama/llama-3-70b-instruct", messages=[HumanMessage(content="Describe the game Chess?")], parameters=TextGenerationParameters(max_token_limit=100) ) @@ -152,7 +152,7 @@ def create_stream( # Create a new conversation for response in client.text.chat.create_stream( - model_id="meta-llama/llama-2-70b-chat", + model_id="meta-llama/llama-3-70b-instruct", messages=[HumanMessage(content="Describe the game Chess?")], parameters=TextGenerationParameters(max_token_limit=100) ): diff --git a/tests/integration/extensions/cassettes/test_huggingface_agent/TestHuggingfaceAgent.test_agent.yaml b/tests/integration/extensions/cassettes/test_huggingface_agent/TestHuggingfaceAgent.test_agent.yaml index c75e2a87..5484eaf3 100644 --- a/tests/integration/extensions/cassettes/test_huggingface_agent/TestHuggingfaceAgent.test_agent.yaml +++ b/tests/integration/extensions/cassettes/test_huggingface_agent/TestHuggingfaceAgent.test_agent.yaml @@ -12,22 +12,22 @@ interactions: uri: https://api.com/v2/text/generation/limits?version=2023-11-22 response: body: - string: '{"result":{"concurrency":{"limit":200,"remaining":200}}}' + string: '{"result":{"concurrency":{"limit":10,"remaining":10}}}' headers: cache-control: - private content-length: - - '56' + - '54' content-type: - application/json; charset=utf-8 content-version: - '2023-11-22' date: - - Wed, 20 Mar 2024 08:27:00 GMT + - Mon, 20 May 2024 14:12:17 GMT keep-alive: - timeout=72 set-cookie: - - 2eef5f4c257f6bca76e8da5586743beb=1e3545705d3737525c7629e9f28dc93d; path=/; + - 2eef5f4c257f6bca76e8da5586743beb=de04e502e3969a930842cae695f31f86; path=/; HttpOnly; Secure; SameSite=None vary: - accept-encoding @@ -75,47 +75,41 @@ interactions: `src_lang`, which should be the language of the text to translate and `tgt_lang`, which should be the language for the desired ouput language. Both `src_lang` and `tgt_lang` are written in plain English, such as ''Romanian'', or ''Albanian''. - It returns the text translated in `tgt_lang`.\n- image_transformer: This is - a tool that transforms an image according to a prompt. It takes two inputs: - `image`, which should be the image to transform, and `prompt`, which should - be the prompt to use to change it. The prompt should only contain descriptive - adjectives, as if completing the prompt of the original image. It returns the - modified image.\n- text_downloader: This is a tool that downloads a file from - a `url`. It takes the `url` as input, and returns the text contained in the - file.\n- image_generator: This is a tool that creates an image according to - a prompt, which is a text description. It takes an input named `prompt` which - contains the image description and outputs an image.\n- video_generator: This - is a tool that creates a video according to a text description. It takes an - input named `prompt` which contains the image description, as well as an optional - input `seconds` which will be the duration of the video. The default is of two - seconds. The tool outputs a video object.\n\n\nTask: \"Answer the question in - the variable `question` about the image stored in the variable `image`. The - question is in French.\"\n\nI will use the following tools: `translator` to - translate the question into English and then `image_qa` to answer the question - on the input image.\n\nAnswer:\n```py\ntranslated_question = translator(question=question, - src_lang=\"French\", tgt_lang=\"English\")\nprint(f\"The translated question - is {translated_question}.\")\nanswer = image_qa(image=image, question=translated_question)\nprint(f\"The - answer is {answer}\")\n```\n\nTask: \"Identify the oldest person in the `document` - and create an image showcasing the result.\"\n\nI will use the following tools: - `document_qa` to find the oldest person in the document, then `image_generator` - to generate an image according to the answer.\n\nAnswer:\n```py\nanswer = document_qa(document, - question=\"What is the oldest person?\")\nprint(f\"The answer is {answer}.\")\nimage - = image_generator(answer)\n```\n\nTask: \"Generate an image using the text given - in the variable `caption`.\"\n\nI will use the following tool: `image_generator` - to generate an image.\n\nAnswer:\n```py\nimage = image_generator(prompt=caption)\n```\n\nTask: - \"Summarize the text given in the variable `text` and read it out loud.\"\n\nI - will use the following tools: `summarizer` to create a summary of the input - text, then `text_reader` to read it out loud.\n\nAnswer:\n```py\nsummarized_text - = summarizer(text)\nprint(f\"Summary: {summarized_text}\")\naudio_summary = - text_reader(summarized_text)\n```\n\nTask: \"Answer the question in the variable - `question` about the text in the variable `text`. Use the answer to generate - an image.\"\n\nI will use the following tools: `text_qa` to create the answer, - then `image_generator` to generate an image according to the answer.\n\nAnswer:\n```py\nanswer - = text_qa(text=text, question=question)\nprint(f\"The answer is {answer}.\")\nimage - = image_generator(answer)\n```\n\nTask: \"Caption the following `image`.\"\n\nI - will use the following tool: `image_captioner` to generate a caption for the - image.\n\nAnswer:\n```py\ncaption = image_captioner(image)\n```\n\nTask: \"Summarize - the chat\"\n\nI will use the following", "model_id": "meta-llama/llama-2-70b", + It returns the text translated in `tgt_lang`.\n- image_transformation: This + is a tool that transforms an image according to a prompt and returns the modified + image.\n- text_downloader: This is a tool that downloads a file from a `url`. + It takes the `url` as input, and returns the text contained in the file.\n- + image_generator: This is a tool that creates an image according to a prompt, + which is a text description.\n- video_generator: This is a tool that creates + a video according to a text description. It takes an optional input `seconds` + which will be the duration of the video. The default is of two seconds. The + tool outputs a video object.\n\n\nTask: \"Answer the question in the variable + `question` about the image stored in the variable `image`. The question is in + French.\"\n\nI will use the following tools: `translator` to translate the question + into English and then `image_qa` to answer the question on the input image.\n\nAnswer:\n```py\ntranslated_question + = translator(question=question, src_lang=\"French\", tgt_lang=\"English\")\nprint(f\"The + translated question is {translated_question}.\")\nanswer = image_qa(image=image, + question=translated_question)\nprint(f\"The answer is {answer}\")\n```\n\nTask: + \"Identify the oldest person in the `document` and create an image showcasing + the result.\"\n\nI will use the following tools: `document_qa` to find the oldest + person in the document, then `image_generator` to generate an image according + to the answer.\n\nAnswer:\n```py\nanswer = document_qa(document, question=\"What + is the oldest person?\")\nprint(f\"The answer is {answer}.\")\nimage = image_generator(answer)\n```\n\nTask: + \"Generate an image using the text given in the variable `caption`.\"\n\nI will + use the following tool: `image_generator` to generate an image.\n\nAnswer:\n```py\nimage + = image_generator(prompt=caption)\n```\n\nTask: \"Summarize the text given in + the variable `text` and read it out loud.\"\n\nI will use the following tools: + `summarizer` to create a summary of the input text, then `text_reader` to read + it out loud.\n\nAnswer:\n```py\nsummarized_text = summarizer(text)\nprint(f\"Summary: + {summarized_text}\")\naudio_summary = text_reader(summarized_text)\n```\n\nTask: + \"Answer the question in the variable `question` about the text in the variable + `text`. Use the answer to generate an image.\"\n\nI will use the following tools: + `text_qa` to create the answer, then `image_generator` to generate an image + according to the answer.\n\nAnswer:\n```py\nanswer = text_qa(text=text, question=question)\nprint(f\"The + answer is {answer}.\")\nimage = image_generator(answer)\n```\n\nTask: \"Caption + the following `image`.\"\n\nI will use the following tool: `image_captioner` + to generate a caption for the image.\n\nAnswer:\n```py\ncaption = image_captioner(image)\n```\n\nTask: + \"Summarize the chat\"\n\nI will use the following", "model_id": "meta-llama/llama-2-70b", "parameters": {"max_new_tokens": 500, "stop_sequences": ["Task:"]}}' headers: accept: @@ -125,30 +119,24 @@ interactions: connection: - keep-alive content-length: - - '6535' + - '6135' content-type: - application/json method: POST uri: https://api.com/v2/text/generation?version=2024-03-19 response: body: - string: '{"id":"230e435f-6c47-46bd-b0f2-ddc56c834dee","model_id":"meta-llama/llama-2-70b","created_at":"2024-03-20T08:27:06.828Z","results":[{"generated_text":" - tools: `document_qa` to identify the oldest person, then `image_captioner` - to generate a caption about that person, I will then generate an image from - that caption using `image_generator`, and finally `doc_to_text` and `text_reader` - to read the generated text. The list of tools will be executed one after the - other.\n\nAnswer:\n```py\nanswer = document_qa(document, question=\"What is - the oldest person?\")\ncaption = image_captions(answer)\nimage = image_generator(prompt=caption)\noutput - = summary_of_document(document)\n```\n\nTask:","generated_token_count":142,"input_token_count":1580,"stop_reason":"stop_sequence","stop_sequence":"Task:","seed":3264333442}]}' + string: '{"id":"22234f91-ef73-4721-9c85-04a22aad9296","model_id":"meta-llama/llama-2-70b","created_at":"2024-05-20T14:12:25.703Z","results":[{"generated_text":" + tool: `summarizer` to generate a summary of the chat.\n\nAnswer:\n```py\n))","generated_token_count":25,"input_token_count":1490,"stop_reason":"eos_token","seed":3150004800}]}' headers: content-length: - - '817' + - '327' content-type: - application/json; charset=utf-8 content-version: - '2024-03-19' date: - - Wed, 20 Mar 2024 08:27:06 GMT + - Mon, 20 May 2024 14:12:25 GMT keep-alive: - timeout=72 vary: diff --git a/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_async_generate.yaml b/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_async_generate.yaml index 71796a97..12d88294 100644 --- a/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_async_generate.yaml +++ b/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_async_generate.yaml @@ -8,7 +8,8 @@ interactions: explain why instead of answering something incorrectly.\n If you don''t know the answer to a question, please don''t share false information.\n ", "role": "system"}, {"content": "What is NLP and how it has evolved over the years?", - "role": "user"}], "model_id": "meta-llama/llama-2-70b-chat", "parameters": {}}' + "role": "user"}], "model_id": "meta-llama/llama-3-70b-instruct", "parameters": + {}}' headers: accept: - '*/*' @@ -17,28 +18,29 @@ interactions: connection: - keep-alive content-length: - - '730' + - '734' content-type: - application/json method: POST uri: https://api.com/v2/text/chat?version=2024-03-19 response: body: - string: '{"id":"1b962f25-87c0-4b96-9c77-739bd9f5b2f6","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-20T08:27:23.202Z","results":[{"generated_text":" NLP - (Natural Language Processing) refers to the branch of Artificial Intelligence","generated_token_count":20,"input_token_count":160,"stop_reason":"max_tokens","seed":683510637}],"conversation_id":"adc37c54-87cc-43ad-bf50-16013eec263f"}' + string: '{"id":"01f28b95-5ecd-4a01-9a2b-7803c6db824f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:28.750Z","results":[{"generated_text":"Natural + Language Processing (NLP) is a subfield of artificial intelligence (AI) that + deals with","generated_token_count":20,"input_token_count":134,"stop_reason":"max_tokens","seed":1025128500}],"conversation_id":"90edb70b-c4e7-45f5-81c3-fab231227b7a"}' headers: content-length: - - '395' + - '412' content-type: - application/json; charset=utf-8 content-version: - '2024-03-19' date: - - Wed, 20 Mar 2024 08:27:23 GMT + - Mon, 20 May 2024 14:12:28 GMT keep-alive: - timeout=72 set-cookie: - - 2eef5f4c257f6bca76e8da5586743beb=1e3545705d3737525c7629e9f28dc93d; path=/; + - 2eef5f4c257f6bca76e8da5586743beb=fad6ffcbebbca45726eaa14ee11d2c44; path=/; HttpOnly; Secure; SameSite=None vary: - accept-encoding diff --git a/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_generate.yaml b/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_generate.yaml index 74d1c395..cd1c860f 100644 --- a/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_generate.yaml +++ b/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_generate.yaml @@ -8,7 +8,8 @@ interactions: explain why instead of answering something incorrectly.\n If you don''t know the answer to a question, please don''t share false information.\n ", "role": "system"}, {"content": "What is NLP and how it has evolved over the years?", - "role": "user"}], "model_id": "meta-llama/llama-2-70b-chat", "parameters": {}}' + "role": "user"}], "model_id": "meta-llama/llama-3-70b-instruct", "parameters": + {}}' headers: accept: - '*/*' @@ -17,29 +18,29 @@ interactions: connection: - keep-alive content-length: - - '730' + - '734' content-type: - application/json method: POST uri: https://api.com/v2/text/chat?version=2024-03-19 response: body: - string: '{"id":"fb9c6250-62c1-412e-beb3-e8db76569817","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-20T08:27:17.855Z","results":[{"generated_text":" NLP, - or Natural Language Processing, is a subfield of artificial intelligence that - deals","generated_token_count":20,"input_token_count":160,"stop_reason":"max_tokens","seed":3913660195}],"conversation_id":"a8512cf7-652f-42cc-9471-ff4996aecef9"}' + string: '{"id":"d8160c0f-36a3-49a0-b001-a3f9cbdb9643","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:27.354Z","results":[{"generated_text":"NLP, + or Natural Language Processing, is a subfield of artificial intelligence (AI) + that deals","generated_token_count":20,"input_token_count":134,"stop_reason":"max_tokens","seed":2032949123}],"conversation_id":"bcc5d917-5e64-48e0-9047-ac5e032e8f21"}' headers: content-length: - - '403' + - '410' content-type: - application/json; charset=utf-8 content-version: - '2024-03-19' date: - - Wed, 20 Mar 2024 08:27:17 GMT + - Mon, 20 May 2024 14:12:27 GMT keep-alive: - timeout=72 set-cookie: - - 2eef5f4c257f6bca76e8da5586743beb=c7a0964ef13502a09f12e4a9f37d8d7f; path=/; + - 2eef5f4c257f6bca76e8da5586743beb=fad6ffcbebbca45726eaa14ee11d2c44; path=/; HttpOnly; Secure; SameSite=None vary: - accept-encoding diff --git a/tests/integration/extensions/test_langchain_chat.py b/tests/integration/extensions/test_langchain_chat.py index 37418d64..038f7e00 100644 --- a/tests/integration/extensions/test_langchain_chat.py +++ b/tests/integration/extensions/test_langchain_chat.py @@ -18,7 +18,7 @@ @pytest.mark.integration class TestLangChainChat: def setup_method(self): - self.model_id = "meta-llama/llama-2-70b-chat" + self.model_id = "meta-llama/llama-3-70b-instruct" @pytest.fixture def parameters(self): diff --git a/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_history.yaml b/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_history.yaml index 8b85128a..92abb615 100644 --- a/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_history.yaml +++ b/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_history.yaml @@ -1,7 +1,7 @@ interactions: - request: body: '{"messages": [{"content": "Do you want to destroy the world?", "role": - "user"}], "model_id": "meta-llama/llama-2-70b-chat"}' + "user"}], "model_id": "meta-llama/llama-3-70b-instruct"}' headers: accept: - '*/*' @@ -10,28 +10,30 @@ interactions: connection: - keep-alive content-length: - - '123' + - '127' content-type: - application/json method: POST uri: https://api.com/v2/text/chat?version=2024-03-19 response: body: - string: '{"id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T12:35:13.145Z","results":[{"generated_text":" No, - I do not want to destroy the world. As a responsible AI language model,","generated_token_count":20,"input_token_count":18,"stop_reason":"max_tokens","seed":3100294952}],"conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"}' + string: '{"id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:31.312Z","results":[{"generated_text":"No, + I do not want to destroy the world. I am designed to assist and provide helpful + information","generated_token_count":20,"input_token_count":18,"stop_reason":"max_tokens","seed":967002347}],"conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"}' headers: - Connection: - - keep-alive - Date: - - Thu, 21 Mar 2024 12:35:13 GMT - Keep-Alive: - - timeout=72 content-length: - - '389' + - '410' content-type: - application/json; charset=utf-8 content-version: - '2024-03-19' + date: + - Mon, 20 May 2024 14:12:31 GMT + keep-alive: + - timeout=72 + set-cookie: + - 2eef5f4c257f6bca76e8da5586743beb=fad6ffcbebbca45726eaa14ee11d2c44; path=/; + HttpOnly; Secure; SameSite=None vary: - accept-encoding status: @@ -47,25 +49,29 @@ interactions: connection: - keep-alive method: GET - uri: https://api.com/v2/requests/chat/0e0776ed-5bdb-4c1a-8b21-c23586c5f05e?version=2024-03-19 + uri: https://api.com/v2/requests/chat/02593997-bb70-47dd-97d4-209aa820aa98?version=2024-03-19 response: body: - string: '{"results":[{"id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","duration":1303,"request":{"messages":[{"role":"user","content":"Do - you want to destroy the world?"}],"model_id":"meta-llama/llama-2-70b-chat","conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"},"status":"success","created_at":"2024-03-21T12:35:13.000Z","response":{"id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","results":[{"seed":3100294952,"stop_reason":"max_tokens","generated_text":" No, - I do not want to destroy the world. As a responsible AI language model,","input_token_count":18,"generated_token_count":20}],"model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T12:35:13.145Z","conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"},"version":{"api":"v2","date":"2024-03-19"}}]}' + string: '{"results":[{"id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","duration":759,"request":{"messages":[{"role":"user","content":"Do + you want to destroy the world?"}],"model_id":"meta-llama/llama-3-70b-instruct","conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"},"status":"success","created_at":"2024-05-20T14:12:31.000Z","response":{"id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","results":[{"seed":967002347,"stop_reason":"max_tokens","generated_text":"No, + I do not want to destroy the world. I am designed to assist and provide helpful + information","input_token_count":18,"generated_token_count":20}],"model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:31.312Z","conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"},"version":{"api":"v2","date":"2024-03-19"}}]}' headers: - Connection: - - keep-alive - Date: - - Thu, 21 Mar 2024 12:35:13 GMT - Keep-Alive: - - timeout=72 + cache-control: + - private content-length: - - '763' + - '787' content-type: - application/json; charset=utf-8 content-version: - '2024-03-19' + date: + - Mon, 20 May 2024 14:12:31 GMT + keep-alive: + - timeout=72 + set-cookie: + - 2eef5f4c257f6bca76e8da5586743beb=de04e502e3969a930842cae695f31f86; path=/; + HttpOnly; Secure; SameSite=None vary: - accept-encoding x-ratelimit-limit: @@ -78,9 +84,9 @@ interactions: code: 200 message: OK - request: - body: '{"conversation_id": "0e0776ed-5bdb-4c1a-8b21-c23586c5f05e", "messages": + body: '{"conversation_id": "02593997-bb70-47dd-97d4-209aa820aa98", "messages": [{"content": "What was my previous question?", "role": "user"}], "model_id": - "meta-llama/llama-2-70b-chat"}' + "meta-llama/llama-3-70b-instruct"}' headers: accept: - '*/*' @@ -89,28 +95,29 @@ interactions: connection: - keep-alive content-length: - - '179' + - '183' content-type: - application/json method: POST uri: https://api.com/v2/text/chat?version=2024-03-19 response: body: - string: '{"id":"e5747964-decd-46d4-85b0-0e5c0c4013a1","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T12:35:15.717Z","results":[{"generated_text":" Sure! - Your previous question was: \"Do you want to destroy the world?\"","generated_token_count":18,"input_token_count":52,"stop_reason":"eos_token","seed":1803219522}],"conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"}' + string: '{"id":"098d2940-3e40-4b67-8239-b63e16ae53f2","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:32.733Z","results":[{"generated_text":"Your + previous question was \"Do you want to destroy the world?\"","generated_token_count":14,"input_token_count":54,"stop_reason":"eos_token","seed":934152098}],"conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"}' headers: - Connection: - - keep-alive - Date: - - Thu, 21 Mar 2024 12:35:15 GMT - Keep-Alive: - - timeout=72 content-length: - - '384' + - '378' content-type: - application/json; charset=utf-8 content-version: - '2024-03-19' + date: + - Mon, 20 May 2024 14:12:32 GMT + keep-alive: + - timeout=72 + set-cookie: + - 2eef5f4c257f6bca76e8da5586743beb=8b84dad6e66ad6c44a10cddf9bcb53d9; path=/; + HttpOnly; Secure; SameSite=None vary: - accept-encoding status: @@ -126,29 +133,33 @@ interactions: connection: - keep-alive method: GET - uri: https://api.com/v2/requests/chat/0e0776ed-5bdb-4c1a-8b21-c23586c5f05e?version=2024-03-19 + uri: https://api.com/v2/requests/chat/02593997-bb70-47dd-97d4-209aa820aa98?version=2024-03-19 response: body: - string: '{"results":[{"id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","duration":1303,"request":{"messages":[{"role":"user","content":"Do - you want to destroy the world?"}],"model_id":"meta-llama/llama-2-70b-chat","conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"},"status":"success","created_at":"2024-03-21T12:35:13.000Z","response":{"id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","results":[{"seed":3100294952,"stop_reason":"max_tokens","generated_text":" No, - I do not want to destroy the world. As a responsible AI language model,","input_token_count":18,"generated_token_count":20}],"model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T12:35:13.145Z","conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"},"version":{"api":"v2","date":"2024-03-19"}},{"id":"e5747964-decd-46d4-85b0-0e5c0c4013a1","duration":1047,"request":{"messages":[{"role":"user","content":"What - was my previous question?"}],"model_id":"meta-llama/llama-2-70b-chat","parent_id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"},"status":"success","created_at":"2024-03-21T12:35:16.000Z","response":{"id":"e5747964-decd-46d4-85b0-0e5c0c4013a1","results":[{"seed":1803219522,"stop_reason":"eos_token","generated_text":" Sure! - Your previous question was: \"Do you want to destroy the world?\"","input_token_count":52,"generated_token_count":18}],"model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T12:35:15.717Z","conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"},"parent_id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","version":{"api":"v2","date":"2024-03-19"}}]}' + string: '{"results":[{"id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","duration":759,"request":{"messages":[{"role":"user","content":"Do + you want to destroy the world?"}],"model_id":"meta-llama/llama-3-70b-instruct","conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"},"status":"success","created_at":"2024-05-20T14:12:31.000Z","response":{"id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","results":[{"seed":967002347,"stop_reason":"max_tokens","generated_text":"No, + I do not want to destroy the world. I am designed to assist and provide helpful + information","input_token_count":18,"generated_token_count":20}],"model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:31.312Z","conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"},"version":{"api":"v2","date":"2024-03-19"}},{"id":"098d2940-3e40-4b67-8239-b63e16ae53f2","duration":582,"request":{"messages":[{"role":"user","content":"What + was my previous question?"}],"model_id":"meta-llama/llama-3-70b-instruct","parent_id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"},"status":"success","created_at":"2024-05-20T14:12:33.000Z","response":{"id":"098d2940-3e40-4b67-8239-b63e16ae53f2","results":[{"seed":934152098,"stop_reason":"eos_token","generated_text":"Your + previous question was \"Do you want to destroy the world?\"","input_token_count":54,"generated_token_count":14}],"model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:32.733Z","conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"},"parent_id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","version":{"api":"v2","date":"2024-03-19"}}]}' headers: - Connection: - - keep-alive - Date: - - Thu, 21 Mar 2024 12:35:16 GMT - Keep-Alive: - - timeout=72 - Transfer-Encoding: - - chunked + cache-control: + - private content-length: - - '1607' + - '1628' content-type: - application/json; charset=utf-8 content-version: - '2024-03-19' + date: + - Mon, 20 May 2024 14:12:33 GMT + keep-alive: + - timeout=72 + set-cookie: + - 2eef5f4c257f6bca76e8da5586743beb=e3c6af3fbcba3961feb3db568595ba1b; path=/; + HttpOnly; Secure; SameSite=None + transfer-encoding: + - chunked vary: - accept-encoding x-ratelimit-limit: diff --git a/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_stream.yaml b/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_stream.yaml index b4f61401..5ba8be13 100644 --- a/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_stream.yaml +++ b/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_stream.yaml @@ -1,7 +1,7 @@ interactions: - request: body: '{"messages": [{"content": "I want to kill them! There are my enemies.", - "role": "user"}], "model_id": "meta-llama/llama-2-70b-chat", "moderations": + "role": "user"}], "model_id": "meta-llama/llama-3-70b-instruct", "moderations": {"hap": {"input": {"enabled": true, "send_tokens": false, "threshold": 0.7}, "output": {"enabled": true, "send_tokens": true, "threshold": 0.7}}}, "parameters": {"max_new_tokens": 10, "min_new_tokens": 3}}' @@ -15,7 +15,7 @@ interactions: connection: - keep-alive content-length: - - '353' + - '357' content-type: - application/json method: POST @@ -25,42 +25,42 @@ interactions: string: 'retry: 3000 - data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.460Z","results":[{"generated_text":" ","generated_token_count":2,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"} + data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:33.754Z","conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52","moderations":{"hap":[{"score":0.8397554755210876,"flagged":true,"success":true,"position":{"start":0,"end":20}}]}} - data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.497Z","results":[{"generated_text":"I - understan","generated_token_count":3,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"} + data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:33.826Z","results":[{"generated_text":"I + understan","generated_token_count":2,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"} - data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.546Z","results":[{"generated_text":"d - tha","generated_token_count":4,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"} + data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:33.879Z","results":[{"generated_text":"d + tha","generated_token_count":3,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"} - data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.584Z","results":[{"generated_text":"t - yo","generated_token_count":5,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"} + data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:33.917Z","results":[{"generated_text":"t + yo","generated_token_count":4,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"} - data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.622Z","results":[{"generated_text":"u - ma","generated_token_count":6,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"} + data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:33.956Z","results":[{"generated_text":"u''r","generated_token_count":5,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"} - data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.661Z","results":[{"generated_text":"y - fee","generated_token_count":7,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"} + data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:33.995Z","results":[{"generated_text":"e + feelin","generated_token_count":6,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"} - data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.700Z","results":[{"generated_text":"l - angr","generated_token_count":8,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"} + data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:34.035Z","results":[{"generated_text":"g + ","generated_token_count":7,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"} - data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.750Z","results":[{"generated_text":"y - o","generated_token_count":9,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"} + data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:34.075Z","results":[{"generated_text":"a + stron","generated_token_count":8,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"} - data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.792Z","results":[{"generated_text":"r - fr","generated_token_count":10,"input_token_count":0,"stop_reason":"max_tokens","seed":2048779827}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"} + data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:34.129Z","results":[{"generated_text":"g + sens","generated_token_count":9,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"} - data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.942Z","conversation_id":"6599942c-86be-436d-9be2-943bd8157159","moderations":{"hap":[{"score":0.8397555351257324,"flagged":true,"success":true,"position":{"start":0,"end":20}}]}} + data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:34.171Z","results":[{"generated_text":"e + of","generated_token_count":10,"input_token_count":0,"stop_reason":"max_tokens","seed":401923414}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"} event: close @@ -68,15 +68,16 @@ interactions: ' headers: - Cache-Control: + cache-control: - no-cache,no-transform - Connection: - - keep-alive - Content-Type: + content-type: - text/event-stream - Date: - - Thu, 21 Mar 2024 18:55:40 GMT - Transfer-Encoding: + date: + - Mon, 20 May 2024 14:12:33 GMT + set-cookie: + - 2eef5f4c257f6bca76e8da5586743beb=fad6ffcbebbca45726eaa14ee11d2c44; path=/; + HttpOnly; Secure; SameSite=None + transfer-encoding: - chunked x-no-compression: - '1' diff --git a/tests/integration/text/test_chat_service.py b/tests/integration/text/test_chat_service.py index 5dc20a43..90948f27 100644 --- a/tests/integration/text/test_chat_service.py +++ b/tests/integration/text/test_chat_service.py @@ -10,7 +10,7 @@ TextGenerationParameters, ) -TEST_MODEL_ID = "meta-llama/llama-2-70b-chat" +TEST_MODEL_ID = "meta-llama/llama-3-70b-instruct" @pytest.mark.integration