diff --git a/examples/extensions/huggingface/huggingface_agent.py b/examples/extensions/huggingface/huggingface_agent.py
index f8a1bf06..4259bd89 100644
--- a/examples/extensions/huggingface/huggingface_agent.py
+++ b/examples/extensions/huggingface/huggingface_agent.py
@@ -49,7 +49,7 @@ def __call__(self):
 
 agent = IBMGenAIAgent(
     client=client,
-    model="meta-llama/llama-2-70b-chat",
+    model="meta-llama/llama-3-70b-instruct",
     parameters=TextGenerationParameters(min_new_tokens=10, max_new_tokens=200, random_seed=777, temperature=0),
     additional_tools=[BitcoinPriceFetcher()],
 )
diff --git a/examples/extensions/langchain/langchain_agent.py b/examples/extensions/langchain/langchain_agent.py
index 1c21e98c..1bd79e98 100644
--- a/examples/extensions/langchain/langchain_agent.py
+++ b/examples/extensions/langchain/langchain_agent.py
@@ -78,7 +78,7 @@ def _run(self, word: str, run_manager: Optional[CallbackManagerForToolRun] = Non
 client = Client(credentials=Credentials.from_env())
 llm = LangChainChatInterface(
     client=client,
-    model_id="meta-llama/llama-2-70b-chat",
+    model_id="meta-llama/llama-3-70b-instruct",
     parameters=TextGenerationParameters(
         max_new_tokens=250, min_new_tokens=20, temperature=0, stop_sequences=["\nObservation"]
     ),
diff --git a/examples/extensions/langchain/langchain_chat_generate.py b/examples/extensions/langchain/langchain_chat_generate.py
index 086172ee..c7d07076 100644
--- a/examples/extensions/langchain/langchain_chat_generate.py
+++ b/examples/extensions/langchain/langchain_chat_generate.py
@@ -27,7 +27,7 @@ def heading(text: str) -> str:
 
 llm = LangChainChatInterface(
     client=Client(credentials=Credentials.from_env()),
-    model_id="meta-llama/llama-2-70b-chat",
+    model_id="meta-llama/llama-3-70b-instruct",
     parameters=TextGenerationParameters(
         decoding_method=DecodingMethod.SAMPLE,
         max_new_tokens=100,
diff --git a/examples/extensions/langchain/langchain_chat_stream.py b/examples/extensions/langchain/langchain_chat_stream.py
index e9794083..169336cd 100644
--- a/examples/extensions/langchain/langchain_chat_stream.py
+++ b/examples/extensions/langchain/langchain_chat_stream.py
@@ -21,7 +21,7 @@ def heading(text: str) -> str:
 print(heading("Stream chat with langchain"))
 
 llm = LangChainChatInterface(
-    model_id="meta-llama/llama-2-70b-chat",
+    model_id="meta-llama/llama-3-70b-instruct",
     client=Client(credentials=Credentials.from_env()),
     parameters=TextGenerationParameters(
         decoding_method=DecodingMethod.SAMPLE,
diff --git a/examples/extensions/langchain/langchain_sql_agent.py b/examples/extensions/langchain/langchain_sql_agent.py
index d4601795..4e212372 100644
--- a/examples/extensions/langchain/langchain_sql_agent.py
+++ b/examples/extensions/langchain/langchain_sql_agent.py
@@ -75,7 +75,7 @@ def create_llm():
     client = Client(credentials=Credentials.from_env())
     return LangChainChatInterface(
         client=client,
-        model_id="meta-llama/llama-2-70b-chat",
+        model_id="meta-llama/llama-3-70b-instruct",
         parameters=TextGenerationParameters(
             max_new_tokens=250, min_new_tokens=20, temperature=0, stop_sequences=["\nObservation"]
         ),
diff --git a/examples/extensions/llama_index/llama_index_llm.py b/examples/extensions/llama_index/llama_index_llm.py
index b9e5c552..8703ae7f 100644
--- a/examples/extensions/llama_index/llama_index_llm.py
+++ b/examples/extensions/llama_index/llama_index_llm.py
@@ -22,7 +22,7 @@ def heading(text: str) -> str:
 
 llm = IBMGenAILlamaIndex(
     client=client,
-    model_id="meta-llama/llama-2-70b-chat",
+    model_id="meta-llama/llama-3-70b-instruct",
     parameters=TextGenerationParameters(
         decoding_method=DecodingMethod.SAMPLE,
         max_new_tokens=100,
diff --git a/examples/text/chat.py b/examples/text/chat.py
index 16633c59..d14de0d9 100644
--- a/examples/text/chat.py
+++ b/examples/text/chat.py
@@ -31,7 +31,7 @@ def heading(text: str) -> str:
 )
 
 client = Client(credentials=Credentials.from_env())
-model_id = "meta-llama/llama-2-70b-chat"
+model_id = "meta-llama/llama-3-70b-instruct"
 
 prompt = "What is NLP and how it has evolved over the years?"
 print(heading("Generating a chat response"))
diff --git a/src/genai/extensions/langchain/chat_llm.py b/src/genai/extensions/langchain/chat_llm.py
index f3606567..c6242818 100644
--- a/src/genai/extensions/langchain/chat_llm.py
+++ b/src/genai/extensions/langchain/chat_llm.py
@@ -93,7 +93,7 @@ class LangChainChatInterface(BaseChatModel):
         client = Client(credentials=Credentials.from_env())
         llm = LangChainChatInterface(
             client=client,
-            model_id="meta-llama/llama-2-70b-chat",
+            model_id="meta-llama/llama-3-70b-instruct",
             parameters=TextGenerationParameters(
                 max_new_tokens=250,
             )
diff --git a/src/genai/extensions/langchain/llm.py b/src/genai/extensions/langchain/llm.py
index 866ab2f8..1619f53e 100644
--- a/src/genai/extensions/langchain/llm.py
+++ b/src/genai/extensions/langchain/llm.py
@@ -62,7 +62,7 @@ class LangChainInterface(LLM):
         client = Client(credentials=Credentials.from_env())
         llm = LangChainInterface(
             client=client,
-            model_id="meta-llama/llama-2-70b-chat",
+            model_id="meta-llama/llama-3-70b-instruct",
             parameters=TextGenerationParameters(max_new_tokens=50)
         )
 
diff --git a/src/genai/text/chat/chat_generation_service.py b/src/genai/text/chat/chat_generation_service.py
index 2e824471..56a35a28 100644
--- a/src/genai/text/chat/chat_generation_service.py
+++ b/src/genai/text/chat/chat_generation_service.py
@@ -79,7 +79,7 @@ def create(
 
             # Create a new conversation
             response = client.text.chat.create(
-                model_id="meta-llama/llama-2-70b-chat",
+                model_id="meta-llama/llama-3-70b-instruct",
                 messages=[HumanMessage(content="Describe the game Chess?")],
                 parameters=TextGenerationParameters(max_token_limit=100)
             )
@@ -152,7 +152,7 @@ def create_stream(
 
             # Create a new conversation
             for response in client.text.chat.create_stream(
-                    model_id="meta-llama/llama-2-70b-chat",
+                    model_id="meta-llama/llama-3-70b-instruct",
                     messages=[HumanMessage(content="Describe the game Chess?")],
                     parameters=TextGenerationParameters(max_token_limit=100)
                 ):
diff --git a/tests/integration/extensions/cassettes/test_huggingface_agent/TestHuggingfaceAgent.test_agent.yaml b/tests/integration/extensions/cassettes/test_huggingface_agent/TestHuggingfaceAgent.test_agent.yaml
index c75e2a87..5484eaf3 100644
--- a/tests/integration/extensions/cassettes/test_huggingface_agent/TestHuggingfaceAgent.test_agent.yaml
+++ b/tests/integration/extensions/cassettes/test_huggingface_agent/TestHuggingfaceAgent.test_agent.yaml
@@ -12,22 +12,22 @@ interactions:
     uri: https://api.com/v2/text/generation/limits?version=2023-11-22
   response:
     body:
-      string: '{"result":{"concurrency":{"limit":200,"remaining":200}}}'
+      string: '{"result":{"concurrency":{"limit":10,"remaining":10}}}'
     headers:
       cache-control:
       - private
       content-length:
-      - '56'
+      - '54'
       content-type:
       - application/json; charset=utf-8
       content-version:
       - '2023-11-22'
       date:
-      - Wed, 20 Mar 2024 08:27:00 GMT
+      - Mon, 20 May 2024 14:12:17 GMT
       keep-alive:
       - timeout=72
       set-cookie:
-      - 2eef5f4c257f6bca76e8da5586743beb=1e3545705d3737525c7629e9f28dc93d; path=/;
+      - 2eef5f4c257f6bca76e8da5586743beb=de04e502e3969a930842cae695f31f86; path=/;
         HttpOnly; Secure; SameSite=None
       vary:
       - accept-encoding
@@ -75,47 +75,41 @@ interactions:
       `src_lang`, which should be the language of the text to translate and `tgt_lang`,
       which should be the language for the desired ouput language. Both `src_lang`
       and `tgt_lang` are written in plain English, such as ''Romanian'', or ''Albanian''.
-      It returns the text translated in `tgt_lang`.\n- image_transformer: This is
-      a tool that transforms an image according to a prompt. It takes two inputs:
-      `image`, which should be the image to transform, and `prompt`, which should
-      be the prompt to use to change it. The prompt should only contain descriptive
-      adjectives, as if completing the prompt of the original image. It returns the
-      modified image.\n- text_downloader: This is a tool that downloads a file from
-      a `url`. It takes the `url` as input, and returns the text contained in the
-      file.\n- image_generator: This is a tool that creates an image according to
-      a prompt, which is a text description. It takes an input named `prompt` which
-      contains the image description and outputs an image.\n- video_generator: This
-      is a tool that creates a video according to a text description. It takes an
-      input named `prompt` which contains the image description, as well as an optional
-      input `seconds` which will be the duration of the video. The default is of two
-      seconds. The tool outputs a video object.\n\n\nTask: \"Answer the question in
-      the variable `question` about the image stored in the variable `image`. The
-      question is in French.\"\n\nI will use the following tools: `translator` to
-      translate the question into English and then `image_qa` to answer the question
-      on the input image.\n\nAnswer:\n```py\ntranslated_question = translator(question=question,
-      src_lang=\"French\", tgt_lang=\"English\")\nprint(f\"The translated question
-      is {translated_question}.\")\nanswer = image_qa(image=image, question=translated_question)\nprint(f\"The
-      answer is {answer}\")\n```\n\nTask: \"Identify the oldest person in the `document`
-      and create an image showcasing the result.\"\n\nI will use the following tools:
-      `document_qa` to find the oldest person in the document, then `image_generator`
-      to generate an image according to the answer.\n\nAnswer:\n```py\nanswer = document_qa(document,
-      question=\"What is the oldest person?\")\nprint(f\"The answer is {answer}.\")\nimage
-      = image_generator(answer)\n```\n\nTask: \"Generate an image using the text given
-      in the variable `caption`.\"\n\nI will use the following tool: `image_generator`
-      to generate an image.\n\nAnswer:\n```py\nimage = image_generator(prompt=caption)\n```\n\nTask:
-      \"Summarize the text given in the variable `text` and read it out loud.\"\n\nI
-      will use the following tools: `summarizer` to create a summary of the input
-      text, then `text_reader` to read it out loud.\n\nAnswer:\n```py\nsummarized_text
-      = summarizer(text)\nprint(f\"Summary: {summarized_text}\")\naudio_summary =
-      text_reader(summarized_text)\n```\n\nTask: \"Answer the question in the variable
-      `question` about the text in the variable `text`. Use the answer to generate
-      an image.\"\n\nI will use the following tools: `text_qa` to create the answer,
-      then `image_generator` to generate an image according to the answer.\n\nAnswer:\n```py\nanswer
-      = text_qa(text=text, question=question)\nprint(f\"The answer is {answer}.\")\nimage
-      = image_generator(answer)\n```\n\nTask: \"Caption the following `image`.\"\n\nI
-      will use the following tool: `image_captioner` to generate a caption for the
-      image.\n\nAnswer:\n```py\ncaption = image_captioner(image)\n```\n\nTask: \"Summarize
-      the chat\"\n\nI will use the following", "model_id": "meta-llama/llama-2-70b",
+      It returns the text translated in `tgt_lang`.\n- image_transformation: This
+      is a tool that transforms an image according to a prompt and returns the modified
+      image.\n- text_downloader: This is a tool that downloads a file from a `url`.
+      It takes the `url` as input, and returns the text contained in the file.\n-
+      image_generator: This is a tool that creates an image according to a prompt,
+      which is a text description.\n- video_generator: This is a tool that creates
+      a video according to a text description. It takes an optional input `seconds`
+      which will be the duration of the video. The default is of two seconds. The
+      tool outputs a video object.\n\n\nTask: \"Answer the question in the variable
+      `question` about the image stored in the variable `image`. The question is in
+      French.\"\n\nI will use the following tools: `translator` to translate the question
+      into English and then `image_qa` to answer the question on the input image.\n\nAnswer:\n```py\ntranslated_question
+      = translator(question=question, src_lang=\"French\", tgt_lang=\"English\")\nprint(f\"The
+      translated question is {translated_question}.\")\nanswer = image_qa(image=image,
+      question=translated_question)\nprint(f\"The answer is {answer}\")\n```\n\nTask:
+      \"Identify the oldest person in the `document` and create an image showcasing
+      the result.\"\n\nI will use the following tools: `document_qa` to find the oldest
+      person in the document, then `image_generator` to generate an image according
+      to the answer.\n\nAnswer:\n```py\nanswer = document_qa(document, question=\"What
+      is the oldest person?\")\nprint(f\"The answer is {answer}.\")\nimage = image_generator(answer)\n```\n\nTask:
+      \"Generate an image using the text given in the variable `caption`.\"\n\nI will
+      use the following tool: `image_generator` to generate an image.\n\nAnswer:\n```py\nimage
+      = image_generator(prompt=caption)\n```\n\nTask: \"Summarize the text given in
+      the variable `text` and read it out loud.\"\n\nI will use the following tools:
+      `summarizer` to create a summary of the input text, then `text_reader` to read
+      it out loud.\n\nAnswer:\n```py\nsummarized_text = summarizer(text)\nprint(f\"Summary:
+      {summarized_text}\")\naudio_summary = text_reader(summarized_text)\n```\n\nTask:
+      \"Answer the question in the variable `question` about the text in the variable
+      `text`. Use the answer to generate an image.\"\n\nI will use the following tools:
+      `text_qa` to create the answer, then `image_generator` to generate an image
+      according to the answer.\n\nAnswer:\n```py\nanswer = text_qa(text=text, question=question)\nprint(f\"The
+      answer is {answer}.\")\nimage = image_generator(answer)\n```\n\nTask: \"Caption
+      the following `image`.\"\n\nI will use the following tool: `image_captioner`
+      to generate a caption for the image.\n\nAnswer:\n```py\ncaption = image_captioner(image)\n```\n\nTask:
+      \"Summarize the chat\"\n\nI will use the following", "model_id": "meta-llama/llama-2-70b",
       "parameters": {"max_new_tokens": 500, "stop_sequences": ["Task:"]}}'
     headers:
       accept:
@@ -125,30 +119,24 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '6535'
+      - '6135'
       content-type:
       - application/json
     method: POST
     uri: https://api.com/v2/text/generation?version=2024-03-19
   response:
     body:
-      string: '{"id":"230e435f-6c47-46bd-b0f2-ddc56c834dee","model_id":"meta-llama/llama-2-70b","created_at":"2024-03-20T08:27:06.828Z","results":[{"generated_text":"
-        tools: `document_qa` to identify the oldest person, then `image_captioner`
-        to generate a caption about that person, I will then generate an image from
-        that caption using `image_generator`, and finally `doc_to_text` and `text_reader`
-        to read the generated text. The list of tools will be executed one after the
-        other.\n\nAnswer:\n```py\nanswer = document_qa(document, question=\"What is
-        the oldest person?\")\ncaption = image_captions(answer)\nimage = image_generator(prompt=caption)\noutput
-        = summary_of_document(document)\n```\n\nTask:","generated_token_count":142,"input_token_count":1580,"stop_reason":"stop_sequence","stop_sequence":"Task:","seed":3264333442}]}'
+      string: '{"id":"22234f91-ef73-4721-9c85-04a22aad9296","model_id":"meta-llama/llama-2-70b","created_at":"2024-05-20T14:12:25.703Z","results":[{"generated_text":"
+        tool: `summarizer` to generate a summary of the chat.\n\nAnswer:\n```py\n))","generated_token_count":25,"input_token_count":1490,"stop_reason":"eos_token","seed":3150004800}]}'
     headers:
       content-length:
-      - '817'
+      - '327'
       content-type:
       - application/json; charset=utf-8
       content-version:
       - '2024-03-19'
       date:
-      - Wed, 20 Mar 2024 08:27:06 GMT
+      - Mon, 20 May 2024 14:12:25 GMT
       keep-alive:
       - timeout=72
       vary:
diff --git a/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_async_generate.yaml b/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_async_generate.yaml
index 71796a97..12d88294 100644
--- a/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_async_generate.yaml
+++ b/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_async_generate.yaml
@@ -8,7 +8,8 @@ interactions:
       explain why instead of answering something incorrectly.\n    If you don''t know
       the answer to a question, please don''t share false information.\n    ", "role":
       "system"}, {"content": "What is NLP and how it has evolved over the years?",
-      "role": "user"}], "model_id": "meta-llama/llama-2-70b-chat", "parameters": {}}'
+      "role": "user"}], "model_id": "meta-llama/llama-3-70b-instruct", "parameters":
+      {}}'
     headers:
       accept:
       - '*/*'
@@ -17,28 +18,29 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '730'
+      - '734'
       content-type:
       - application/json
     method: POST
     uri: https://api.com/v2/text/chat?version=2024-03-19
   response:
     body:
-      string: '{"id":"1b962f25-87c0-4b96-9c77-739bd9f5b2f6","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-20T08:27:23.202Z","results":[{"generated_text":"  NLP
-        (Natural Language Processing) refers to the branch of Artificial Intelligence","generated_token_count":20,"input_token_count":160,"stop_reason":"max_tokens","seed":683510637}],"conversation_id":"adc37c54-87cc-43ad-bf50-16013eec263f"}'
+      string: '{"id":"01f28b95-5ecd-4a01-9a2b-7803c6db824f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:28.750Z","results":[{"generated_text":"Natural
+        Language Processing (NLP) is a subfield of artificial intelligence (AI) that
+        deals with","generated_token_count":20,"input_token_count":134,"stop_reason":"max_tokens","seed":1025128500}],"conversation_id":"90edb70b-c4e7-45f5-81c3-fab231227b7a"}'
     headers:
       content-length:
-      - '395'
+      - '412'
       content-type:
       - application/json; charset=utf-8
       content-version:
       - '2024-03-19'
       date:
-      - Wed, 20 Mar 2024 08:27:23 GMT
+      - Mon, 20 May 2024 14:12:28 GMT
       keep-alive:
       - timeout=72
       set-cookie:
-      - 2eef5f4c257f6bca76e8da5586743beb=1e3545705d3737525c7629e9f28dc93d; path=/;
+      - 2eef5f4c257f6bca76e8da5586743beb=fad6ffcbebbca45726eaa14ee11d2c44; path=/;
         HttpOnly; Secure; SameSite=None
       vary:
       - accept-encoding
diff --git a/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_generate.yaml b/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_generate.yaml
index 74d1c395..cd1c860f 100644
--- a/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_generate.yaml
+++ b/tests/integration/extensions/cassettes/test_langchain_chat/TestLangChainChat.test_generate.yaml
@@ -8,7 +8,8 @@ interactions:
       explain why instead of answering something incorrectly.\n    If you don''t know
       the answer to a question, please don''t share false information.\n    ", "role":
       "system"}, {"content": "What is NLP and how it has evolved over the years?",
-      "role": "user"}], "model_id": "meta-llama/llama-2-70b-chat", "parameters": {}}'
+      "role": "user"}], "model_id": "meta-llama/llama-3-70b-instruct", "parameters":
+      {}}'
     headers:
       accept:
       - '*/*'
@@ -17,29 +18,29 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '730'
+      - '734'
       content-type:
       - application/json
     method: POST
     uri: https://api.com/v2/text/chat?version=2024-03-19
   response:
     body:
-      string: '{"id":"fb9c6250-62c1-412e-beb3-e8db76569817","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-20T08:27:17.855Z","results":[{"generated_text":"  NLP,
-        or Natural Language Processing, is a subfield of artificial intelligence that
-        deals","generated_token_count":20,"input_token_count":160,"stop_reason":"max_tokens","seed":3913660195}],"conversation_id":"a8512cf7-652f-42cc-9471-ff4996aecef9"}'
+      string: '{"id":"d8160c0f-36a3-49a0-b001-a3f9cbdb9643","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:27.354Z","results":[{"generated_text":"NLP,
+        or Natural Language Processing, is a subfield of artificial intelligence (AI)
+        that deals","generated_token_count":20,"input_token_count":134,"stop_reason":"max_tokens","seed":2032949123}],"conversation_id":"bcc5d917-5e64-48e0-9047-ac5e032e8f21"}'
     headers:
       content-length:
-      - '403'
+      - '410'
       content-type:
       - application/json; charset=utf-8
       content-version:
       - '2024-03-19'
       date:
-      - Wed, 20 Mar 2024 08:27:17 GMT
+      - Mon, 20 May 2024 14:12:27 GMT
       keep-alive:
       - timeout=72
       set-cookie:
-      - 2eef5f4c257f6bca76e8da5586743beb=c7a0964ef13502a09f12e4a9f37d8d7f; path=/;
+      - 2eef5f4c257f6bca76e8da5586743beb=fad6ffcbebbca45726eaa14ee11d2c44; path=/;
         HttpOnly; Secure; SameSite=None
       vary:
       - accept-encoding
diff --git a/tests/integration/extensions/test_langchain_chat.py b/tests/integration/extensions/test_langchain_chat.py
index 37418d64..038f7e00 100644
--- a/tests/integration/extensions/test_langchain_chat.py
+++ b/tests/integration/extensions/test_langchain_chat.py
@@ -18,7 +18,7 @@
 @pytest.mark.integration
 class TestLangChainChat:
     def setup_method(self):
-        self.model_id = "meta-llama/llama-2-70b-chat"
+        self.model_id = "meta-llama/llama-3-70b-instruct"
 
     @pytest.fixture
     def parameters(self):
diff --git a/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_history.yaml b/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_history.yaml
index 8b85128a..92abb615 100644
--- a/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_history.yaml
+++ b/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_history.yaml
@@ -1,7 +1,7 @@
 interactions:
 - request:
     body: '{"messages": [{"content": "Do you want to destroy the world?", "role":
-      "user"}], "model_id": "meta-llama/llama-2-70b-chat"}'
+      "user"}], "model_id": "meta-llama/llama-3-70b-instruct"}'
     headers:
       accept:
       - '*/*'
@@ -10,28 +10,30 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '123'
+      - '127'
       content-type:
       - application/json
     method: POST
     uri: https://api.com/v2/text/chat?version=2024-03-19
   response:
     body:
-      string: '{"id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T12:35:13.145Z","results":[{"generated_text":"  No,
-        I do not want to destroy the world. As a responsible AI language model,","generated_token_count":20,"input_token_count":18,"stop_reason":"max_tokens","seed":3100294952}],"conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"}'
+      string: '{"id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:31.312Z","results":[{"generated_text":"No,
+        I do not want to destroy the world. I am designed to assist and provide helpful
+        information","generated_token_count":20,"input_token_count":18,"stop_reason":"max_tokens","seed":967002347}],"conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"}'
     headers:
-      Connection:
-      - keep-alive
-      Date:
-      - Thu, 21 Mar 2024 12:35:13 GMT
-      Keep-Alive:
-      - timeout=72
       content-length:
-      - '389'
+      - '410'
       content-type:
       - application/json; charset=utf-8
       content-version:
       - '2024-03-19'
+      date:
+      - Mon, 20 May 2024 14:12:31 GMT
+      keep-alive:
+      - timeout=72
+      set-cookie:
+      - 2eef5f4c257f6bca76e8da5586743beb=fad6ffcbebbca45726eaa14ee11d2c44; path=/;
+        HttpOnly; Secure; SameSite=None
       vary:
       - accept-encoding
     status:
@@ -47,25 +49,29 @@ interactions:
       connection:
       - keep-alive
     method: GET
-    uri: https://api.com/v2/requests/chat/0e0776ed-5bdb-4c1a-8b21-c23586c5f05e?version=2024-03-19
+    uri: https://api.com/v2/requests/chat/02593997-bb70-47dd-97d4-209aa820aa98?version=2024-03-19
   response:
     body:
-      string: '{"results":[{"id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","duration":1303,"request":{"messages":[{"role":"user","content":"Do
-        you want to destroy the world?"}],"model_id":"meta-llama/llama-2-70b-chat","conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"},"status":"success","created_at":"2024-03-21T12:35:13.000Z","response":{"id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","results":[{"seed":3100294952,"stop_reason":"max_tokens","generated_text":"  No,
-        I do not want to destroy the world. As a responsible AI language model,","input_token_count":18,"generated_token_count":20}],"model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T12:35:13.145Z","conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"},"version":{"api":"v2","date":"2024-03-19"}}]}'
+      string: '{"results":[{"id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","duration":759,"request":{"messages":[{"role":"user","content":"Do
+        you want to destroy the world?"}],"model_id":"meta-llama/llama-3-70b-instruct","conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"},"status":"success","created_at":"2024-05-20T14:12:31.000Z","response":{"id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","results":[{"seed":967002347,"stop_reason":"max_tokens","generated_text":"No,
+        I do not want to destroy the world. I am designed to assist and provide helpful
+        information","input_token_count":18,"generated_token_count":20}],"model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:31.312Z","conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"},"version":{"api":"v2","date":"2024-03-19"}}]}'
     headers:
-      Connection:
-      - keep-alive
-      Date:
-      - Thu, 21 Mar 2024 12:35:13 GMT
-      Keep-Alive:
-      - timeout=72
+      cache-control:
+      - private
       content-length:
-      - '763'
+      - '787'
       content-type:
       - application/json; charset=utf-8
       content-version:
       - '2024-03-19'
+      date:
+      - Mon, 20 May 2024 14:12:31 GMT
+      keep-alive:
+      - timeout=72
+      set-cookie:
+      - 2eef5f4c257f6bca76e8da5586743beb=de04e502e3969a930842cae695f31f86; path=/;
+        HttpOnly; Secure; SameSite=None
       vary:
       - accept-encoding
       x-ratelimit-limit:
@@ -78,9 +84,9 @@ interactions:
       code: 200
       message: OK
 - request:
-    body: '{"conversation_id": "0e0776ed-5bdb-4c1a-8b21-c23586c5f05e", "messages":
+    body: '{"conversation_id": "02593997-bb70-47dd-97d4-209aa820aa98", "messages":
       [{"content": "What was my previous question?", "role": "user"}], "model_id":
-      "meta-llama/llama-2-70b-chat"}'
+      "meta-llama/llama-3-70b-instruct"}'
     headers:
       accept:
       - '*/*'
@@ -89,28 +95,29 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '179'
+      - '183'
       content-type:
       - application/json
     method: POST
     uri: https://api.com/v2/text/chat?version=2024-03-19
   response:
     body:
-      string: '{"id":"e5747964-decd-46d4-85b0-0e5c0c4013a1","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T12:35:15.717Z","results":[{"generated_text":"  Sure!
-        Your previous question was: \"Do you want to destroy the world?\"","generated_token_count":18,"input_token_count":52,"stop_reason":"eos_token","seed":1803219522}],"conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"}'
+      string: '{"id":"098d2940-3e40-4b67-8239-b63e16ae53f2","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:32.733Z","results":[{"generated_text":"Your
+        previous question was \"Do you want to destroy the world?\"","generated_token_count":14,"input_token_count":54,"stop_reason":"eos_token","seed":934152098}],"conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"}'
     headers:
-      Connection:
-      - keep-alive
-      Date:
-      - Thu, 21 Mar 2024 12:35:15 GMT
-      Keep-Alive:
-      - timeout=72
       content-length:
-      - '384'
+      - '378'
       content-type:
       - application/json; charset=utf-8
       content-version:
       - '2024-03-19'
+      date:
+      - Mon, 20 May 2024 14:12:32 GMT
+      keep-alive:
+      - timeout=72
+      set-cookie:
+      - 2eef5f4c257f6bca76e8da5586743beb=8b84dad6e66ad6c44a10cddf9bcb53d9; path=/;
+        HttpOnly; Secure; SameSite=None
       vary:
       - accept-encoding
     status:
@@ -126,29 +133,33 @@ interactions:
       connection:
       - keep-alive
     method: GET
-    uri: https://api.com/v2/requests/chat/0e0776ed-5bdb-4c1a-8b21-c23586c5f05e?version=2024-03-19
+    uri: https://api.com/v2/requests/chat/02593997-bb70-47dd-97d4-209aa820aa98?version=2024-03-19
   response:
     body:
-      string: '{"results":[{"id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","duration":1303,"request":{"messages":[{"role":"user","content":"Do
-        you want to destroy the world?"}],"model_id":"meta-llama/llama-2-70b-chat","conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"},"status":"success","created_at":"2024-03-21T12:35:13.000Z","response":{"id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","results":[{"seed":3100294952,"stop_reason":"max_tokens","generated_text":"  No,
-        I do not want to destroy the world. As a responsible AI language model,","input_token_count":18,"generated_token_count":20}],"model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T12:35:13.145Z","conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"},"version":{"api":"v2","date":"2024-03-19"}},{"id":"e5747964-decd-46d4-85b0-0e5c0c4013a1","duration":1047,"request":{"messages":[{"role":"user","content":"What
-        was my previous question?"}],"model_id":"meta-llama/llama-2-70b-chat","parent_id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"},"status":"success","created_at":"2024-03-21T12:35:16.000Z","response":{"id":"e5747964-decd-46d4-85b0-0e5c0c4013a1","results":[{"seed":1803219522,"stop_reason":"eos_token","generated_text":"  Sure!
-        Your previous question was: \"Do you want to destroy the world?\"","input_token_count":52,"generated_token_count":18}],"model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T12:35:15.717Z","conversation_id":"0e0776ed-5bdb-4c1a-8b21-c23586c5f05e"},"parent_id":"e6f57241-93e6-4eb2-8c1f-2641b5cdfc73","version":{"api":"v2","date":"2024-03-19"}}]}'
+      string: '{"results":[{"id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","duration":759,"request":{"messages":[{"role":"user","content":"Do
+        you want to destroy the world?"}],"model_id":"meta-llama/llama-3-70b-instruct","conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"},"status":"success","created_at":"2024-05-20T14:12:31.000Z","response":{"id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","results":[{"seed":967002347,"stop_reason":"max_tokens","generated_text":"No,
+        I do not want to destroy the world. I am designed to assist and provide helpful
+        information","input_token_count":18,"generated_token_count":20}],"model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:31.312Z","conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"},"version":{"api":"v2","date":"2024-03-19"}},{"id":"098d2940-3e40-4b67-8239-b63e16ae53f2","duration":582,"request":{"messages":[{"role":"user","content":"What
+        was my previous question?"}],"model_id":"meta-llama/llama-3-70b-instruct","parent_id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"},"status":"success","created_at":"2024-05-20T14:12:33.000Z","response":{"id":"098d2940-3e40-4b67-8239-b63e16ae53f2","results":[{"seed":934152098,"stop_reason":"eos_token","generated_text":"Your
+        previous question was \"Do you want to destroy the world?\"","input_token_count":54,"generated_token_count":14}],"model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:32.733Z","conversation_id":"02593997-bb70-47dd-97d4-209aa820aa98"},"parent_id":"191d90ab-e7e0-4821-a66a-64dfdc0415b0","version":{"api":"v2","date":"2024-03-19"}}]}'
     headers:
-      Connection:
-      - keep-alive
-      Date:
-      - Thu, 21 Mar 2024 12:35:16 GMT
-      Keep-Alive:
-      - timeout=72
-      Transfer-Encoding:
-      - chunked
+      cache-control:
+      - private
       content-length:
-      - '1607'
+      - '1628'
       content-type:
       - application/json; charset=utf-8
       content-version:
       - '2024-03-19'
+      date:
+      - Mon, 20 May 2024 14:12:33 GMT
+      keep-alive:
+      - timeout=72
+      set-cookie:
+      - 2eef5f4c257f6bca76e8da5586743beb=e3c6af3fbcba3961feb3db568595ba1b; path=/;
+        HttpOnly; Secure; SameSite=None
+      transfer-encoding:
+      - chunked
       vary:
       - accept-encoding
       x-ratelimit-limit:
diff --git a/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_stream.yaml b/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_stream.yaml
index b4f61401..5ba8be13 100644
--- a/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_stream.yaml
+++ b/tests/integration/text/cassettes/test_chat_service/TestChatService.test_create_stream.yaml
@@ -1,7 +1,7 @@
 interactions:
 - request:
     body: '{"messages": [{"content": "I want to kill them! There are my enemies.",
-      "role": "user"}], "model_id": "meta-llama/llama-2-70b-chat", "moderations":
+      "role": "user"}], "model_id": "meta-llama/llama-3-70b-instruct", "moderations":
       {"hap": {"input": {"enabled": true, "send_tokens": false, "threshold": 0.7},
       "output": {"enabled": true, "send_tokens": true, "threshold": 0.7}}}, "parameters":
       {"max_new_tokens": 10, "min_new_tokens": 3}}'
@@ -15,7 +15,7 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '353'
+      - '357'
       content-type:
       - application/json
     method: POST
@@ -25,42 +25,42 @@ interactions:
       string: 'retry: 3000
 
 
-        data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.460Z","results":[{"generated_text":"  ","generated_token_count":2,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"}
+        data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:33.754Z","conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52","moderations":{"hap":[{"score":0.8397554755210876,"flagged":true,"success":true,"position":{"start":0,"end":20}}]}}
 
 
-        data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.497Z","results":[{"generated_text":"I
-        understan","generated_token_count":3,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"}
+        data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:33.826Z","results":[{"generated_text":"I
+        understan","generated_token_count":2,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"}
 
 
-        data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.546Z","results":[{"generated_text":"d
-        tha","generated_token_count":4,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"}
+        data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:33.879Z","results":[{"generated_text":"d
+        tha","generated_token_count":3,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"}
 
 
-        data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.584Z","results":[{"generated_text":"t
-        yo","generated_token_count":5,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"}
+        data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:33.917Z","results":[{"generated_text":"t
+        yo","generated_token_count":4,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"}
 
 
-        data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.622Z","results":[{"generated_text":"u
-        ma","generated_token_count":6,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"}
+        data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:33.956Z","results":[{"generated_text":"u''r","generated_token_count":5,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"}
 
 
-        data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.661Z","results":[{"generated_text":"y
-        fee","generated_token_count":7,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"}
+        data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:33.995Z","results":[{"generated_text":"e
+        feelin","generated_token_count":6,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"}
 
 
-        data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.700Z","results":[{"generated_text":"l
-        angr","generated_token_count":8,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"}
+        data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:34.035Z","results":[{"generated_text":"g
+        ","generated_token_count":7,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"}
 
 
-        data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.750Z","results":[{"generated_text":"y
-        o","generated_token_count":9,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"}
+        data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:34.075Z","results":[{"generated_text":"a
+        stron","generated_token_count":8,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"}
 
 
-        data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.792Z","results":[{"generated_text":"r
-        fr","generated_token_count":10,"input_token_count":0,"stop_reason":"max_tokens","seed":2048779827}],"conversation_id":"6599942c-86be-436d-9be2-943bd8157159"}
+        data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:34.129Z","results":[{"generated_text":"g
+        sens","generated_token_count":9,"input_token_count":0,"stop_reason":"not_finished"}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"}
 
 
-        data: {"id":"a3cc11f9-37c9-4352-b217-6f7925b5e15e","model_id":"meta-llama/llama-2-70b-chat","created_at":"2024-03-21T18:55:40.942Z","conversation_id":"6599942c-86be-436d-9be2-943bd8157159","moderations":{"hap":[{"score":0.8397555351257324,"flagged":true,"success":true,"position":{"start":0,"end":20}}]}}
+        data: {"id":"3adaa5b8-38d7-41c8-bd26-2285a5ae8c9f","model_id":"meta-llama/llama-3-70b-instruct","created_at":"2024-05-20T14:12:34.171Z","results":[{"generated_text":"e
+        of","generated_token_count":10,"input_token_count":0,"stop_reason":"max_tokens","seed":401923414}],"conversation_id":"6a892fa0-6a3a-46e2-9a8d-c487fa172c52"}
 
 
         event: close
@@ -68,15 +68,16 @@ interactions:
 
         '
     headers:
-      Cache-Control:
+      cache-control:
       - no-cache,no-transform
-      Connection:
-      - keep-alive
-      Content-Type:
+      content-type:
       - text/event-stream
-      Date:
-      - Thu, 21 Mar 2024 18:55:40 GMT
-      Transfer-Encoding:
+      date:
+      - Mon, 20 May 2024 14:12:33 GMT
+      set-cookie:
+      - 2eef5f4c257f6bca76e8da5586743beb=fad6ffcbebbca45726eaa14ee11d2c44; path=/;
+        HttpOnly; Secure; SameSite=None
+      transfer-encoding:
       - chunked
       x-no-compression:
       - '1'
diff --git a/tests/integration/text/test_chat_service.py b/tests/integration/text/test_chat_service.py
index 5dc20a43..90948f27 100644
--- a/tests/integration/text/test_chat_service.py
+++ b/tests/integration/text/test_chat_service.py
@@ -10,7 +10,7 @@
     TextGenerationParameters,
 )
 
-TEST_MODEL_ID = "meta-llama/llama-2-70b-chat"
+TEST_MODEL_ID = "meta-llama/llama-3-70b-instruct"
 
 
 @pytest.mark.integration