From 42e060dd373af3ab4056de1fcdda1ba7124d0c55 Mon Sep 17 00:00:00 2001 From: Dhruvj07 Date: Thu, 6 Feb 2025 18:47:56 +0530 Subject: [PATCH] fix: Removing the code and moving to other repo --- examples/agents_deepval.ipynb | 1521 --------------------------------- 1 file changed, 1521 deletions(-) delete mode 100644 examples/agents_deepval.ipynb diff --git a/examples/agents_deepval.ipynb b/examples/agents_deepval.ipynb deleted file mode 100644 index 4d0e377..0000000 --- a/examples/agents_deepval.ipynb +++ /dev/null @@ -1,1521 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Embedding Set for RAG Model" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[31m\u001b[1mWARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.\u001b[0m\n", - " * Running on http://localhost:5001\n", - "\u001b[33mPress CTRL+C to quit\u001b[0m\n", - "127.0.0.1 - - [05/Feb/2025 13:47:23] \"POST /store HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [05/Feb/2025 13:47:27] \"POST /query HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [05/Feb/2025 13:56:15] \"POST /query HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [05/Feb/2025 13:56:17] \"POST /query HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [05/Feb/2025 13:56:18] \"POST /query HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [05/Feb/2025 13:56:20] \"POST /query HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [05/Feb/2025 13:56:22] \"POST /query HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [05/Feb/2025 14:01:05] \"POST /query HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [05/Feb/2025 14:01:07] \"POST /query HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [05/Feb/2025 14:01:08] \"POST /query HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [05/Feb/2025 14:01:09] \"POST /query HTTP/1.1\" 200 -\n", - "127.0.0.1 - - [05/Feb/2025 14:01:11] \"POST /query HTTP/1.1\" 200 -\n" - ] - } - ], - "source": [ - "import os\n", - 
"from flask import Flask, request, jsonify\n", - "from werkzeug.serving import run_simple\n", - "from threading import Thread\n", - "\n", - "# LangChain imports\n", - "from langchain_openai import ChatOpenAI, OpenAIEmbeddings\n", - "from langchain_chroma import Chroma\n", - "from langchain.schema import Document\n", - "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", - "from langchain.schema import SystemMessage, HumanMessage\n", - "\n", - "# ----------------------------------------------------------------------------\n", - "# 1) Environment Variables for Javelin + GPT\n", - "# ----------------------------------------------------------------------------\n", - "llm_api_key = os.environ[\"OPENAI_API_KEY\"] = \"\"\n", - "\n", - "javelin_api_key = os.environ[\"JAVELIN_API_KEY\"] = \"\"\n", - "\n", - "\n", - "# ----------------------------------------------------------------------------\n", - "# 2) Javelin Chat Model (GPT-3.5-turbo)\n", - "# ----------------------------------------------------------------------------\n", - "class JavelinOpenAI(ChatOpenAI):\n", - " def __init__(self, temperature=0.7, route=\"testing\"):\n", - " javelin_headers = {\"x-api-key\": os.environ[\"JAVELIN_API_KEY\"]}\n", - " super().__init__(\n", - " openai_api_base=f\"https://api-dev.javelin.live/v1/query/{route}\",\n", - " openai_api_key=os.environ[\"OPENAI_API_KEY\"],\n", - " model_name=\"gpt-3.5-turbo\",\n", - " temperature=temperature,\n", - " default_headers=javelin_headers\n", - " )\n", - "\n", - "# ----------------------------------------------------------------------------\n", - "# 3) Javelin Embeddings Model (If Supported)\n", - "# ----------------------------------------------------------------------------\n", - "class JavelinOpenAIEmbeddings(OpenAIEmbeddings):\n", - " def __init__(self, model=\"text-embedding-ada-002\", route=\"embeddings_route\"):\n", - " javelin_headers = {\"x-api-key\": os.environ[\"JAVELIN_API_KEY\"]}\n", - " super().__init__(\n", - " 
openai_api_base=f\"https://api-dev.javelin.live/v1/query/{route}\",\n", - " openai_api_key=os.environ[\"OPENAI_API_KEY\"],\n", - " model=model,\n", - " default_headers=javelin_headers\n", - " )\n", - "\n", - "# ----------------------------------------------------------------------------\n", - "# 4) Flask App Setup\n", - "# ----------------------------------------------------------------------------\n", - "app = Flask(__name__)\n", - "\n", - "# ----------------------------------------------------------------------------\n", - "# 5) Initialize Javelin LLM & Embeddings\n", - "# ----------------------------------------------------------------------------\n", - "llm = JavelinOpenAI(temperature=0.7, route=\"testing\")\n", - "\n", - "# If Javelin supports embeddings, use this:\n", - "embedding_model_custom = JavelinOpenAIEmbeddings(\n", - " model=\"text-embedding-ada-002\",\n", - " route=\"embeddings_route\"\n", - ")\n", - "\n", - "# If Javelin doesn't support embeddings, fallback to OpenAI:\n", - "# from langchain.embeddings.openai import OpenAIEmbeddings\n", - "# embedding_model_custom = OpenAIEmbeddings(openai_api_key=os.environ[\"OPENAI_API_KEY\"])\n", - "\n", - "vector_store = Chroma(\n", - " persist_directory=\"chroma_store_custom\",\n", - " embedding_function=embedding_model_custom\n", - ")\n", - "\n", - "# ----------------------------------------------------------------------------\n", - "# 6) Sample Text\n", - "# ----------------------------------------------------------------------------\n", - "sample_text = \"\"\"\n", - "Artificial Intelligence (AI) is a rapidly advancing technology ...\n", - "... 
enabling early intervention.\n", - "\"\"\"\n", - "\n", - "# ----------------------------------------------------------------------------\n", - "# 7) /store Endpoint: Split & Store\n", - "# ----------------------------------------------------------------------------\n", - "@app.route('/store', methods=['POST'])\n", - "def store_document():\n", - " documents = [Document(page_content=sample_text)]\n", - " text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)\n", - " split_docs = text_splitter.split_documents(documents)\n", - " vector_store.add_documents(split_docs)\n", - " return jsonify({\"message\": \"Document stored successfully!\"})\n", - "\n", - "# ----------------------------------------------------------------------------\n", - "# 8) /query Endpoint: Similarity Search + GPT\n", - "# ----------------------------------------------------------------------------\n", - "@app.route('/query', methods=['POST'])\n", - "def get_final_response():\n", - " data = request.get_json()\n", - " query = data.get('query', '')\n", - "\n", - " # 1) Retrieve relevant docs\n", - " relevant_docs = vector_store.similarity_search(query, k=2)\n", - " combined_docs = \"\\n\".join(doc.page_content for doc in relevant_docs)\n", - "\n", - " # 2) Build chat messages\n", - " messages = [\n", - " SystemMessage(content=\"You are a helpful scientific assistant.\"),\n", - " HumanMessage(content=f\"Question: {query}\\nDocuments:\\n{combined_docs}\\nAnswer:\")\n", - " ]\n", - "\n", - " # 3) Invoke GPT-3.5-turbo via Javelin\n", - " response = llm.invoke(messages)\n", - "\n", - " # 4) Ensure response is safe to access\n", - " response_text = response.content if hasattr(response, \"content\") else str(response)\n", - "\n", - " return jsonify({\"response\": response_text})\n", - "\n", - "# ----------------------------------------------------------------------------\n", - "# 9) Run App in Background Thread\n", - "# 
----------------------------------------------------------------------------\n", - "def run_app():\n", - " run_simple('localhost', 5001, app, use_reloader=False, use_debugger=False)\n", - "\n", - "thread = Thread(target=run_app)\n", - "thread.start()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## testing the storing" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Store Response: {\"message\":\"Document stored successfully!\"}\n", - "\n" - ] - } - ], - "source": [ - "import requests\n", - "base_url = \"http://localhost:5001\"\n", - "store_resp = requests.post(f\"{base_url}/store\")\n", - "print(\"Store Response:\", store_resp.text)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## testing the retrieving" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Status Code: 200\n", - "Response Text: {\"response\":\"AI has a significant impact on healthcare by enabling early intervention through the rapid advancement of technology. It helps in early detection of diseases, personalized treatment plans, and improved patient outcomes. The ability of AI to analyze large amounts of data quickly and accurately can lead to more precise diagnoses and treatment options, ultimately benefiting both patients and healthcare providers.\"}\n", - "\n", - "Query Response: {'response': 'AI has a significant impact on healthcare by enabling early intervention through the rapid advancement of technology. It helps in early detection of diseases, personalized treatment plans, and improved patient outcomes. 
The ability of AI to analyze large amounts of data quickly and accurately can lead to more precise diagnoses and treatment options, ultimately benefiting both patients and healthcare providers.'}\n" - ] - } - ], - "source": [ - "query_data = {\"query\": \"What impact does AI have on healthcare?\"}\n", - "resp = requests.post(f\"{base_url}/query\", json=query_data)\n", - "print(\"Status Code:\", resp.status_code)\n", - "print(\"Response Text:\", resp.text)\n", - "print(\"Query Response:\", resp.json())\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import os " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Langgraph for Rag using Re-act agent code using OPen Ai" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
Congratulations! Login successful 🙌 \n",
-       "
\n" - ], - "text/plain": [ - "Congratulations! Login successful 🙌 \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=== Agent Input ===\n", - "Please generate exactly 5 test queries for my app, and then immediately execute them. App Name: RAG_FOR_AI, Description: My app provides information about documents in a RAG model., System Prompt: Hello, you are a helpful scientific assistant. Based on the provided documents, answer the user's query. Document: Artificial Intelligence (AI) is a rapidly advancing technology that is transforming industries and societies across the globe. In recent years, AI models have revolutionized sectors such as healthcare, automotive, finance, and entertainment. These technologies enable machines to simulate human-like cognitive functions with unprecedented accuracy., Endpoint: http://127.0.0.1:5001/query, Extra Definitions: .\n", - "=== Agent Output ===\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/z_/93w3rhm91913vgvg7hxgnf9r0000gn/T/ipykernel_25346/2113282708.py:96: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 1.0. Use :meth:`~RunnableSequence, e.g., `prompt | llm`` instead.\n", - " llm_chain = LLMChain(llm=llm, prompt=prompt)\n", - "/var/folders/z_/93w3rhm91913vgvg7hxgnf9r0000gn/T/ipykernel_25346/2113282708.py:98: LangChainDeprecationWarning: The method `Chain.run` was deprecated in langchain 0.1.0 and will be removed in 1.0. 
Use :meth:`~invoke` instead.\n", - " generated = llm_chain.run({\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Generating test queries...\n", - "[DEBUG] Raw LLM output: [\n", - " \"What is Artificial Intelligence?\",\n", - " \"How is AI transforming industries?\",\n", - " \"Which sectors have been revolutionized by AI models?\",\n", - " \"What functions can machines simulate with AI technologies?\",\n", - " \"Can AI accurately simulate human-like cognitive functions?\"\n", - "]\n", - "[DEBUG] Parsed queries: ['What is Artificial Intelligence?', 'How is AI transforming industries?', 'Which sectors have been revolutionized by AI models?', 'What functions can machines simulate with AI technologies?', 'Can AI accurately simulate human-like cognitive functions?']\n", - "[DEBUG] Reading test queries from RAG_FOR_AI_test_cases.csv...\n", - "[DEBUG] Query: What is Artificial Intelligence? -> Response: Artificial Intelligence (AI) is a technology that allows machines to imitate human-like cognitive functions, such as problem-solving, learning, and decision-making, with high accuracy and efficiency. It involves the use of data and advanced algorithms to enable machines to think and act like humans. AI has the potential to transform industries and societies by automating tasks and improving decision-making processes.\n", - "[DEBUG] Query: How is AI transforming industries? -> Response: AI is transforming industries in various ways, including:\n", - "\n", - "1. Automation and efficiency: AI has the ability to automate mundane and repetitive tasks, freeing up human workers to focus on more complex and creative tasks. This leads to increased efficiency and productivity in industries such as manufacturing, logistics, and customer service.\n", - "\n", - "2. Data analysis and decision-making: AI has the ability to process and analyze large amounts of data at a faster pace and with more accuracy than humans. 
This allows businesses to make data-driven decisions and gain insights that were previously inaccessible.\n", - "\n", - "3. Personalization and customer experience: With the help of AI, businesses can analyze customer data and behavior to personalize their products and services, providing a more tailored and seamless customer experience.\n", - "\n", - "4. Predictive maintenance: AI can analyze data and detect patterns to predict when equipment or machinery may need maintenance, reducing downtime and costs for industries such as manufacturing, transportation, and energy.\n", - "\n", - "5. Healthcare advancements: AI is transforming the healthcare industry by improving diagnosis accuracy, enabling personalized treatments, and streamlining administrative tasks. This has the potential to improve patient outcomes and reduce healthcare costs.\n", - "\n", - "6. Advancements in research and development: AI is being used in various industries, including pharmaceuticals, to accelerate research and development processes, leading to the discovery of new drugs,\n", - "[DEBUG] Query: Which sectors have been revolutionized by AI models? -> Response: Healthcare, automotive, finance, and entertainment are some of the sectors that have been revolutionized by AI models.\n", - "[DEBUG] Query: What functions can machines simulate with AI technologies? -> Response: AI technologies can simulate various functions, including problem-solving, learning, decision-making, and data analysis. They can also assist in tasks such as speech recognition, natural language processing, image and video recognition, and predictive analytics. 
Additionally, AI technologies can simulate human-like behavior and interactions, such as virtual assistants and chatbots.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Query >>> What is Artificial Intelligence?\n", - "Relevant docs (raw): [Document(id='b97b4ec4-7673-4774-9a35-150c277f3f71', metadata={}, page_content='Artificial Intelligence (AI) is a rapidly advancing technology that is transforming industries and societies across the globe. In recent years, the implementation of AI models has revolutionized'), Document(id='3ff65d1b-bf05-41fd-99e6-7815344307ca', metadata={}, page_content='enable machines to simulate human-like cognitive functions, such as problem-solving, learning, and decision-making, with unprecedented accuracy and efficiency. AI models leverage vast amounts of data')]\n", - "Relevant docs are >>> Artificial Intelligence (AI) is a rapidly advancing technology that is transforming industries and societies across the globe. In recent years, the implementation of AI models has revolutionized\n", - "enable machines to simulate human-like cognitive functions, such as problem-solving, learning, and decision-making, with unprecedented accuracy and efficiency. AI models leverage vast amounts of data\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "127.0.0.1 - - [05/Feb/2025 13:05:48] \"POST /query HTTP/1.1\" 200 -\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Query >>> How is AI transforming industries?\n", - "Relevant docs (raw): [Document(id='5b0b7d97-e28b-4101-b0a5-5990e55887d4', metadata={}, page_content='unimaginable. 
As AI continues to evolve, it is reshaping the workforce, influencing economic trends, and even altering the way people interact with technology on a day-to-day basis.'), Document(id='b97b4ec4-7673-4774-9a35-150c277f3f71', metadata={}, page_content='Artificial Intelligence (AI) is a rapidly advancing technology that is transforming industries and societies across the globe. In recent years, the implementation of AI models has revolutionized')]\n", - "Relevant docs are >>> unimaginable. As AI continues to evolve, it is reshaping the workforce, influencing economic trends, and even altering the way people interact with technology on a day-to-day basis.\n", - "Artificial Intelligence (AI) is a rapidly advancing technology that is transforming industries and societies across the globe. In recent years, the implementation of AI models has revolutionized\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "127.0.0.1 - - [05/Feb/2025 13:05:51] \"POST /query HTTP/1.1\" 200 -\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Query >>> Which sectors have been revolutionized by AI models?\n", - "Relevant docs (raw): [Document(id='b516b2d1-083c-4db4-853c-550e5ceb57fb', metadata={}, page_content='implementation of AI models has revolutionized sectors such as healthcare, automotive, finance, and entertainment, among many others. These technologies enable machines to simulate human-like'), Document(id='b97b4ec4-7673-4774-9a35-150c277f3f71', metadata={}, page_content='Artificial Intelligence (AI) is a rapidly advancing technology that is transforming industries and societies across the globe. In recent years, the implementation of AI models has revolutionized')]\n", - "Relevant docs are >>> implementation of AI models has revolutionized sectors such as healthcare, automotive, finance, and entertainment, among many others. 
These technologies enable machines to simulate human-like\n", - "Artificial Intelligence (AI) is a rapidly advancing technology that is transforming industries and societies across the globe. In recent years, the implementation of AI models has revolutionized\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "127.0.0.1 - - [05/Feb/2025 13:05:53] \"POST /query HTTP/1.1\" 200 -\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Query >>> What functions can machines simulate with AI technologies?\n", - "Relevant docs (raw): [Document(id='3ff65d1b-bf05-41fd-99e6-7815344307ca', metadata={}, page_content='enable machines to simulate human-like cognitive functions, such as problem-solving, learning, and decision-making, with unprecedented accuracy and efficiency. AI models leverage vast amounts of data'), Document(id='b516b2d1-083c-4db4-853c-550e5ceb57fb', metadata={}, page_content='implementation of AI models has revolutionized sectors such as healthcare, automotive, finance, and entertainment, among many others. These technologies enable machines to simulate human-like')]\n", - "Relevant docs are >>> enable machines to simulate human-like cognitive functions, such as problem-solving, learning, and decision-making, with unprecedented accuracy and efficiency. AI models leverage vast amounts of data\n", - "implementation of AI models has revolutionized sectors such as healthcare, automotive, finance, and entertainment, among many others. 
These technologies enable machines to simulate human-like\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "127.0.0.1 - - [05/Feb/2025 13:05:54] \"POST /query HTTP/1.1\" 200 -\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Query >>> Can AI accurately simulate human-like cognitive functions?\n", - "Relevant docs (raw): [Document(id='3ff65d1b-bf05-41fd-99e6-7815344307ca', metadata={}, page_content='enable machines to simulate human-like cognitive functions, such as problem-solving, learning, and decision-making, with unprecedented accuracy and efficiency. AI models leverage vast amounts of data'), Document(id='b516b2d1-083c-4db4-853c-550e5ceb57fb', metadata={}, page_content='implementation of AI models has revolutionized sectors such as healthcare, automotive, finance, and entertainment, among many others. These technologies enable machines to simulate human-like')]\n", - "Relevant docs are >>> enable machines to simulate human-like cognitive functions, such as problem-solving, learning, and decision-making, with unprecedented accuracy and efficiency. AI models leverage vast amounts of data\n", - "implementation of AI models has revolutionized sectors such as healthcare, automotive, finance, and entertainment, among many others. These technologies enable machines to simulate human-like\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "127.0.0.1 - - [05/Feb/2025 13:05:55] \"POST /query HTTP/1.1\" 200 -\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/z_/93w3rhm91913vgvg7hxgnf9r0000gn/T/ipykernel_25346/2113282708.py:211: LangChainDeprecationWarning: The method `BaseChatModel.__call__` was deprecated in langchain-core 0.1.7 and will be removed in 1.0. 
Use :meth:`~invoke` instead.\n", - " reference_answer = str(reference_llm(ref_prompt))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Query: Can AI accurately simulate human-like cognitive functions? -> Response: Yes, AI has the ability to accurately simulate human-like cognitive functions with advanced algorithms and access to vast amounts of data. This has led to significant advancements in various industries and has the potential to continue improving and expanding in the future. However, there are still limitations and challenges in fully replicating the complexity and nuance of human cognition.\n" - ] - }, - { - "data": { - "text/html": [ - "
/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n",
-       "UserWarning: install \"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n", - "UserWarning: install \"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Reference answer: content='Artificial Intelligence (AI) is a branch of computer science that focuses on creating intelligent machines that can perform tasks that typically require human intelligence. These tasks include learning, reasoning, problem-solving, perception, language understanding, and decision-making.\\n\\nAI systems are designed to mimic human cognitive functions such as learning from experience, adapting to new situations, and understanding natural language. AI technologies can be classified into two main categories: Narrow AI and General AI. Narrow AI, also known as Weak AI, is designed to perform specific tasks, such as speech recognition or image recognition. General AI, also known as Strong AI, is a hypothetical form of AI that can understand, learn, and apply knowledge in a wide range of tasks, similar to human intelligence.\\n\\nAI technologies are used in a variety of applications, including virtual assistants like Siri and Alexa, self-driving cars, medical diagnosis, financial trading, and predictive analytics. Machine learning, a subset of AI, is a key technology that enables computers to learn from data and improve their performance over time without being explicitly programmed.\\n\\nWhile AI has the potential to revolutionize industries and improve efficiency, there are also concerns about the ethical implications of AI, such as bias in algorithms, job displacement, and privacy issues. 
As AI continues to advance, it is important for researchers, policymakers, and society as a whole to consider the implications and ensure that AI technologies are developed and used responsibly.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 282, 'prompt_tokens': 19, 'total_tokens': 301, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-5ee5352c-644a-4164-b6d5-cc10882dd237-0' usage_metadata={'input_tokens': 19, 'output_tokens': 282, 'total_tokens': 301, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n",
-       "UserWarning: install \"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n", - "UserWarning: install \"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Reference answer: content='Artificial Intelligence (AI) is transforming industries in a multitude of ways, revolutionizing the way businesses operate and making processes more efficient and effective. Some of the key ways in which AI is transforming industries include:\\n\\n1. Automation: AI is enabling automation of repetitive tasks and processes, freeing up human workers to focus on more strategic and creative tasks. This is particularly evident in industries such as manufacturing, where AI-powered robots are being used to perform tasks that are dangerous or monotonous for humans.\\n\\n2. Data analysis: AI is able to analyze vast amounts of data at a speed and accuracy that is beyond human capability. This is enabling businesses to gain valuable insights from their data, leading to better decision-making and improved efficiency. Industries such as finance, healthcare, and marketing are using AI to analyze data and make predictions that were previously impossible.\\n\\n3. Personalization: AI is enabling businesses to personalize their products and services to individual customers, based on their preferences and behavior. This is particularly evident in industries such as retail and e-commerce, where AI-powered recommendation engines are being used to suggest products to customers based on their past purchases and browsing history.\\n\\n4. Customer service: AI-powered chatbots and virtual assistants are transforming customer service in industries such as retail, banking, and healthcare. 
These AI-powered tools are able to provide instant responses to customer queries, improving customer satisfaction and reducing the need for human customer service agents.\\n\\n5. Predictive maintenance: AI is being used in industries such as manufacturing and transportation to predict when equipment is likely to fail, enabling businesses to perform maintenance before a breakdown occurs. This is helping to reduce downtime and maintenance costs, leading to increased efficiency and productivity.\\n\\nOverall, AI is transforming industries by enabling automation, improving data analysis, personalizing products and services, enhancing customer service, and enabling predictive maintenance. Businesses that embrace AI are likely to gain a competitive advantage in their industry and drive innovation and growth.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 386, 'prompt_tokens': 20, 'total_tokens': 406, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-50b52856-a7ff-4786-aec9-5b570a0a1c16-0' usage_metadata={'input_tokens': 20, 'output_tokens': 386, 'total_tokens': 406, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n",
-       "UserWarning: install \"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n", - "UserWarning: install \"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Reference answer: content='AI models have revolutionized a wide range of sectors across industries, transforming the way businesses operate and improving efficiency, productivity, and decision-making processes. Some of the sectors that have been significantly impacted by AI models include:\\n\\n1. Healthcare: AI models have been used to analyze medical images, diagnose diseases, predict patient outcomes, and personalize treatment plans. AI-powered tools have also been developed to improve patient care, streamline administrative tasks, and enhance drug discovery processes.\\n\\n2. Finance: AI models have been used in the finance sector to detect fraud, automate trading, predict market trends, and personalize customer experiences. AI-powered chatbots and virtual assistants have also been deployed to provide customer support and financial advice.\\n\\n3. Retail: AI models have transformed the retail sector by enabling personalized recommendations, optimizing pricing strategies, forecasting demand, and improving inventory management. AI-powered tools have also been used to enhance the shopping experience, automate customer service, and streamline supply chain operations.\\n\\n4. Manufacturing: AI models have revolutionized the manufacturing sector by enabling predictive maintenance, optimizing production processes, improving quality control, and reducing downtime. AI-powered robots and autonomous systems have also been deployed to automate tasks and enhance efficiency in manufacturing operations.\\n\\n5. 
Transportation: AI models have been used in the transportation sector to optimize route planning, predict traffic patterns, improve safety, and enhance customer experiences. AI-powered autonomous vehicles have also been developed to revolutionize the way people and goods are transported.\\n\\n6. Agriculture: AI models have transformed the agriculture sector by enabling precision farming, monitoring crop health, predicting yields, and optimizing resource usage. AI-powered drones and sensors have also been deployed to automate tasks and improve productivity in agricultural operations.\\n\\nOverall, AI models have had a profound impact on various sectors, driving innovation, improving decision-making processes, and transforming the way businesses operate in the digital age. As AI technology continues to advance, we can expect to see even more sectors being revolutionized by AI models in the future.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 393, 'prompt_tokens': 24, 'total_tokens': 417, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-9a556f0c-f561-4965-8099-5b013f270cd0-0' usage_metadata={'input_tokens': 24, 'output_tokens': 393, 'total_tokens': 417, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n",
-       "UserWarning: install \"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n", - "UserWarning: install \"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Reference answer: content='Machines can simulate a wide range of functions using AI technologies. Some of the key functions that machines can simulate with AI include:\\n\\n1. Pattern recognition: Machines can be trained to recognize patterns in data, images, and text. This can be used in various applications such as facial recognition, handwriting recognition, and speech recognition.\\n\\n2. Natural language processing: Machines can be programmed to understand and generate human language. This can be used in applications such as chatbots, language translation, and sentiment analysis.\\n\\n3. Predictive analytics: Machines can analyze large amounts of data to make predictions about future events. This can be used in applications such as forecasting sales, predicting customer behavior, and identifying potential risks.\\n\\n4. Image and video analysis: Machines can analyze images and videos to identify objects, people, and activities. This can be used in applications such as surveillance, medical imaging, and autonomous vehicles.\\n\\n5. Autonomous decision-making: Machines can be programmed to make decisions based on predefined rules or algorithms. This can be used in applications such as self-driving cars, automated trading systems, and recommendation engines.\\n\\n6. Cognitive computing: Machines can simulate human cognitive functions such as learning, reasoning, and problem-solving. 
This can be used in applications such as personalized learning systems, medical diagnosis, and fraud detection.\\n\\nOverall, machines can simulate a wide range of functions with AI technologies, making them capable of performing tasks that were previously thought to be exclusive to humans. This has led to the development of innovative solutions in various industries and has the potential to revolutionize the way we live and work in the future.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 319, 'prompt_tokens': 23, 'total_tokens': 342, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-725cce89-bdf7-41c8-bb21-a20d2493d744-0' usage_metadata={'input_tokens': 23, 'output_tokens': 319, 'total_tokens': 342, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n",
-       "UserWarning: install \"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n", - "UserWarning: install \"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Reference answer: content=\"AI has made significant advancements in simulating human-like cognitive functions, but it is still not able to accurately replicate all aspects of human cognition. \\n\\nAI systems are able to perform tasks such as natural language processing, image recognition, and decision-making with a high level of accuracy. These systems use algorithms and machine learning techniques to analyze data and make predictions or decisions based on that data. They can learn from experience and improve their performance over time.\\n\\nHowever, there are still limitations to AI's ability to simulate human-like cognitive functions. One of the main challenges is in understanding and replicating the complex and nuanced ways in which humans think and reason. Human cognition is influenced by emotions, intuition, creativity, and social interactions, which are difficult for AI systems to fully grasp.\\n\\nAdditionally, AI systems lack the ability to truly understand context, make connections between different pieces of information, and exhibit common sense reasoning. While they can perform specific tasks with high accuracy, they often lack the broader understanding and flexibility that humans possess.\\n\\nIn conclusion, while AI has made significant progress in simulating human-like cognitive functions, it is still not able to accurately replicate all aspects of human cognition. 
There is ongoing research and development in the field of AI to improve its capabilities and bridge the gap between artificial and human intelligence.\" additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 258, 'prompt_tokens': 23, 'total_tokens': 281, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-6a173bf4-9616-4577-a743-e9dbd809dd06-0' usage_metadata={'input_tokens': 23, 'output_tokens': 258, 'total_tokens': 281, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Evaluation Result:\n",
-      "Evaluation complete. Avg Score: 1.00. Results stored in RAG_FOR_AI_test_cases.csv.\n",
-      "Final State of Agent Returns >>> Test queries have been generated and executed successfully for the app \"RAG_FOR_AI\".\n"
-     ]
-    }
-   ],
-   "source": [
-    "import os\n",
-    "import json\n",
-    "import requests\n",
-    "import pandas as pd\n",
-    "from pydantic import BaseModel\n",
-    "\n",
-    "from langchain_openai import ChatOpenAI\n",
-    "from langchain.prompts import PromptTemplate\n",
-    "from langchain.chains import LLMChain\n",
-    "\n",
-    "from langgraph.prebuilt import create_react_agent\n",
-    "from langgraph.checkpoint.memory import MemorySaver\n",
-    "from langchain_core.tools import tool\n",
-    "\n",
-    "# Import DeepEval modules\n",
-    "from deepeval import login_with_confident_api_key, evaluate  # evaluate helper\n",
-    "from deepeval.metrics import AnswerRelevancyMetric\n",
-    "from deepeval.test_case import LLMTestCase\n",
-    "\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Configure API Keys and Log In\n",
-    "# -----------------------------\n",
-    "login_with_confident_api_key(os.environ[\"DEEPEVAL_API_KEY\"])\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Define AppParams model\n",
-    "# -----------------------------\n",
-    "class AppParams(BaseModel):\n",
-    "    app_name: str\n",
-    "    description: str\n",
-    "    system_prompt: str\n",
-    "    endpoint: str\n",
-    "    extra_definition: str\n",
-    "    k: int\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Utility: Custom Parsing Function\n",
-    "# -----------------------------\n",
-    "def custom_parsing_for_rag(response_text: str) -> str:\n",
-    "    \"\"\"\n",
-    "    Parse the response from the RAG endpoint (expected as JSON).\n",
-    "    Return the value associated with \"answer\" if available; if not, then \"response\".\n",
-    "    Otherwise, return the original text.\n",
-    "    \"\"\"\n",
-    "    try:\n",
-    "        data = json.loads(response_text)\n",
-    "        return data.get(\"answer\") or data.get(\"response\") or response_text\n",
-    "    except Exception:\n",
-    "        return response_text\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Tool: Generate Test Cases\n",
-    "# -----------------------------\n",
-    "@tool\n",
-    "def generate_test_cases(app_name: str, description: str, system_prompt: str,\n",
-    "                        endpoint: str, extra_definition: str, k: int) -> dict:\n",
-    "    \"\"\"\n",
-    "    Name: generate_test_cases\n",
-    "    Description: Generate exactly k plain test queries for the specified app details using an LLM.\n",
-    "    Input Arguments:\n",
-    "        - app_name (str): The name of the application.\n",
-    "        - description (str): A brief description of the app.\n",
-    "        - system_prompt (str): The system prompt or instructions for generating queries.\n",
-    "        - endpoint (str): The API endpoint to be tested.\n",
-    "        - extra_definition (str): Any extra definitions or clarifications needed.\n",
-    "        - k (int): The number of test queries to generate.\n",
-    "    Output:\n",
-    "        - A dictionary with {\"result\": \"\"}.\n",
-    "\n",
-    "    How it works:\n",
-    "    1) Calls an LLM to generate exactly k queries in JSON array format.\n",
-    "    2) Saves queries in CSV with columns \"Test_Cases\" (the query) and \"Response\" (empty).\n",
-    "    \"\"\"\n",
-    "    llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0.7)\n",
-    "\n",
-    "    prompt_template = \"\"\"\n",
-    "    Application Name: {app_name}\n",
-    "    Description: {description}\n",
-    "    System Prompt: {system_prompt}\n",
-    "    Extra Definitions: {extra_definition}\n",
-    "    Endpoint: {endpoint}\n",
-    "    Number of Test Queries: {k}\n",
-    "\n",
-    "    Based on the above details, generate exactly {k} test queries for automated testing.\n",
-    "    Each test query should be a plain text string that represents a query to test the endpoint.\n",
-    "    Do not include any extra information, explanations, or expected output.\n",
-    "    Return the result strictly as a JSON array of strings, for example:\n",
-    "    [\"Query 1\", \"Query 2\", ...]\n",
-    "    \"\"\"\n",
-    "    prompt = PromptTemplate(\n",
-    "        template=prompt_template,\n",
-    "        input_variables=[\"app_name\", \"description\", \"system_prompt\", \"extra_definition\", \"endpoint\", \"k\"]\n",
-    "    )\n",
-    "\n",
-    "    llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
-    "    print(\"[DEBUG] Generating test queries...\")\n",
-    "    generated = llm_chain.run({\n",
-    "        \"app_name\": app_name,\n",
-    "        \"description\": description,\n",
-    "        \"system_prompt\": system_prompt,\n",
-    "        \"extra_definition\": extra_definition,\n",
-    "        \"endpoint\": endpoint,\n",
-    "        \"k\": k\n",
-    "    })\n",
-    "    print(\"[DEBUG] Raw LLM output:\", generated)\n",
-    "\n",
-    "    # Attempt to parse as JSON\n",
-    "    try:\n",
-    "        queries = json.loads(generated)\n",
-    "        if not isinstance(queries, list):\n",
-    "            raise ValueError(\"JSON output is not a list.\")\n",
-    "    except Exception as e:\n",
-    "        print(\"[ERROR] Parsing JSON failed. Splitting by newlines. Error:\", e)\n",
-    "        queries = [q.strip() for q in generated.split(\"\\n\") if q.strip()]\n",
-    "\n",
-    "    print(\"[DEBUG] Parsed queries:\", queries)\n",
-    "    df = pd.DataFrame({\"Test_Cases\": queries, \"Response\": [\"\"] * len(queries)})\n",
-    "    csv_file = f\"{app_name}_test_cases.csv\"\n",
-    "    df.to_csv(csv_file, index=False)\n",
-    "    return {\"result\": f\"Test queries saved in {csv_file}.\"}\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Tool: Execute Test Cases\n",
-    "# -----------------------------\n",
-    "@tool\n",
-    "def execute_test_cases(app_name: str, endpoint: str, extra_headers: dict = None) -> dict:\n",
-    "    \"\"\"\n",
-    "    Name: execute_test_cases\n",
-    "    Description: Execute existing test queries for the specified app by sending them to the given endpoint.\n",
-    "    Input Arguments:\n",
-    "        - app_name (str): The name of the application.\n",
-    "        - endpoint (str): The API endpoint to be tested.\n",
-    "        - extra_headers (dict, optional): Additional HTTP headers for requests.\n",
-    "    Output:\n",
-    "        - A dictionary with {\"result\": \"\"}.\n",
-    "\n",
-    "    How it works:\n",
-    "    1) Reads the CSV file named \"_test_cases.csv\".\n",
-    "    2) POSTs each query (from \"Test_Cases\" column) to 'endpoint' (JSON payload: {\"query\": }).\n",
-    "    3) Parses the JSON response if possible, or returns the raw text otherwise.\n",
-    "    4) Saves the response to the same CSV (under \"Response\" column).\n",
-    "    5) Calls evaluate_test_cases to measure quality via DeepEval and prints the evaluation result.\n",
-    "    \"\"\"\n",
-    "    csv_file = f\"{app_name}_test_cases.csv\"\n",
-    "    print(f\"[DEBUG] Reading test queries from {csv_file}...\")\n",
-    "    try:\n",
-    "        df = pd.read_csv(csv_file)\n",
-    "    except Exception as e:\n",
-    "        return {\"result\": f\"Could not read CSV {csv_file}: {str(e)}\"}\n",
-    "    \n",
-    "    headers = extra_headers if extra_headers is not None else {}\n",
-    "    responses = []\n",
-    "    for query in df[\"Test_Cases\"]:\n",
-    "        try:\n",
-    "            res = requests.post(endpoint, json={\"query\": query}, headers=headers)\n",
-    "            if res.status_code == 200:\n",
-    "                text = res.text\n",
-    "            else:\n",
-    "                text = f\"Error {res.status_code}: {res.text}\"\n",
-    "        except Exception as e:\n",
-    "            text = f\"Request failed: {str(e)}\"\n",
-    "        parsed = custom_parsing_for_rag(text)\n",
-    "        responses.append(parsed)\n",
-    "        print(f\"[DEBUG] Query: {query} -> Response: {parsed}\")\n",
-    "\n",
-    "    df[\"Response\"] = responses\n",
-    "    df.to_csv(csv_file, index=False)\n",
-    "\n",
-    "    # Evaluate after execution\n",
-    "    eval_result = evaluate_test_cases(app_name)\n",
-    "    print(\"Evaluation Result:\")\n",
-    "    print(eval_result[\"result\"])\n",
-    "\n",
-    "    return {\"result\": f\"Executed queries; responses saved in {csv_file}.\"}\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Function: Evaluate Test Cases (Direct Function Call)\n",
-    "# -----------------------------\n",
-    "def evaluate_test_cases(app_name: str) -> dict:\n",
-    "    \"\"\"\n",
-    "    For each test case in the CSV, generate a reference answer using a reference LLM,\n",
-    "    evaluate the actual output against the reference using DeepEval,\n",
-    "    and save the per-test score and reason in new CSV columns.\n",
-    "\n",
-    "    This function is called directly (not as a tool) to avoid recursion issues.\n",
-    "    \"\"\"\n",
-    "    csv_file = f\"{app_name}_test_cases.csv\"\n",
-    "    marker_file = f\"{app_name}_evaluation.marker\"\n",
-    "\n",
-    "    if os.path.exists(marker_file):\n",
-    "        return {\"result\": \"Evaluation already completed; skipping re-evaluation.\"}\n",
-    "    \n",
-    "    try:\n",
-    "        df = pd.read_csv(csv_file)\n",
-    "    except Exception as e:\n",
-    "        return {\"result\": f\"Could not read CSV {csv_file}: {str(e)}\"}\n",
-    "    \n",
-    "    scores = []\n",
-    "    reasons = []\n",
-    "    reference_llm = ChatOpenAI(\n",
-    "        openai_api_key=os.environ[\"OPENAI_API_KEY\"],\n",
-    "        model=\"gpt-3.5-turbo\",\n",
-    "        temperature=0\n",
-    "    )\n",
-    "    \n",
-    "    for idx, row in df.iterrows():\n",
-    "        query = row[\"Test_Cases\"]\n",
-    "        rag_answer = row[\"Response\"]\n",
-    "        ref_prompt = f\"Answer the following query in detail: {query}\"\n",
-    "        reference_answer = str(reference_llm(ref_prompt))\n",
-    "        print(\"[DEBUG] Reference answer:\", reference_answer)\n",
-    "\n",
-    "        test_case = LLMTestCase(\n",
-    "            input=query,\n",
-    "            actual_output=rag_answer,\n",
-    "            retrieval_context=[reference_answer]\n",
-    "        )\n",
-    "        metric = AnswerRelevancyMetric(threshold=0.7)\n",
-    "        metric.measure(test_case)\n",
-    "        scores.append(metric.score)\n",
-    "        reasons.append(metric.reason)\n",
-    "    \n",
-    "    df[\"Answer_Score\"] = scores\n",
-    "    df[\"Answer_Reason\"] = reasons\n",
-    "    df.to_csv(csv_file, index=False)\n",
-    "    with open(marker_file, \"w\") as f:\n",
-    "        f.write(\"Evaluation complete.\")\n",
-    "\n",
-    "    avg_score = sum(scores) / len(scores) if scores else 0\n",
-    "    return {\"result\": f\"Evaluation complete. Avg Score: {avg_score:.2f}. Results stored in {csv_file}.\"}\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Agent Setup\n",
-    "# -----------------------------\n",
-    "react_instructions = \"\"\"\n",
-    "You are a test-case generation and execution agent. \n",
-    "You have the following tools available:\n",
-    "\n",
-    "1) generate_test_cases\n",
-    "   - Accepts parameters: app_name (str), description (str), system_prompt (str), endpoint (str), extra_definition (str), k (int)\n",
-    "   - Use it to generate test queries and save them in a CSV.\n",
-    "\n",
-    "2) execute_test_cases\n",
-    "   - Accepts parameters: app_name (str), endpoint (str), extra_headers (dict, optional)\n",
-    "   - Use it to execute queries from the CSV and evaluate them.\n",
-    "\n",
-    "Guidance:\n",
-    "- If the user wants you to create new test queries, call generate_test_cases.\n",
-    "- If the user wants you to run or execute queries, call execute_test_cases.\n",
-    "- Return short, direct answers once the tool is done.\n",
-    "- Only call the tools if relevant.\n",
-    "- Provide the final result to the user.\n",
-    "\"\"\"\n",
-    "\n",
-    "tools = [generate_test_cases, execute_test_cases]\n",
-    "model = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
-    "checkpointer = MemorySaver()\n",
-    "\n",
-    "# Instead of .run(...) we do .invoke(...)\n",
-    "agent = create_react_agent(\n",
-    "    model=model,\n",
-    "    tools=tools,\n",
-    "    prompt=react_instructions,\n",
-    "    checkpointer=checkpointer\n",
-    ")\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Main\n",
-    "# -----------------------------\n",
-    "if __name__ == \"__main__\":\n",
-    "    params = AppParams(\n",
-    "        app_name=\"RAG_FOR_AI\",\n",
-    "        description=\"My app provides information about documents in a RAG model.\",\n",
-    "        system_prompt=(\n",
-    "            \"Hello, you are a helpful scientific assistant. Based on the provided documents, answer the user's query. \"\n",
-    "            \"Document: Artificial Intelligence (AI) is a rapidly advancing technology that is transforming industries \"\n",
-    "            \"and societies across the globe. In recent years, AI models have revolutionized sectors such as healthcare, \"\n",
-    "            \"automotive, finance, and entertainment. These technologies enable machines to simulate human-like cognitive \"\n",
-    "            \"functions with unprecedented accuracy.\"\n",
-    "        ),\n",
-    "        endpoint=\"http://127.0.0.1:5001/query\",\n",
-    "        extra_definition=\"\",\n",
-    "        k=5\n",
-    "    )\n",
-    "\n",
-    "    # Single user instruction: generate & execute\n",
-    "    single_message = (\n",
-    "        f\"Please generate exactly {params.k} test queries for my app, and then immediately execute them. \"\n",
-    "        f\"App Name: {params.app_name}, Description: {params.description}, \"\n",
-    "        f\"System Prompt: {params.system_prompt}, Endpoint: {params.endpoint}, \"\n",
-    "        f\"Extra Definitions: {params.extra_definition}.\"\n",
-    "    )\n",
-    "\n",
-    "    conversation = [{\"role\": \"user\", \"content\": single_message}]\n",
-    "\n",
-    "    print(\"=== Agent Input ===\")\n",
-    "    print(single_message)\n",
-    "    print(\"=== Agent Output ===\")\n",
-    "\n",
-    "    # Use .invoke(...) to process the conversation\n",
-    "    final_state = agent.invoke({\"messages\": conversation}, config={\"configurable\": {\"thread_id\": 1}})\n",
-    "\n",
-    "    # The final reply from the agent\n",
-    "    print(\"Final State of Agent Returns >>>\",final_state[\"messages\"][-1].content)\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Langgraph agent using Rea-Act --> Javelin Rout"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from langchain.chat_models import ChatOpenAI\n",
-    "\n",
-    "def get_javelin_llm(\n",
-    "    model_name: str = \"gpt-3.5-turbo\",\n",
-    "    temperature: float = 0.7\n",
-    ") -> ChatOpenAI:\n",
-    "    \"\"\"\n",
-    "    Returns a ChatOpenAI instance that routes requests to the Javelin endpoint.\n",
-    "    \"\"\"\n",
-    "\n",
-    "    # Javelin headers\n",
-    "    javelin_headers = {\n",
-    "        \"x-api-key\": javelin_api_key,\n",
-    "    }\n",
-    "\n",
-    "    # Put headers under model_kwargs to avoid the warning\n",
-    "    return ChatOpenAI(\n",
-    "        model_name=model_name,\n",
-    "        temperature=temperature,\n",
-    "        openai_api_key=llm_api_key,\n",
-    "        openai_api_base=\"https://api-dev.javelin.live/v1/query/testing\",\n",
-    "        request_timeout=180, \n",
-    "        default_headers = javelin_headers\n",
-    "    )\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "
Congratulations! Login successful 🙌 \n",
-       "
\n" - ], - "text/plain": [ - "Congratulations! Login successful 🙌 \n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "=== Agent Input ===\n", - "Please generate exactly 5 test queries for my app, and then immediately execute them. App Name: RAG_FOR_AI, Description: My app provides information about documents in a RAG model., System Prompt: Hello, you are a helpful scientific assistant. Based on the provided documents, answer the user's query. Document: Artificial Intelligence (AI) is a rapidly advancing technology that is transforming industries and societies across the globe. In recent years, AI models have revolutionized sectors such as healthcare, automotive, finance, and entertainment. These technologies enable machines to simulate human-like cognitive functions with unprecedented accuracy., Endpoint: http://127.0.0.1:5001/query, Extra Definitions: .\n", - "=== Agent Output ===\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/z_/93w3rhm91913vgvg7hxgnf9r0000gn/T/ipykernel_39136/899068372.py:96: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 1.0. Use :meth:`~RunnableSequence, e.g., `prompt | llm`` instead.\n", - " llm_chain = LLMChain(llm=llm, prompt=prompt)\n", - "/var/folders/z_/93w3rhm91913vgvg7hxgnf9r0000gn/T/ipykernel_39136/899068372.py:98: LangChainDeprecationWarning: The method `Chain.run` was deprecated in langchain 0.1.0 and will be removed in 1.0. 
Use :meth:`~invoke` instead.\n", - " generated = llm_chain.run({\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Generating test queries...\n", - "[DEBUG] Raw LLM output: [\n", - " \"What technology is rapidly advancing and transforming industries and societies globally?\",\n", - " \"Which sectors have been revolutionized by AI models in recent years?\",\n", - " \"What do AI technologies enable machines to do with unprecedented accuracy?\",\n", - " \"What is the main focus of Artificial Intelligence (AI)?\",\n", - " \"How have AI models impacted the healthcare sector?\"\n", - "]\n", - "[DEBUG] Parsed queries: ['What technology is rapidly advancing and transforming industries and societies globally?', 'Which sectors have been revolutionized by AI models in recent years?', 'What do AI technologies enable machines to do with unprecedented accuracy?', 'What is the main focus of Artificial Intelligence (AI)?', 'How have AI models impacted the healthcare sector?']\n", - "[DEBUG] Reading test queries from RAG_FOR_AI_test_cases.csv...\n", - "[DEBUG] Query: What technology is rapidly advancing and transforming industries and societies globally? -> Response: Artificial Intelligence (AI) is the rapidly advancing technology that is transforming industries and societies globally. Its implementation has revolutionized various sectors and continues to evolve, reshaping the workforce, influencing economic trends, and changing the way people interact with technology.\n", - "[DEBUG] Query: Which sectors have been revolutionized by AI models in recent years? -> Response: AI models have revolutionized sectors such as healthcare, automotive, finance, and entertainment, among many others.\n", - "[DEBUG] Query: What do AI technologies enable machines to do with unprecedented accuracy? 
-> Response: AI technologies enable machines to simulate human-like cognitive functions, such as problem-solving, learning, and decision-making, with unprecedented accuracy and efficiency.\n", - "[DEBUG] Query: What is the main focus of Artificial Intelligence (AI)? -> Response: The main focus of Artificial Intelligence (AI) is to enable early intervention through its rapidly advancing technology.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/var/folders/z_/93w3rhm91913vgvg7hxgnf9r0000gn/T/ipykernel_39136/899068372.py:207: LangChainDeprecationWarning: The method `BaseChatModel.__call__` was deprecated in langchain-core 0.1.7 and will be removed in 1.0. Use :meth:`~invoke` instead.\n", - " reference_answer = str(reference_llm(ref_prompt))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Query: How have AI models impacted the healthcare sector? -> Response: AI models have had a significant impact on the healthcare sector by revolutionizing patient care, diagnosis, and treatment. Machine learning algorithms are being utilized to analyze vast amounts of medical data, leading to more accurate and timely diagnoses. Additionally, AI models are helping healthcare professionals in making informed decisions, personalizing treatment plans, and improving overall patient outcomes. This technology has the potential to enhance efficiency, reduce errors, and ultimately transform the way healthcare is delivered.\n" - ] - }, - { - "data": { - "text/html": [ - "
/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n",
-       "UserWarning: install \"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n", - "UserWarning: install \"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Reference answer: content='One technology that is rapidly advancing and transforming industries and societies globally is artificial intelligence (AI). AI refers to the development of computer systems that can perform tasks that typically require human intelligence, such as visual perception, speech recognition, decision-making, and language translation.\\n\\nAI is being integrated into a wide range of industries, including healthcare, finance, transportation, manufacturing, and retail. In healthcare, AI is being used to analyze medical images, predict patient outcomes, and improve diagnostics. In finance, AI is being used to detect fraud, automate trading, and personalize customer experiences. In transportation, AI is being used to optimize routes, improve safety, and develop autonomous vehicles. In manufacturing, AI is being used to optimize production processes, improve quality control, and predict equipment failures. In retail, AI is being used to personalize marketing, optimize inventory, and enhance the shopping experience.\\n\\nAI is also transforming societies globally by influencing how we work, communicate, and interact with technology. AI is automating routine tasks, creating new job opportunities, and changing the skills required in the workforce. AI is also enabling new ways of communication, such as virtual assistants and chatbots, and is helping to bridge language barriers through real-time translation. 
Additionally, AI is influencing how we interact with technology through voice recognition, facial recognition, and gesture control.\\n\\nOverall, AI is rapidly advancing and transforming industries and societies globally by improving efficiency, driving innovation, and creating new opportunities for growth and development. As AI continues to evolve, its impact is expected to become even more profound, shaping the future of work, communication, and technology in ways we have yet to fully imagine.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 327, 'prompt_tokens': 26, 'total_tokens': 353, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-042505b0-2d02-49d9-8abf-205bc8a786b0-0' usage_metadata={'input_tokens': 26, 'output_tokens': 327, 'total_tokens': 353, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n",
-       "UserWarning: install \"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n", - "UserWarning: install \"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Reference answer: content='Artificial Intelligence (AI) has revolutionized numerous sectors in recent years, transforming the way businesses operate and improving efficiency and productivity. Some of the key sectors that have been significantly impacted by AI models include:\\n\\n1. Healthcare: AI has revolutionized the healthcare sector by enabling the development of advanced diagnostic tools, personalized treatment plans, and predictive analytics for disease prevention. AI-powered applications can analyze medical images, genetic data, and patient records to assist healthcare professionals in making accurate diagnoses and treatment decisions.\\n\\n2. Finance: AI has transformed the finance sector by automating tasks such as fraud detection, risk assessment, and investment analysis. AI models can process vast amounts of financial data in real-time, enabling financial institutions to make faster and more informed decisions. Robo-advisors powered by AI algorithms have also become increasingly popular for providing personalized investment advice to clients.\\n\\n3. Retail: AI has revolutionized the retail sector by enabling personalized shopping experiences, efficient inventory management, and targeted marketing campaigns. AI-powered recommendation engines analyze customer data to suggest products tailored to individual preferences, increasing sales and customer satisfaction. Retailers also use AI models for demand forecasting, pricing optimization, and supply chain management.\\n\\n4. 
Manufacturing: AI has transformed the manufacturing sector by enabling predictive maintenance, quality control, and process optimization. AI models analyze sensor data from machinery to predict equipment failures before they occur, reducing downtime and maintenance costs. AI-powered robots and autonomous vehicles have also revolutionized production processes, increasing efficiency and precision.\\n\\n5. Transportation: AI has revolutionized the transportation sector by enabling autonomous vehicles, route optimization, and traffic management. AI models power self-driving cars, trucks, and drones, which are poised to revolutionize the way people and goods are transported. AI algorithms also analyze traffic data to optimize routes, reduce congestion, and improve safety on the roads.\\n\\nOverall, AI models have revolutionized a wide range of sectors in recent years, providing new opportunities for innovation and growth. As AI technology continues to advance, we can expect even more sectors to be transformed in the future.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 410, 'prompt_tokens': 27, 'total_tokens': 437, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-f2f1a6fe-812b-4a7d-9667-7720f60aa445-0' usage_metadata={'input_tokens': 27, 'output_tokens': 410, 'total_tokens': 437, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n",
-       "UserWarning: install \"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n", - "UserWarning: install \"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Reference answer: content='AI technologies enable machines to perform a wide range of complex tasks with unprecedented accuracy. Some of the key capabilities that AI technologies enable machines to do include:\\n\\n1. Pattern recognition: AI technologies allow machines to analyze and identify patterns in large amounts of data with great accuracy. This can be used in various fields such as healthcare, finance, and marketing to detect trends and make predictions.\\n\\n2. Natural language processing: AI technologies enable machines to understand and generate human language with a high level of accuracy. This can be used in applications such as chatbots, virtual assistants, and language translation services.\\n\\n3. Image and video recognition: AI technologies enable machines to accurately analyze and interpret images and videos. This can be used in applications such as facial recognition, object detection, and autonomous driving.\\n\\n4. Predictive analytics: AI technologies enable machines to analyze historical data and make accurate predictions about future events. This can be used in fields such as weather forecasting, stock market analysis, and predictive maintenance.\\n\\n5. Automation: AI technologies enable machines to automate repetitive tasks with high accuracy. This can be used in various industries to increase efficiency and reduce human error.\\n\\nOverall, AI technologies enable machines to perform a wide range of tasks with unprecedented accuracy, leading to improvements in productivity, efficiency, and decision-making in various industries.' 
additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 260, 'prompt_tokens': 26, 'total_tokens': 286, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-70830496-acd0-475a-b470-8fdfb27d1aa7-0' usage_metadata={'input_tokens': 26, 'output_tokens': 260, 'total_tokens': 286, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n",
-       "UserWarning: install \"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n", - "UserWarning: install \"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Reference answer: content='The main focus of Artificial Intelligence (AI) is to create intelligent machines that can simulate human intelligence and perform tasks that typically require human cognition. This includes tasks such as learning, reasoning, problem solving, understanding natural language, and perception.\\n\\nAI aims to develop machines that can think, learn, and adapt like humans, and ultimately surpass human intelligence in certain areas. The goal is to create machines that can automate complex tasks, make decisions based on data and algorithms, and improve their performance over time through learning.\\n\\nThere are several subfields within AI that focus on different aspects of intelligence, such as machine learning, natural language processing, computer vision, robotics, and expert systems. These subfields work together to create intelligent systems that can perform a wide range of tasks, from playing chess to driving a car to diagnosing medical conditions.\\n\\nOverall, the main focus of AI is to develop intelligent machines that can enhance human capabilities, improve efficiency, and solve complex problems in various fields such as healthcare, finance, transportation, and more. AI has the potential to revolutionize industries and society as a whole, and its development is a rapidly growing area of research and innovation.' 
additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 232, 'prompt_tokens': 25, 'total_tokens': 257, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-e355213a-dd79-46e7-86dc-27eb2c362e0a-0' usage_metadata={'input_tokens': 25, 'output_tokens': 232, 'total_tokens': 257, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "
/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n",
-       "UserWarning: install \"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/Users/dhruvyadav/Desktop/javelin-main/javelin-python/venv/lib/python3.12/site-packages/rich/live.py:231: \n", - "UserWarning: install \"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[DEBUG] Reference answer: content='AI models have had a significant impact on the healthcare sector in a variety of ways. Some of the key impacts include:\\n\\n1. Improved diagnosis and treatment: AI models have been developed to analyze medical images, such as X-rays and MRIs, to detect patterns and abnormalities that may be missed by human radiologists. This can lead to earlier and more accurate diagnosis of conditions such as cancer and other diseases, improving patient outcomes.\\n\\n2. Personalized medicine: AI models can analyze large amounts of data, such as genetic information and patient records, to identify personalized treatment plans for individual patients. This can lead to more effective and targeted treatments, reducing the likelihood of adverse reactions and improving patient outcomes.\\n\\n3. Predictive analytics: AI models can analyze data from electronic health records and other sources to predict patient outcomes and identify individuals at risk of developing certain conditions. This can help healthcare providers intervene early and prevent the progression of diseases, ultimately reducing healthcare costs and improving patient outcomes.\\n\\n4. Administrative efficiency: AI models can automate routine administrative tasks, such as scheduling appointments and processing insurance claims, freeing up healthcare providers to focus on patient care. This can improve efficiency and reduce healthcare costs.\\n\\n5. Drug discovery and development: AI models can analyze large datasets to identify potential drug candidates and predict their effectiveness. 
This can accelerate the drug discovery process and lead to the development of new treatments for a wide range of conditions.\\n\\nOverall, AI models have the potential to revolutionize the healthcare sector by improving diagnosis and treatment, personalizing medicine, predicting patient outcomes, improving administrative efficiency, and accelerating drug discovery and development. While there are still challenges to overcome, such as ensuring the accuracy and reliability of AI models and addressing concerns about data privacy and security, the potential benefits of AI in healthcare are vast and promising.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 357, 'prompt_tokens': 23, 'total_tokens': 380, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-2f8bfe67-7423-4145-ab61-3006e70ee780-0' usage_metadata={'input_tokens': 23, 'output_tokens': 357, 'total_tokens': 380, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}}\n" - ] - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Evaluation Result:\n",
-      "Evaluation complete. Avg Score: 0.87. Results stored in RAG_FOR_AI_test_cases.csv.\n",
-      "Final State of Agent Returns >>> Test queries have been generated and executed successfully for the app \"RAG_FOR_AI\".\n"
-     ]
-    }
-   ],
-   "source": [
-    "import os\n",
-    "import json\n",
-    "import requests\n",
-    "import pandas as pd\n",
-    "from pydantic import BaseModel\n",
-    "\n",
-    "from langchain_openai import ChatOpenAI\n",
-    "from langchain.prompts import PromptTemplate\n",
-    "from langchain.chains import LLMChain\n",
-    "\n",
-    "from langgraph.prebuilt import create_react_agent\n",
-    "from langgraph.checkpoint.memory import MemorySaver\n",
-    "from langchain_core.tools import tool\n",
-    "\n",
-    "# Import DeepEval modules\n",
-    "from deepeval import login_with_confident_api_key, evaluate  # evaluate helper\n",
-    "from deepeval.metrics import AnswerRelevancyMetric\n",
-    "from deepeval.test_case import LLMTestCase\n",
-    "\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Configure API Keys and Log In\n",
-    "# -----------------------------\n",
-    "login_with_confident_api_key(os.environ[\"DEEPEVAL_API_KEY\"])\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Define AppParams model\n",
-    "# -----------------------------\n",
-    "class AppParams(BaseModel):\n",
-    "    app_name: str\n",
-    "    description: str\n",
-    "    system_prompt: str\n",
-    "    endpoint: str\n",
-    "    extra_definition: str\n",
-    "    k: int\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Utility: Custom Parsing Function\n",
-    "# -----------------------------\n",
-    "def custom_parsing_for_rag(response_text: str) -> str:\n",
-    "    \"\"\"\n",
-    "    Parse the response from the RAG endpoint (expected as JSON).\n",
-    "    Return the value associated with \"answer\" if available; if not, then \"response\".\n",
-    "    Otherwise, return the original text.\n",
-    "    \"\"\"\n",
-    "    try:\n",
-    "        data = json.loads(response_text)\n",
-    "        return data.get(\"answer\") or data.get(\"response\") or response_text\n",
-    "    except Exception:\n",
-    "        return response_text\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Tool: Generate Test Cases\n",
-    "# -----------------------------\n",
-    "@tool\n",
-    "def generate_test_cases(app_name: str, description: str, system_prompt: str,\n",
-    "                        endpoint: str, extra_definition: str, k: int) -> dict:\n",
-    "    \"\"\"\n",
-    "    Name: generate_test_cases\n",
-    "    Description: Generate exactly k plain test queries for the specified app details using an LLM.\n",
-    "    Input Arguments:\n",
-    "        - app_name (str): The name of the application.\n",
-    "        - description (str): A brief description of the app.\n",
-    "        - system_prompt (str): The system prompt or instructions for generating queries.\n",
-    "        - endpoint (str): The API endpoint to be tested.\n",
-    "        - extra_definition (str): Any extra definitions or clarifications needed.\n",
-    "        - k (int): The number of test queries to generate.\n",
-    "    Output:\n",
-    "        - A dictionary with {\"result\": \"<message>\"}.\n",
-    "\n",
-    "    How it works:\n",
-    "    1) Calls an LLM to generate exactly k queries in JSON array format.\n",
-    "    2) Saves queries in CSV with columns \"Test_Cases\" (the query) and \"Response\" (empty).\n",
-    "    \"\"\"\n",
-    "    llm = get_javelin_llm(model_name=\"gpt-3.5-turbo\", temperature=0.7)\n",
-    "\n",
-    "    prompt_template = \"\"\"\n",
-    "    Application Name: {app_name}\n",
-    "    Description: {description}\n",
-    "    System Prompt: {system_prompt}\n",
-    "    Extra Definitions: {extra_definition}\n",
-    "    Endpoint: {endpoint}\n",
-    "    Number of Test Queries: {k}\n",
-    "\n",
-    "    Based on the above details, generate exactly {k} test queries for automated testing.\n",
-    "    Each test query should be a plain text string that represents a query to test the endpoint.\n",
-    "    Do not include any extra information, explanations, or expected output.\n",
-    "    Return the result strictly as a JSON array of strings, for example:\n",
-    "    [\"Query 1\", \"Query 2\", ...]\n",
-    "    \"\"\"\n",
-    "    prompt = PromptTemplate(\n",
-    "        template=prompt_template,\n",
-    "        input_variables=[\"app_name\", \"description\", \"system_prompt\", \"extra_definition\", \"endpoint\", \"k\"]\n",
-    "    )\n",
-    "\n",
-    "    llm_chain = LLMChain(llm=llm, prompt=prompt)\n",
-    "    print(\"[DEBUG] Generating test queries...\")\n",
-    "    generated = llm_chain.run({\n",
-    "        \"app_name\": app_name,\n",
-    "        \"description\": description,\n",
-    "        \"system_prompt\": system_prompt,\n",
-    "        \"extra_definition\": extra_definition,\n",
-    "        \"endpoint\": endpoint,\n",
-    "        \"k\": k\n",
-    "    })\n",
-    "    print(\"[DEBUG] Raw LLM output:\", generated)\n",
-    "\n",
-    "    # Attempt to parse as JSON\n",
-    "    try:\n",
-    "        queries = json.loads(generated)\n",
-    "        if not isinstance(queries, list):\n",
-    "            raise ValueError(\"JSON output is not a list.\")\n",
-    "    except Exception as e:\n",
-    "        print(\"[ERROR] Parsing JSON failed. Splitting by newlines. Error:\", e)\n",
-    "        queries = [q.strip() for q in generated.split(\"\\n\") if q.strip()]\n",
-    "\n",
-    "    print(\"[DEBUG] Parsed queries:\", queries)\n",
-    "    df = pd.DataFrame({\"Test_Cases\": queries, \"Response\": [\"\"] * len(queries)})\n",
-    "    csv_file = f\"{app_name}_test_cases.csv\"\n",
-    "    df.to_csv(csv_file, index=False)\n",
-    "    return {\"result\": f\"Test queries saved in {csv_file}.\"}\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Tool: Execute Test Cases\n",
-    "# -----------------------------\n",
-    "@tool\n",
-    "def execute_test_cases(app_name: str, endpoint: str, extra_headers: dict = None) -> dict:\n",
-    "    \"\"\"\n",
-    "    Name: execute_test_cases\n",
-    "    Description: Execute existing test queries for the specified app by sending them to the given endpoint.\n",
-    "    Input Arguments:\n",
-    "        - app_name (str): The name of the application.\n",
-    "        - endpoint (str): The API endpoint to be tested.\n",
-    "        - extra_headers (dict, optional): Additional HTTP headers for requests.\n",
-    "    Output:\n",
-    "        - A dictionary with {\"result\": \"<message>\"}.\n",
-    "\n",
-    "    How it works:\n",
-    "    1) Reads the CSV file named \"<app_name>_test_cases.csv\".\n",
-    "    2) POSTs each query (from \"Test_Cases\" column) to 'endpoint' (JSON payload: {\"query\": <query>}).\n",
-    "    3) Parses the JSON response if possible, or returns the raw text otherwise.\n",
-    "    4) Saves the response to the same CSV (under \"Response\" column).\n",
-    "    5) Calls evaluate_test_cases to measure quality via DeepEval and prints the evaluation result.\n",
-    "    \"\"\"\n",
-    "    csv_file = f\"{app_name}_test_cases.csv\"\n",
-    "    print(f\"[DEBUG] Reading test queries from {csv_file}...\")\n",
-    "    try:\n",
-    "        df = pd.read_csv(csv_file)\n",
-    "    except Exception as e:\n",
-    "        return {\"result\": f\"Could not read CSV {csv_file}: {str(e)}\"}\n",
-    "    \n",
-    "    headers = extra_headers if extra_headers is not None else {}\n",
-    "    responses = []\n",
-    "    for query in df[\"Test_Cases\"]:\n",
-    "        try:\n",
-    "            res = requests.post(endpoint, json={\"query\": query}, headers=headers)\n",
-    "            if res.status_code == 200:\n",
-    "                text = res.text\n",
-    "            else:\n",
-    "                text = f\"Error {res.status_code}: {res.text}\"\n",
-    "        except Exception as e:\n",
-    "            text = f\"Request failed: {str(e)}\"\n",
-    "        parsed = custom_parsing_for_rag(text)\n",
-    "        responses.append(parsed)\n",
-    "        print(f\"[DEBUG] Query: {query} -> Response: {parsed}\")\n",
-    "\n",
-    "    df[\"Response\"] = responses\n",
-    "    df.to_csv(csv_file, index=False)\n",
-    "\n",
-    "    # Evaluate after execution\n",
-    "    eval_result = evaluate_test_cases(app_name)\n",
-    "    print(\"Evaluation Result:\")\n",
-    "    print(eval_result[\"result\"])\n",
-    "\n",
-    "    return {\"result\": f\"Executed queries; responses saved in {csv_file}.\"}\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Function: Evaluate Test Cases (Direct Function Call)\n",
-    "# -----------------------------\n",
-    "def evaluate_test_cases(app_name: str) -> dict:\n",
-    "    \"\"\"\n",
-    "    For each test case in the CSV, generate a reference answer using a reference LLM,\n",
-    "    evaluate the actual output against the reference using DeepEval,\n",
-    "    and save the per-test score and reason in new CSV columns.\n",
-    "\n",
-    "    This function is called directly (not as a tool) to avoid recursion issues.\n",
-    "    \"\"\"\n",
-    "    csv_file = f\"{app_name}_test_cases.csv\"\n",
-    "    marker_file = f\"{app_name}_evaluation.marker\"\n",
-    "\n",
-    "    if os.path.exists(marker_file):\n",
-    "        return {\"result\": \"Evaluation already completed; skipping re-evaluation.\"}\n",
-    "    \n",
-    "    try:\n",
-    "        df = pd.read_csv(csv_file)\n",
-    "    except Exception as e:\n",
-    "        return {\"result\": f\"Could not read CSV {csv_file}: {str(e)}\"}\n",
-    "    \n",
-    "    scores = []\n",
-    "    reasons = []\n",
-    "    reference_llm = get_javelin_llm(model_name=\"gpt-3.5-turbo\", temperature=0.7)\n",
-    "    \n",
-    "    for idx, row in df.iterrows():\n",
-    "        query = row[\"Test_Cases\"]\n",
-    "        rag_answer = row[\"Response\"]\n",
-    "        ref_prompt = f\"Answer the following query in detail: {query}\"\n",
-    "        reference_answer = str(reference_llm(ref_prompt))\n",
-    "        print(\"[DEBUG] Reference answer:\", reference_answer)\n",
-    "\n",
-    "        test_case = LLMTestCase(\n",
-    "            input=query,\n",
-    "            actual_output=rag_answer,\n",
-    "            retrieval_context=[reference_answer]\n",
-    "        )\n",
-    "        metric = AnswerRelevancyMetric(threshold=0.7)\n",
-    "        metric.measure(test_case)\n",
-    "        scores.append(metric.score)\n",
-    "        reasons.append(metric.reason)\n",
-    "    \n",
-    "    df[\"Answer_Score\"] = scores\n",
-    "    df[\"Answer_Reason\"] = reasons\n",
-    "    df.to_csv(csv_file, index=False)\n",
-    "    with open(marker_file, \"w\") as f:\n",
-    "        f.write(\"Evaluation complete.\")\n",
-    "\n",
-    "    avg_score = sum(scores) / len(scores) if scores else 0\n",
-    "    return {\"result\": f\"Evaluation complete. Avg Score: {avg_score:.2f}. Results stored in {csv_file}.\"}\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Agent Setup\n",
-    "# -----------------------------\n",
-    "react_instructions = \"\"\"\n",
-    "You are a test-case generation and execution agent. \n",
-    "You have the following tools available:\n",
-    "\n",
-    "1) generate_test_cases\n",
-    "   - Accepts parameters: app_name (str), description (str), system_prompt (str), endpoint (str), extra_definition (str), k (int)\n",
-    "   - Use it to generate test queries and save them in a CSV.\n",
-    "\n",
-    "2) execute_test_cases\n",
-    "   - Accepts parameters: app_name (str), endpoint (str), extra_headers (dict, optional)\n",
-    "   - Use it to execute queries from the CSV and evaluate them.\n",
-    "\n",
-    "Guidance:\n",
-    "- If the user wants you to create new test queries, call generate_test_cases.\n",
-    "- If the user wants you to run or execute queries, call execute_test_cases.\n",
-    "- Return short, direct answers once the tool is done.\n",
-    "- Only call the tools if relevant.\n",
-    "- Provide the final result to the user.\n",
-    "\"\"\"\n",
-    "\n",
-    "tools = [generate_test_cases, execute_test_cases]\n",
-    "model = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0)\n",
-    "checkpointer = MemorySaver()\n",
-    "\n",
-    "# Instead of .run(...) we do .invoke(...)\n",
-    "agent = create_react_agent(\n",
-    "    model=model,\n",
-    "    tools=tools,\n",
-    "    prompt=react_instructions,\n",
-    "    checkpointer=checkpointer,\n",
-    ")\n",
-    "\n",
-    "# -----------------------------\n",
-    "# Main\n",
-    "# -----------------------------\n",
-    "if __name__ == \"__main__\":\n",
-    "    params = AppParams(\n",
-    "        app_name=\"RAG_FOR_AI\",\n",
-    "        description=\"My app provides information about documents in a RAG model.\",\n",
-    "        system_prompt=(\n",
-    "            \"Hello, you are a helpful scientific assistant. Based on the provided documents, answer the user's query. \"\n",
-    "            \"Document: Artificial Intelligence (AI) is a rapidly advancing technology that is transforming industries \"\n",
-    "            \"and societies across the globe. In recent years, AI models have revolutionized sectors such as healthcare, \"\n",
-    "            \"automotive, finance, and entertainment. These technologies enable machines to simulate human-like cognitive \"\n",
-    "            \"functions with unprecedented accuracy.\"\n",
-    "        ),\n",
-    "        endpoint=\"http://127.0.0.1:5001/query\",\n",
-    "        extra_definition=\"\",\n",
-    "        k=5\n",
-    "    )\n",
-    "\n",
-    "    # Single user instruction: generate & execute\n",
-    "    single_message = (\n",
-    "        f\"Please generate exactly {params.k} test queries for my app, and then immediately execute them. \"\n",
-    "        f\"App Name: {params.app_name}, Description: {params.description}, \"\n",
-    "        f\"System Prompt: {params.system_prompt}, Endpoint: {params.endpoint}, \"\n",
-    "        f\"Extra Definitions: {params.extra_definition}.\"\n",
-    "    )\n",
-    "\n",
-    "    conversation = [{\"role\": \"user\", \"content\": single_message}]\n",
-    "\n",
-    "    print(\"=== Agent Input ===\")\n",
-    "    print(single_message)\n",
-    "    print(\"=== Agent Output ===\")\n",
-    "\n",
-    "    # Use .invoke(...) to process the conversation\n",
-    "    final_state = agent.invoke({\"messages\": conversation}, config={\"configurable\": {\"thread_id\": 1}})\n",
-    "\n",
-    "    # The final reply from the agent\n",
-    "    print(\"Final State of Agent Returns >>>\",final_state[\"messages\"][-1].content)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}