Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support calling the OpenAI API directly #85

Merged
merged 7 commits into from
May 15, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 28 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# gpt-review

<p align="center">
<a href="https://github.com/microsoft/gpt-review/actions"><img alt="Actions Status" src="https://github.com/microsoft/gpt-review/workflows/Python%20CI/badge.svg"></a>
<a href="https://codecov.io/gh/microsoft/gpt-review"><img alt="Coverage Status" src="https://codecov.io/gh/microsoft/gpt-review/branch/main/graph/badge.svg"></a>
Expand All @@ -10,25 +11,39 @@

A Python-based CLI and GitHub Action that uses OpenAI or Azure OpenAI models to review the contents of pull requests.

## How to install CLI:

Install the package via `pip` and set the environment variables for your OpenAI API Key and Organization ID.
To use Azure OpenAI, set the environment variables `AZURE_OPENAI_API` and `AZURE_OPENAI_API_KEY` to the URL and key for your Azure OpenAI API.
## How to install CLI

First, install the package via `pip`:

```bash
pip install gpt-review

export AZURE_OPENAI_API=<your azure api url>
export AZURE_OPENAI_API_KEY=<your azure key>
```

Or use Azure Key Vault to secure your API Key. Create secrets for the url named `azure-open-ai`, and for the API Key named `azure-openai-key`.
```bash
export AZURE_KEY_VAULT_URL=https://<keyvault_name>.vault.azure.net/
### GPT API credentials

az login
```
You will need to provide an OpenAI API key to use this CLI tool. In order of precedence, it will check the following methods:

1. A context file at `azure.yaml`, or at the path given by the `CONTEXT_FILE` environment variable. See `azure.yaml.template` for an example.

2. `AZURE_OPENAI_API` and `AZURE_OPENAI_API_KEY` to connect to an Azure OpenAI API:

```bash
export AZURE_OPENAI_API=<your azure api url>
export AZURE_OPENAI_API_KEY=<your azure key>
```

3. `OPENAI_API_KEY` for direct use of the OpenAI API

```bash
export OPENAI_API_KEY=<your openai key>
```

4. `AZURE_KEY_VAULT_URL` to use Azure Key Vault. Store the API URL in a secret named `azure-open-ai` and the API key in a secret named `azure-openai-key`, then run:

```bash
export AZURE_KEY_VAULT_URL=https://<keyvault_name>.vault.azure.net/
az login
```

## Main Commands

Expand Down Expand Up @@ -94,6 +109,7 @@ gpt COMMAND --help
Replace COMMAND with one of the main commands listed above (e.g., 'ask').

## Developer Setup

To install the package in development mode, with additional packages for testing, run the following command:

```bash
Expand Down
40 changes: 26 additions & 14 deletions src/gpt_review/_llama_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import List, Optional

import openai
from langchain.chat_models import AzureChatOpenAI
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import AzureOpenAI
from llama_index import (
Expand Down Expand Up @@ -124,25 +124,37 @@ def _load_service_context(fast: bool = False, large: bool = False) -> ServiceCon
"""

context = _load_azure_openai_context()

llm_type = AzureGPT35Turbo if fast else AzureChatOpenAI
llm_name = (
model_name = (
context.turbo_llm_model_deployment_id
if fast
else context.large_llm_model_deployment_id
if large
else context.smart_llm_model_deployment_id
)
llm = llm_type( # type: ignore
deployment_name=llm_name,
model_kwargs={
"api_key": openai.api_key,
"api_base": openai.api_base,
"api_type": "azure",
"api_version": "2023-03-15-preview",
},
max_retries=C.MAX_RETRIES,
)

if openai.api_type == C.AZURE_API_TYPE:
llm_type = AzureGPT35Turbo if fast else AzureChatOpenAI
llm = llm_type( # type: ignore
deployment_name=model_name,
model_kwargs={
"api_key": openai.api_key,
"api_base": openai.api_base,
"api_type": openai.api_type,
"api_version": openai.api_version,
},
max_retries=C.MAX_RETRIES,
)
else:
llm = ChatOpenAI(
model_name=model_name,
model_kwargs={
"api_key": openai.api_key,
"api_base": openai.api_base,
"api_type": openai.api_type,
"api_version": openai.api_version,
},
max_retries=C.MAX_RETRIES,
)

llm_predictor = LLMPredictor(llm=llm)

Expand Down
12 changes: 6 additions & 6 deletions src/gpt_review/_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ def _count_tokens(prompt) -> int:
return int(len(prompt) / 4 * 3)


def _get_engine(prompt: str, max_tokens: int, fast: bool = False, large: bool = False) -> str:
def _get_model(prompt: str, max_tokens: int, fast: bool = False, large: bool = False) -> str:
"""
Get the Engine based on the prompt length.
Get the OpenAI model based on the prompt length.
- when greater than 8k use gpt-4-32k
- otherwise use gpt-4
- enable fast to use gpt-35-turbo for small prompts
Expand All @@ -36,7 +36,7 @@ def _get_engine(prompt: str, max_tokens: int, fast: bool = False, large: bool =
large (bool, optional): Whether to use the large model. Defaults to False.

Returns:
str: The engine to use.
str: The model to use.
"""
context = _load_azure_openai_context()

Expand Down Expand Up @@ -80,12 +80,12 @@ def _call_gpt(
"""
messages = messages or [{"role": "user", "content": prompt}]
try:
engine = _get_engine(prompt, max_tokens=max_tokens, fast=fast, large=large)
logging.info("Model Selected based on prompt size: %s", engine)
model = _get_model(prompt, max_tokens=max_tokens, fast=fast, large=large)
logging.info(f"Model Selected based on prompt size: {model}")

logging.info("Prompt sent to GPT: %s\n", prompt)
completion = openai.ChatCompletion.create(
engine=engine,
model=model,
messages=messages,
max_tokens=max_tokens,
temperature=temperature,
Expand Down
1 change: 1 addition & 0 deletions src/gpt_review/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
AZURE_SMART_MODEL = "gpt-4"
AZURE_LARGE_MODEL = "gpt-4-32k"
AZURE_EMBEDDING_MODEL = "text-embedding-ada-002"
AZURE_KEY_VAULT = "https://dciborow-openai.vault.azure.net/"

BUG_PROMPT_YAML = "prompt_bug.yaml"
COVERAGE_PROMPT_YAML = "prompt_coverage.yaml"
Expand Down
43 changes: 28 additions & 15 deletions src/gpt_review/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@

import gpt_review.constants as C

DEFAULT_KEY_VAULT = "https://dciborow-openai.vault.azure.net/"


@dataclass
class Context:
Expand All @@ -25,33 +23,48 @@ class Context:

def _load_context_file():
"""Import from yaml file and return the context."""
context_file = os.getenv("CONTEXT_FILE", "azure.yaml")
context_file = os.getenv("CONTEXT_FILE", C.AZURE_CONFIG_FILE)
with open(context_file, "r", encoding="utf8") as file:
return yaml.load(file, Loader=yaml.SafeLoader)


def _load_azure_openai_context() -> Context:
"""
Load the Azure OpenAI context.
If the environment variables are not set, retrieve the values from Azure Key Vault.
Set both the environment variables and the openai package variables.
- Without setting the environment variables, the integration tests fail.
- Without setting the openai package variables, the cli tests fail.
"""
"""Load the context from the environment variables or the context file.

If a config file is available its values will take precedence. Otherwise
it will first check for an AZURE_OPENAI_API key, next OPENAI_API_KEY, and
lastly the Azure Key Vault.

Returns:
Context: The context for the Azure OpenAI API and the models.
"""
azure_config = _load_context_file() if os.path.exists(os.getenv("CONTEXT_FILE", C.AZURE_CONFIG_FILE)) else {}

openai.api_type = os.environ["OPENAI_API_TYPE"] = azure_config.get("azure_api_type", C.AZURE_API_TYPE)
openai.api_version = os.environ["OPENAI_API_VERSION"] = azure_config.get("azure_api_version", C.AZURE_API_VERSION)
if azure_config.get("azure_api_type"):
openai.api_type = os.environ["OPENAI_API_TYPE"] = azure_config.get("azure_api_type")
elif "AZURE_OPENAI_API" in os.environ:
openai.api_type = os.environ["OPENAI_API_TYPE"] = C.AZURE_API_TYPE
elif "OPENAI_API_TYPE" in os.environ:
openai.api_type = os.environ["OPENAI_API_TYPE"]

if azure_config.get("azure_api_version"):
openai.api_version = os.environ["OPENAI_API_VERSION"] = azure_config.get("azure_api_version")
elif "AZURE_OPENAI_API_VERSION" in os.environ:
openai.api_version = os.environ["OPENAI_API_VERSION"] = C.AZURE_API_VERSION
dciborow marked this conversation as resolved.
Show resolved Hide resolved
elif "OPENAI_API_VERSION" in os.environ:
openai.api_version = os.environ["OPENAI_API_VERSION"]

if os.getenv("AZURE_OPENAI_API"):
if openai.api_type == C.AZURE_API_TYPE or (not openai.api_type and os.getenv("AZURE_OPENAI_API")):
openai.api_type = os.environ["OPENAI_API_TYPE"] = C.AZURE_API_TYPE
openai.api_base = os.environ["OPENAI_API_BASE"] = os.getenv("AZURE_OPENAI_API") or azure_config.get(
"azure_api_base"
)
openai.api_key = os.environ["OPENAI_API_KEY"] = os.getenv("AZURE_OPENAI_API_KEY") # type: ignore
else: # pragma: no cover
elif os.getenv("OPENAI_API_KEY"):
openai.api_key = os.environ["OPENAI_API_KEY"]
else:
kv_client = SecretClient(
vault_url=os.getenv("AZURE_KEY_VAULT_URL", DEFAULT_KEY_VAULT), credential=DefaultAzureCredential()
vault_url=os.getenv("AZURE_KEY_VAULT_URL", C.AZURE_KEY_VAULT), credential=DefaultAzureCredential()
)
openai.api_base = os.environ["OPENAI_API_BASE"] = kv_client.get_secret("azure-open-ai").value # type: ignore
openai.api_key = os.environ["OPENAI_API_KEY"] = kv_client.get_secret("azure-openai-key").value # type: ignore
Expand Down
6 changes: 3 additions & 3 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
import yaml
from collections import namedtuple

import pytest
import yaml
from llama_index import SimpleDirectoryReader


Expand Down Expand Up @@ -50,7 +50,7 @@ def as_query_engine(self):
return self

def mock_create(
engine,
model,
messages,
temperature,
max_tokens,
Expand Down
9 changes: 5 additions & 4 deletions tests/test_gpt_cli.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Pytest for gpt_review/main.py"""
from dataclasses import dataclass
import os
import pytest
import subprocess
import sys
from dataclasses import dataclass

import pytest

from gpt_review._gpt_cli import cli
import gpt_review.constants as C
from gpt_review._gpt_cli import cli


@dataclass
Expand All @@ -28,7 +29,7 @@ class CLICase2(CLICase):

SAMPLE_FILE = "src/gpt_review/__init__.py"
QUESTION = "how are you"
WHAT_LANGUAGE = "what programming language is this code written in?"
WHAT_LANGUAGE = "'what programming language is this code written in?'"
HELP_TEXT = """usage: gpt ask [-h] [--verbose] [--debug] [--only-show-errors]
[--output {json,jsonc,yaml,yamlc,table,tsv,none}]
[--query JMESPATH] [--max-tokens MAX_TOKENS]
Expand Down
28 changes: 14 additions & 14 deletions tests/test_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,42 @@
import pytest
from openai.error import RateLimitError

from gpt_review._openai import _call_gpt, _get_engine
import gpt_review.constants as C
from gpt_review._openai import _call_gpt, _get_model
from gpt_review.context import _load_azure_openai_context


def get_engine_test() -> None:
def get_model_test() -> None:
prompt = "This is a test prompt"

context = _load_azure_openai_context()

engine = _get_engine(prompt=prompt, max_tokens=1000, fast=True)
assert engine == context.turbo_llm_model_deployment_id
model = _get_model(prompt=prompt, max_tokens=1000, fast=True)
assert model == context.turbo_llm_model_deployment_id

engine = _get_engine(prompt=prompt, max_tokens=5000)
assert engine == context.smart_llm_model_deployment_id
model = _get_model(prompt=prompt, max_tokens=5000)
assert model == context.smart_llm_model_deployment_id

engine = _get_engine(prompt=prompt, max_tokens=9000)
assert engine == context.large_llm_model_deployment_id
model = _get_model(prompt=prompt, max_tokens=9000)
assert model == context.large_llm_model_deployment_id


def test_get_engine() -> None:
get_engine_test()
def test_get_model() -> None:
get_model_test()


@pytest.mark.integration
def test_int_get_engine() -> None:
get_engine_test()
def test_int_get_model() -> None:
get_model_test()


def rate_limit_test(monkeypatch):
def mock_get_engine(prompt: str, max_tokens: int, fast: bool = False, large: bool = False):
def mock_get_model(prompt: str, max_tokens: int, fast: bool = False, large: bool = False):
error = RateLimitError("Rate Limit Error")
error.headers["Retry-After"] = 10
raise error

monkeypatch.setattr("gpt_review._openai._get_engine", mock_get_engine)
monkeypatch.setattr("gpt_review._openai._get_model", mock_get_model)
with pytest.raises(RateLimitError):
_call_gpt(prompt="This is a test prompt", retry=C.MAX_RETRIES)

Expand Down