-
Notifications
You must be signed in to change notification settings - Fork 3.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 3c6d4cd
Showing
52 changed files
with
8,074 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
*.egg-info | ||
__pycache__ | ||
/public/dist |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
The MIT License | ||
|
||
Copyright (c) OpenAI (https://openai.com) | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in | ||
all copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
THE SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
.PHONY: build upload | ||
|
||
build: | ||
python setup.py sdist | ||
|
||
upload: | ||
twine upload dist/openai-*.tar.gz | ||
rm dist/openai-*.tar.gz | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# OpenAI Python Library | ||
|
||
The OpenAI Python library provides convenient access to the OpenAI API | ||
from applications written in the Python language. It includes a | ||
pre-defined set of classes for API resources that initialize | ||
themselves dynamically from API responses, which makes it compatible | ||
with a wide range of versions of the OpenAI API. | ||
|
||
This library additionally provides an `openai` command-line utility | ||
which makes it easy to interact with the API from your terminal. Run | ||
`openai api -h` for usage. | ||
|
||
## Documentation | ||
|
||
See the [OpenAI API docs](https://beta.openai.com/docs/api-reference?lang=python). (During | ||
the beta, you'll need to be signed into your account to see them.) | ||
|
||
## Installation | ||
|
||
You don't need this source code unless you want to modify the package. If you only | ||
want to use the package, run: | ||
|
||
```sh | ||
pip install --upgrade openai | ||
``` | ||
|
||
Install from source with: | ||
|
||
```sh | ||
python setup.py install | ||
``` | ||
|
||
## Requirements | ||
|
||
- Python 2.7+ or Python 3.4+ (PyPy supported) | ||
|
||
In general, we want to support the versions of Python that our | ||
customers are using, so if you run into problems with any particular | ||
version, please let us know at support@openai.com. | ||
|
||
## Credit | ||
|
||
This library is forked from the [Stripe Python Library](https://github.com/stripe/stripe-python). |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
#!/usr/bin/env python | ||
import argparse | ||
import json | ||
import logging | ||
import os | ||
import sys | ||
|
||
import openai | ||
from openai.cli import display_error | ||
from openai.cli import register as api_register | ||
|
||
# Root-logger wiring for the CLI: every record goes to stderr with a
# timestamp prefix.
formatter = logging.Formatter("[%(asctime)s] %(message)s")
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)

logger = logging.getLogger()
logger.addHandler(handler)
|
||
|
||
def main():
    """Run the ``openai`` command-line tool.

    Parses the global options, copies any that were supplied onto the
    ``openai`` module configuration, and dispatches to the handler chosen
    by the sub-command.  When no sub-command is given the top-level help
    text is printed.

    Returns:
        int: Process exit status.  ``0`` when the handler completed, ``1``
        when it raised ``openai.error.OpenAIError`` (the error is shown via
        ``display_error``) or was interrupted with Ctrl-C.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        dest="verbosity",
        default=0,
        help="Set verbosity.",
    )
    parser.add_argument("-b", "--api-base", help="What API base url to use.")
    parser.add_argument("-k", "--api-key", help="What API key to use.")
    parser.add_argument(
        "-o",
        "--organization",
        help="Which organization to run as (will use your default organization if not specified)",
    )

    def print_help(args):
        # Default handler: used when no sub-command overrides ``func``.
        parser.print_help()

    parser.set_defaults(func=print_help)

    subparsers = parser.add_subparsers()
    sub = subparsers.add_parser("api", help="Direct API calls")

    # Let the API CLI module attach its own sub-commands and handlers.
    api_register(sub)

    args = parser.parse_args()
    # -v enables INFO logging, -vv (or more) enables DEBUG logging.
    if args.verbosity == 1:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 2:
        logger.setLevel(logging.DEBUG)

    # The CLI always switches on the library's debug flag.
    openai.debug = True
    # Only override module configuration for options that were passed.
    if args.api_key is not None:
        openai.api_key = args.api_key
    if args.api_base is not None:
        openai.api_base = args.api_base
    if args.organization is not None:
        openai.organization = args.organization

    try:
        args.func(args)
    except openai.error.OpenAIError as e:
        display_error(e)
        # Report the failure to the calling shell instead of exiting 0.
        return 1
    except KeyboardInterrupt:
        sys.stderr.write("\n")
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# semanticsearch | ||
|
||
A client-side implementation of our semantic search endpoint (https://beta.openai.com/docs/api-reference/search). | ||
|
||
Our endpoint has a specially optimized implementation of this logic, | ||
which makes it very fast for calls involving many documents, so we recommend | ||
using our implementation rather than this one for latency-sensitive | ||
workloads. | ||
|
||
We encourage you to try different variants of this client-side logic | ||
-- we don't think our current setup is necessarily optimal! | ||
|
||
## Sample usage | ||
|
||
The following usage will run a client-side semantic search. This | ||
formats each document into a prompt asking the API for the document's | ||
relevance, and then post-processes the logprobs to derive relevance | ||
scores: | ||
|
||
``` | ||
$ ./semanticsearch.py -q 'positive emotion' -d happy -d sad | ||
[client-side semantic search] {'object': 'list', 'data': [{'object': 'search_result', 'document': 0, 'score': 204.448}, {'object': 'search_result', 'document': 1, 'score': 108.208}], 'model': 'ada:2020-05-03'} | ||
``` | ||
|
||
We run the exact same logic server-side: | ||
|
||
``` | ||
$ ./semanticsearch.py -q 'positive emotion' -d happy -d sad -s | ||
[server-side semantic search] {'object': 'list', 'data': [{'object': 'search_result', 'document': 0, 'score': 204.448}, {'object': 'search_result', 'document': 1, 'score': 108.208}], 'model': 'ada:2020-05-03'} | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
#!/usr/bin/env python | ||
import openai | ||
import argparse | ||
import logging | ||
import sys | ||
from typing import List | ||
|
||
# Root-logger wiring for the script: every record goes to stderr, prefixed
# with a timestamp and the process id.
formatter = logging.Formatter("[%(asctime)s] [%(process)d] %(message)s")
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)

logger = logging.getLogger()
logger.addHandler(handler)
|
||
# Prompt layout: the document comes first and the query is the final part of
# the text, so the query tokens are the trailing tokens of the prompt.
DEFAULT_COND_LOGP_TEMPLATE = (
    "<|endoftext|>{document}\n\n---\n\nThe above passage is related to: {query}"
)
# Factor applied to the mean log-probability of the query tokens.
SCORE_MULTIPLIER = 100.0


class SearchScorer:
    """Build a scoring prompt for one (document, query) pair and score it.

    The prompt is ``cond_logp_template`` filled in with the document and the
    query.  Given the echoed completion for that prompt, ``get_score``
    averages the log-probabilities of the trailing tokens that cover the
    query and scales the result by ``SCORE_MULTIPLIER``.
    """

    def __init__(
        self, *, document, query, cond_logp_template=DEFAULT_COND_LOGP_TEMPLATE
    ):
        """Store the inputs and pre-render the prompt.

        Args:
            document: Text substituted for ``{document}`` in the template.
            query: Text substituted for ``{query}`` in the template.
            cond_logp_template: Format string containing ``{document}`` and
                ``{query}`` placeholders.
        """
        self.document = document
        self.query = query
        self.cond_logp_template = cond_logp_template
        self.context = self.cond_logp_template.format(
            document=self.document, query=self.query
        )

    def get_context(self):
        """Return the rendered prompt to send to the completion endpoint."""
        return self.context

    def get_score(self, choice) -> float:
        """Return the scaled mean log-probability of the query tokens.

        Args:
            choice: A completion choice exposing ``text``,
                ``logprobs.tokens`` and ``logprobs.token_logprobs``; its text
                must be exactly the prompt produced by ``get_context``.

        Returns:
            The mean of the selected token log-probabilities multiplied by
            ``SCORE_MULTIPLIER``.

        Raises:
            RuntimeError: If the tokens do not add up to the prompt length.
            ValueError: If no tokens are selected for the query (for example
                an empty query), so there is nothing to average.
        """
        assert choice.text == self.context
        logprobs: List[float] = choice.logprobs.token_logprobs
        text = choice.logprobs.tokens
        text_len = sum(len(token) for token in text)
        if text_len != len(self.context):
            raise RuntimeError(
                f"text_len={text_len}, len(self.context)={len(self.context)}"
            )
        # Walk backwards from the last token until the collected tokens are
        # at least as long as the query text.
        total_len = 0
        last_used = len(text)
        while total_len < len(self.query):
            assert last_used > 0
            total_len += len(text[last_used - 1])
            last_used -= 1
        # Sanity check: selected tokens plus the document must fit in the
        # part of the prompt that starts at the document placeholder.
        max_len = len(self.context) - self.cond_logp_template.index("{document}")
        assert total_len + len(self.document) <= max_len
        logits: List[float] = logprobs[last_used:]
        if not logits:
            # Previously this fell through to an opaque ZeroDivisionError.
            raise ValueError(
                "cannot score an empty query: no query tokens to average over"
            )
        return sum(logits) / len(logits) * SCORE_MULTIPLIER
|
||
|
||
def semantic_search(engine, query, documents):
    """Rank ``documents`` against ``query`` using client-side scoring.

    One prompt is built per document, plus one for an empty baseline
    document.  All prompts are sent in a single echoed completion request,
    each is scored from the returned log-probabilities, and the baseline
    score is subtracted from every document score.

    Args:
        engine: Engine identifier passed to ``openai.Completion.create``.
        query: The search query.
        documents: List of document strings.

    Returns:
        A dict with ``"object"``, ``"data"`` (one ``search_result`` entry per
        document, in input order, scores rounded to 3 places) and ``"model"``.
    """
    # Slot 0 is the empty document, used as the baseline.
    scorers = [SearchScorer(document=doc, query=query) for doc in [""] + documents]
    completion = openai.Completion.create(
        engine=engine,
        prompt=[scorer.get_context() for scorer in scorers],
        max_tokens=0,
        logprobs=0,
        echo=True,
    )
    # Re-establish prompt order so choice i lines up with scorer i.
    data = sorted(completion.choices, key=lambda choice: choice.index)
    assert len(scorers) == len(
        data
    ), f"len(scorers)={len(scorers)} len(data)={len(data)}"
    raw_scores = [scorer.get_score(choice) for scorer, choice in zip(scorers, data)]
    # Express every document score relative to the empty-document baseline.
    relative_scores = [raw - raw_scores[0] for raw in raw_scores[1:]]
    results = []
    for document_idx, score in enumerate(relative_scores):
        results.append(
            {
                "object": "search_result",
                "document": document_idx,
                "score": round(score, 3),
            }
        )
    return {"object": "list", "data": results, "model": completion.model}
|
||
|
||
def main():
    """Parse command-line options and run one semantic search.

    With ``-s/--server-side`` the search is delegated to the engine's
    ``search`` method; otherwise ``semantic_search`` is run locally.  The
    result is printed to stdout.

    Returns:
        int: Always ``0``.
    """
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        dest="verbosity",
        default=0,
        help="Set verbosity.",
    )
    parser.add_argument("-e", "--engine", default="ada")
    parser.add_argument("-q", "--query", required=True)
    parser.add_argument("-d", "--document", action="append", required=True)
    parser.add_argument("-s", "--server-side", action="store_true")
    args = parser.parse_args()

    # -vv (or more) selects DEBUG, a single -v selects INFO.
    if args.verbosity >= 2:
        logger.setLevel(logging.DEBUG)
    elif args.verbosity == 1:
        logger.setLevel(logging.INFO)

    if not args.server_side:
        resp = semantic_search(args.engine, query=args.query, documents=args.document)
        print(f"[client-side semantic search] {resp}")
        return 0

    search_engine = openai.Engine(id=args.engine)
    resp = search_engine.search(query=args.query, documents=args.document)
    resp = resp.to_dict_recursive()
    print(f"[server-side semantic search] {resp}")
    return 0


if __name__ == "__main__":
    sys.exit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
from __future__ import absolute_import, division, print_function | ||
|
||
import os | ||
|
||
# OpenAI Python bindings. | ||
# | ||
# Originally forked from the MIT-licensed Stripe Python bindings. | ||
|
||
# Configuration variables | ||
|
||
# Settings seeded from the process environment.
api_key = os.getenv("OPENAI_API_KEY")
organization = os.getenv("OPENAI_ORGANIZATION")
api_base = os.getenv("OPENAI_API_BASE", "https://api.openai.com")

# Settings that start out unset.
client_id = None
file_api_base = None
api_version = None
proxy = None
default_http_client = None
app_info = None

# Settings with fixed initial values.
verify_ssl_certs = True
enable_telemetry = True
max_network_retries = 0
debug = False

# Path to the CA bundle file inside this package's "data" directory.
ca_bundle_path = os.path.join(
    os.path.dirname(__file__),
    "data/ca-certificates.crt",
)

# Set to either 'debug' or 'info', controls console logging
log = None
|
||
# API resources | ||
from openai.api_resources import * # noqa | ||
|
||
from openai.error import OpenAIError, APIError, InvalidRequestError |
Oops, something went wrong.