Commit

Initial commit
gdb committed Oct 25, 2020
0 parents commit 3c6d4cd
Showing 52 changed files with 8,074 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
*.egg-info
__pycache__
/public/dist
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
The MIT License

Copyright (c) OpenAI (https://openai.com)

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
9 changes: 9 additions & 0 deletions Makefile
@@ -0,0 +1,9 @@
.PHONY: build upload

build:
	python setup.py sdist

upload:
	twine upload dist/openai-*.tar.gz
	rm dist/openai-*.tar.gz


43 changes: 43 additions & 0 deletions README.md
@@ -0,0 +1,43 @@
# OpenAI Python Library

The OpenAI Python library provides convenient access to the OpenAI API
from applications written in the Python language. It includes a
pre-defined set of classes for API resources that initialize
themselves dynamically from API responses, which makes it compatible
with a wide range of versions of the OpenAI API.
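
For example, once `OPENAI_API_KEY` is set in your environment, a basic completion request looks roughly like the following sketch (the engine name and prompt are placeholders; the same `openai.Completion.create` call is used by the example script later in this commit):

```python
import openai

# The API key is read from the OPENAI_API_KEY environment variable by default
# (see openai/__init__.py); it can also be set explicitly:
# openai.api_key = "sk-..."

completion = openai.Completion.create(
    engine="ada",               # placeholder engine name
    prompt="Once upon a time",  # placeholder prompt
    max_tokens=5,
)
print(completion.choices[0].text)
```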

This library additionally provides an `openai` command-line utility,
which makes it easy to interact with the API from your terminal. Run
`openai api -h` for usage.
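
For instance, a completion can be requested from the terminal roughly as follows. The exact subcommands and flags are defined in `openai/cli.py`, which is not shown above, so treat this invocation as illustrative and check `openai api -h`:

```sh
export OPENAI_API_KEY="sk-..."
openai api completions.create -e ada -p "Once upon a time"
```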

## Documentation

See the [OpenAI API docs](https://beta.openai.com/docs/api-reference?lang=python). (During
the beta, you'll need to be signed into your account to see them.)

## Installation

You don't need this source code unless you want to modify the package. If you just
want to use the package, run:

```sh
pip install --upgrade openai
```

Install from source with:

```sh
python setup.py install
```

## Requirements

- Python 2.7+ or Python 3.4+ (PyPy supported)

In general we want to support the versions of Python that our
customers are using, so if you run into any version issues, please
let us know at [email protected].

## Credit

This library is forked from the [Stripe Python Library](https://github.com/stripe/stripe-python).
71 changes: 71 additions & 0 deletions bin/openai
@@ -0,0 +1,71 @@
#!/usr/bin/env python
import argparse
import json
import logging
import os
import sys

import openai
from openai.cli import display_error
from openai.cli import register as api_register

logger = logging.getLogger()
formatter = logging.Formatter("[%(asctime)s] %(message)s")
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)
logger.addHandler(handler)


def main():
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        dest="verbosity",
        default=0,
        help="Set verbosity.",
    )
    parser.add_argument("-b", "--api-base", help="What API base url to use.")
    parser.add_argument("-k", "--api-key", help="What API key to use.")
    parser.add_argument(
        "-o",
        "--organization",
        help="Which organization to run as (will use your default organization if not specified)",
    )

    def help(args):
        parser.print_help()

    parser.set_defaults(func=help)

    subparsers = parser.add_subparsers()
    sub = subparsers.add_parser("api", help="Direct API calls")

    api_register(sub)

    args = parser.parse_args()
    if args.verbosity == 1:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 2:
        logger.setLevel(logging.DEBUG)

    openai.debug = True
    if args.api_key is not None:
        openai.api_key = args.api_key
    if args.api_base is not None:
        openai.api_base = args.api_base
    if args.organization is not None:
        openai.organization = args.organization

    try:
        args.func(args)
    except openai.error.OpenAIError as e:
        display_error(e)
    except KeyboardInterrupt:
        sys.stderr.write("\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())
30 changes: 30 additions & 0 deletions examples/semanticsearch/README.md
@@ -0,0 +1,30 @@
# semanticsearch

A client-side implementation of our semantic search endpoint (https://beta.openai.com/docs/api-reference/search).

Our endpoint has a specially optimized implementation of this logic,
which makes it much faster for calls involving many documents, so we
recommend using the server-side implementation rather than this one
for latency-sensitive workloads.

We encourage you to try different variants of this client-side logic
-- we don't think our setup is anywhere near optimal!

## Sample usage

The following invocation runs a client-side semantic search. It
formats each document into a prompt asking the API for the document's
relevance, and then post-processes the logprobs to derive relevance
scores:

```
$ ./semanticsearch.py -q 'positive emotion' -d happy -d sad
[client-side semantic search] {'object': 'list', 'data': [{'object': 'search_result', 'document': 0, 'score': 204.448}, {'object': 'search_result', 'document': 1, 'score': 108.208}], 'model': 'ada:2020-05-03'}
```

Passing `-s` runs the exact same logic server-side:

```
$ ./semanticsearch.py -q 'positive emotion' -d happy -d sad -s
[server-side semantic search] {'object': 'list', 'data': [{'object': 'search_result', 'document': 0, 'score': 204.448}, {'object': 'search_result', 'document': 1, 'score': 108.208}], 'model': 'ada:2020-05-03'}
```
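
For reference, the score reported for each document is derived from token log-probabilities, roughly as in this condensed sketch of what `semanticsearch.py` (shown next) computes; the helper name here is ours:

```python
# Score = mean log-probability of the query tokens when they follow the
# document, minus the same quantity for an empty baseline document, times 100.
def relevance_score(doc_query_logprobs, empty_query_logprobs, multiplier=100.0):
    doc_score = sum(doc_query_logprobs) / len(doc_query_logprobs)
    baseline = sum(empty_query_logprobs) / len(empty_query_logprobs)
    return (doc_score - baseline) * multiplier
```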
125 changes: 125 additions & 0 deletions examples/semanticsearch/semanticsearch.py
@@ -0,0 +1,125 @@
#!/usr/bin/env python
import openai
import argparse
import logging
import sys
from typing import List

logger = logging.getLogger()
formatter = logging.Formatter("[%(asctime)s] [%(process)d] %(message)s")
handler = logging.StreamHandler(sys.stderr)
handler.setFormatter(formatter)
logger.addHandler(handler)

DEFAULT_COND_LOGP_TEMPLATE = (
    "<|endoftext|>{document}\n\n---\n\nThe above passage is related to: {query}"
)
SCORE_MULTIPLIER = 100.0


class SearchScorer:
    def __init__(
        self, *, document, query, cond_logp_template=DEFAULT_COND_LOGP_TEMPLATE
    ):
        self.document = document
        self.query = query
        self.cond_logp_template = cond_logp_template
        self.context = self.cond_logp_template.format(
            document=self.document, query=self.query
        )

    def get_context(self):
        return self.context

    def get_score(self, choice) -> float:
        assert choice.text == self.context
        logprobs: List[float] = choice.logprobs.token_logprobs
        text = choice.logprobs.tokens
        text_len = sum(len(token) for token in text)
        if text_len != len(self.context):
            raise RuntimeError(
                f"text_len={text_len}, len(self.context)={len(self.context)}"
            )
        total_len = 0
        last_used = len(text)
        while total_len < len(self.query):
            assert last_used > 0
            total_len += len(text[last_used - 1])
            last_used -= 1
        max_len = len(self.context) - self.cond_logp_template.index("{document}")
        assert total_len + len(self.document) <= max_len
        logits: List[float] = logprobs[last_used:]
        return sum(logits) / len(logits) * SCORE_MULTIPLIER


def semantic_search(engine, query, documents):
    # add empty document as baseline
    scorers = [
        SearchScorer(document=document, query=query) for document in [""] + documents
    ]
    completion = openai.Completion.create(
        engine=engine,
        prompt=[scorer.get_context() for scorer in scorers],
        max_tokens=0,
        logprobs=0,
        echo=True,
    )
    # put the documents back in order so we can easily normalize by the empty document 0
    data = sorted(completion.choices, key=lambda choice: choice.index)
    assert len(scorers) == len(
        data
    ), f"len(scorers)={len(scorers)} len(data)={len(data)}"
    scores = [scorer.get_score(choice) for scorer, choice in zip(scorers, data)]
    # subtract score for empty document
    scores = [score - scores[0] for score in scores][1:]
    data = {
        "object": "list",
        "data": [
            {
                "object": "search_result",
                "document": document_idx,
                "score": round(score, 3),
            }
            for document_idx, score in enumerate(scores)
        ],
        "model": completion.model,
    }
    return data


def main():
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        dest="verbosity",
        default=0,
        help="Set verbosity.",
    )
    parser.add_argument("-e", "--engine", default="ada")
    parser.add_argument("-q", "--query", required=True)
    parser.add_argument("-d", "--document", action="append", required=True)
    parser.add_argument("-s", "--server-side", action="store_true")
    args = parser.parse_args()

    if args.verbosity == 1:
        logger.setLevel(logging.INFO)
    elif args.verbosity >= 2:
        logger.setLevel(logging.DEBUG)

    if args.server_side:
        resp = openai.Engine(id=args.engine).search(
            query=args.query, documents=args.document
        )
        resp = resp.to_dict_recursive()
        print(f"[server-side semantic search] {resp}")
    else:
        resp = semantic_search(args.engine, query=args.query, documents=args.document)
        print(f"[client-side semantic search] {resp}")

    return 0


if __name__ == "__main__":
    sys.exit(main())
32 changes: 32 additions & 0 deletions openai/__init__.py
@@ -0,0 +1,32 @@
from __future__ import absolute_import, division, print_function

import os

# OpenAI Python bindings.
#
# Originally forked from the MIT-licensed Stripe Python bindings.

# Configuration variables

api_key = os.environ.get("OPENAI_API_KEY")
organization = os.environ.get("OPENAI_ORGANIZATION")
client_id = None
api_base = os.environ.get("OPENAI_API_BASE", "https://api.openai.com")
file_api_base = None
api_version = None
verify_ssl_certs = True
proxy = None
default_http_client = None
app_info = None
enable_telemetry = True
max_network_retries = 0
ca_bundle_path = os.path.join(os.path.dirname(__file__), "data/ca-certificates.crt")
debug = False

# Set to either 'debug' or 'info', controls console logging
log = None

# API resources
from openai.api_resources import * # noqa

from openai.error import OpenAIError, APIError, InvalidRequestError
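
Each of these module-level settings can be overridden from application code before making requests; a minimal sketch (all values are placeholders):

```python
import openai

openai.api_key = "sk-..."                   # otherwise read from OPENAI_API_KEY
openai.organization = "org-..."             # otherwise read from OPENAI_ORGANIZATION
openai.api_base = "https://api.openai.com"  # the default, shown for completeness
```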