Change Google colab notebooks to latest supported GGUF format
snexus committed Sep 5, 2023
1 parent 9b80a52 · commit 79bf370
Showing 4 changed files with 48 additions and 48 deletions.
README.md: 5 changes (3 additions, 2 deletions)

@@ -25,15 +25,16 @@ The purpose of this package is to offer a convenient question-answering system w
 * Supports the "Retrieve and Re-rank" strategy for semantic search, see - https://www.sbert.net/examples/applications/retrieve_rerank/README.html.
 
 * Allows interaction with embedded documents, supporting the following models and methods (including locally hosted):
-    * OpenAI models (ChatGPT 3.5/4).
+    * OpenAI models (ChatGPT 3.5/4 and Azure OpenAI).
     * HuggingFace models.
-    * GGML models through LlamaCpp.
+    * Llama cpp supported models - for full list see https://github.com/ggerganov/llama.cpp#description
     * AutoGPTQ models (temporarily disabled due to broken dependencies).
 
 * Other features
     * Simple CLI and web interfaces.
     * Deep linking into document sections - jump to an individual PDF page or a header in a markdown file.
     * Ability to save responses to an offline database for future analysis.
+    * Experimental API
 
 
 ## Demo
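The README change above tracks llama.cpp's move from GGML to the GGUF weight format in August 2023; builds from that point on no longer load .ggmlv3.bin files, which is why every model path in this commit changes. As a minimal sketch (not part of the commit) of loading such a GGUF file directly with llama-cpp-python, assuming a version from roughly v0.1.79 onward, where GGUF support landed:

from llama_cpp import Llama  # pip install llama-cpp-python

llm = Llama(
    model_path="/content/llm/models/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf",
    n_ctx=1512,       # context window, mirroring the sample config in this commit
    n_batch=512,      # prompt evaluation batch size
    n_gpu_layers=25,  # layers offloaded to GPU; lower this on out-of-memory errors
)

out = llm("### Instruction:\nSay hello.\n\n### Response:", max_tokens=64, temperature=0.2)
print(out["choices"][0]["text"])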
notebooks/llmsearch_google_colab_demo.ipynb: 10 changes (5 additions, 5 deletions)

@@ -8,15 +8,14 @@
    "source": [
     "# LLMSearch Google Colab Demo\n",
     "\n",
-    "This notebook was tested to run the following 13B model - https://huggingface.co/TheBloke/WizardLM-13B-Uncensored-GGML/tree/main\n",
-    "\n",
-    "The memory consumption given the configuration file below is almost maximum on free tier of Google Colab.\n",
+    "This notebook was tested to run the following 13B model - https://huggingface.co/TheBloke/airoboros-l2-13B-gpt4-1.4.1-GGUF\n",
     "\n",
     "In case of memory errors, tweak the config to offload some layers to CPU, or try a smaller model.\n",
     "\n",
     "## Instuctions\n",
     "\n",
     "* Upload or generate some documents (check supported format in README.md) in `sample_docs` folder.\n",
     "* Run the notebook.\n",
     "* Optional - tweak configuration file to point to a different model\n"
    ]
   },
@@ -83,7 +82,7 @@
     "llm:\n",
     "  type: llamacpp\n",
     "  params:\n",
-    "    model_path: /content/llm/models/wizardLM-13B-Uncensored.ggmlv3.q4_K_M.bin\n",
+    "    model_path: /content/llm/models/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf\n",
     "    prompt_template: |\n",
     "      ### Instruction:\n",
     "      Use the following pieces of context to provide detailed answer the question at the end. If answer isn't in the context, say that you don't know, don't try to make up an answer.\n",
@@ -160,7 +159,8 @@
     "\n",
     "\n",
     "cd llm/models\n",
-    "wget https://huggingface.co/TheBloke/WizardLM-13B-Uncensored-GGML/resolve/main/wizardLM-13B-Uncensored.ggmlv3.q4_K_M.bin\n"
+    "wget https://huggingface.co/TheBloke/airoboros-l2-13B-gpt4-1.4.1-GGUF/resolve/main/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf\n",
+    "\n"
    ]
   },
   {
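The notebook keeps fetching the model with wget. An equivalent sketch (an alternative, not what the notebook itself does) using huggingface_hub, which caches and can resume interrupted downloads; the repo_id and filename here are read off the wget URL above:

from huggingface_hub import hf_hub_download  # pip install huggingface_hub

path = hf_hub_download(
    repo_id="TheBloke/airoboros-l2-13B-gpt4-1.4.1-GGUF",
    filename="airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf",
    local_dir="llm/models",  # the same destination the notebook's cd implies
)
print(path)  # path to the downloaded GGUF file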
sample_templates/llm/llamacpp.yaml: 43 changes (21 additions, 22 deletions)

@@ -1,28 +1,27 @@
 # This file contains a configuration section relevant to LLM, not the entire config
 
 llm:
-  type: llamacpp
-  params:
-    model_path: /storage/llm/cache/WizardLM-13B-1.0-GGML/WizardLM-13B-1.0.ggmlv3.q5_K_S.bin
-    prompt_template: |
-      ### Instruction:
-      Use the following pieces of context to answer the question at the end. If answer isn't in the context, say that you don't know, don't try to make up an answer.
+  type: llamacpp
+  params:
+    model_path: /storage/llm/cache/airoboros/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf
+    prompt_template: |
+      ### Instruction:
+      Use the following pieces of context to provide detailed answer the question at the end. If answer isn't in the context, say that you don't know, don't try to make up an answer.
 
-      ### Context:
-      ---------------
-      {context}
-      ---------------
+      ### Context:
+      ---------------
+      {context}
+      ---------------
 
-      ### Question: {question}
-      ### Response:
-    model_init_params:
-      n_ctx: 1024
-      n_batch: 512
-      n_gpu_layers: 30
+      ### Question: {question}
+      ### Response:
+    model_init_params:
+      n_ctx: 1512
+      n_batch: 512
+      n_gpu_layers: 25
 
-    model_kwargs:
-      max_tokens: 512
-      top_p: 0.1
-      top_k: 40
-      temperature: 0.7
-      mirostat_mode: 0
+    model_kwargs:
+      max_tokens: 512
+      top_p: 0.1
+      top_k: 40
+      temperature: 0.2
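In the template above, model_init_params holds load-time settings and model_kwargs holds per-generation sampling settings. A sketch of how these values could map onto LangChain's LlamaCpp wrapper (an assumption about the wiring, for illustration; llmsearch's actual loader may differ):

from langchain.llms import LlamaCpp

llm = LlamaCpp(
    model_path="/storage/llm/cache/airoboros/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf",
    # model_init_params: fixed once the weights are loaded
    n_ctx=1512,
    n_batch=512,
    n_gpu_layers=25,
    # model_kwargs: applied to every completion
    max_tokens=512,
    top_p=0.1,
    top_k=40,
    temperature=0.2,
)
print(llm("### Question: Which weight format does llama.cpp use now?\n### Response:"))

Note the commit also raises n_ctx from 1024 to 1512, lowers n_gpu_layers from 30 to 25, and drops temperature from 0.7 to 0.2, trading some GPU offload for memory headroom and making answers more deterministic.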
sample_templates/obsidian_conf.yaml: 38 changes (19 additions, 19 deletions)

@@ -58,49 +58,49 @@ semantic_search:
   persist_response_db_path: responses_test.db
 
 
-llm:
-  type: openai
-  params:
-    prompt_template: |
-      Contex information is provided below. Given only the context and not prior knowledge, provide detailed answer to the question and references to the provided context. If answer isn't in the context, say you don't know.
+# llm:
+#   type: openai
+#   params:
+#     prompt_template: |
+#       Contex information is provided below. Given only the context and not prior knowledge, provide detailed answer to the question and references to the provided context. If answer isn't in the context, say you don't know.
 
-      ### Context:
-      ---------------------
-      {context}
-      ---------------------
+#       ### Context:
+#       ---------------------
+#       {context}
+#       ---------------------
 
-      ### Question: {question}
-    model_kwargs:
-      temperature: 0.0
-      model_name: gpt-3.5-turbo
+#       ### Question: {question}
+#     model_kwargs:
+#       temperature: 0.0
+#       model_name: gpt-3.5-turbo
 
 
-#llm:
+# llm:
 #   type: llamacpp
 #   params:
-#     model_path: /storage/llm/cache/WizardLM-13B-1.0-GGML/WizardLM-13B-1.0.ggmlv3.q5_K_S.bin
+#     model_path: /storage/llm/cache/airoboros/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf
 #     prompt_template: |
 #       ### Instruction:
 #       Use the following pieces of context to provide detailed answer the question at the end. If answer isn't in the context, say that you don't know, don't try to make up an answer.
-#
+
 #       ### Context:
 #       ---------------
 #       {context}
 #       ---------------
-#
+
 #       ### Question: {question}
 #       ### Response:
 #     model_init_params:
 #       n_ctx: 1512
 #       n_batch: 512
 #       n_gpu_layers: 25
-#
+
 #     model_kwargs:
 #       max_tokens: 512
 #       top_p: 0.1
 #       top_k: 40
 #       temperature: 0.2
-#       # mirostat_mode: 1
+#       mirostat_mode: 1
-#
+
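In both config blocks, {context} and {question} are placeholder fields, presumably filled by str.format-style substitution with the retrieved chunks and the user's query. A small sketch of that substitution with hypothetical values (neither string comes from the repo):

template = (
    "### Instruction:\n"
    "Use the following pieces of context to provide detailed answer the question "
    "at the end. If answer isn't in the context, say that you don't know, "
    "don't try to make up an answer.\n"
    "\n"
    "### Context:\n"
    "---------------\n"
    "{context}\n"
    "---------------\n"
    "\n"
    "### Question: {question}\n"
    "### Response:"
)

prompt = template.format(
    context="llama.cpp switched its weight format from GGML to GGUF in August 2023.",
    question="Which file format should quantized models use now?",
)
print(prompt)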
