Add documentation and packaging #105

Merged · 18 commits · Mar 29, 2024
45 changes: 45 additions & 0 deletions .github/workflows/release.yml
@@ -0,0 +1,45 @@
name: Package

on:
  - push

jobs:
  build_source_dist:
    name: Build source distribution
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      # Action name/version was garbled in the source page; setup-python assumed.
      - uses: actions/setup-python@v4
        with:
          python-version: "3.11"

      - name: Install build
        run: |
          python -m pip install --upgrade pip
          pip install build setuptools wheel twine

      - name: Run build
        run: python -m build

      - uses: actions/upload-artifact@v3
        with:
          path: ./dist/*

  publish:
    name: Publish package
    if: startsWith(github.event.ref, 'refs/tags/v')
    needs:
      - build_source_dist
    runs-on: ubuntu-latest

    steps:
      - uses: actions/download-artifact@v3
        with:
          name: artifact
          path: ./dist

      # Action name/version was garbled in the source page; gh-action-pypi-publish assumed.
      - uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}
3 changes: 3 additions & 0 deletions .gitignore
@@ -12,6 +12,7 @@ temp_data/
*.npz
*.db
sample_templates/obsidian_conf_test.yaml
.venv2

# Distribution / packaging
.Python
@@ -35,6 +36,7 @@ share/python-wheels/
MANIFEST
.env.back
templates-test-local
src/llmsearch/_version.py

# PyInstaller
# Usually these files are written by a python script from a template
@@ -45,6 +47,7 @@ templates-test-local
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
_version.py

# Unit test / coverage reports
htmlcov/
27 changes: 27 additions & 0 deletions .readthedocs.yaml
@@ -0,0 +1,27 @@
# Read the Docs configuration file for Sphinx projects
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Set the OS, Python version and other tools you might need
build:
  os: ubuntu-22.04
  tools:
    python: "3.11"
    # You can also specify other tool versions:
    # nodejs: "20"
    # rust: "1.70"
    # golang: "1.20"

# Build documentation in the "docs/" directory with Sphinx
sphinx:
  configuration: docs/conf.py
  # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs
  # builder: "dirhtml"
  # Fail on all warnings to avoid broken references
  # fail_on_warning: true

python:
  install:
    - requirements: docs/requirements.txt
145 changes: 4 additions & 141 deletions README.md
@@ -1,8 +1,8 @@
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://githubtocolab.com/snexus/llm-search/blob/main/notebooks/llmsearch_google_colab_demo.ipynb)

# LLM Search
# pyLLMSearch - Advanced RAG, ready to use

The purpose of this package is to offer a convenient question-answering system with a simple YAML-based configuration that enables interaction with multiple collections of local documents. Special attention is given to improvements in various components of the system **in addition to LLMs** - better document parsing, hybrid search, HyDE enabled search, deep linking, re-ranking, the ability to customize embeddings, and more. The package is designed to work with custom Large Language Models (LLMs) – whether from OpenAI or installed locally.
The purpose of this package is to offer a convenient question-answering (RAG) system with a simple YAML-based configuration that enables interaction with multiple collections of local documents. Special attention is given to improvements in various components of the system **in addition to basic LLM-based RAG** - better document parsing, hybrid search, HyDE enabled search, deep linking, re-ranking, the ability to customize embeddings, and more. The package is designed to work with custom Large Language Models (LLMs) – whether from OpenAI or installed locally.

## Features

@@ -55,146 +55,9 @@ The purpose of this package is to offer a convenient question-answering system w

![Demo](media/llmsearch-demo-v2.gif)

## Prerequisites

* Tested on Ubuntu 22.04.
* Nvidia GPU is required for embeddings generation and usage of locally hosted models.
* Python 3.10, including dev packages (`python3-dev` on Ubuntu)
* Nvidia CUDA Toolkit (tested with v11.7) - https://developer.nvidia.com/cuda-toolkit
* To interact with OpenAI models, create `.env` in the root directory of the repository, containing OpenAI API key. A template for the `.env` file is provided in `.env_template`
* For parsing `.epub` documents, Pandoc is required - https://pandoc.org/installing.html
## Documentation

[Browse Documentation](https://llm-search.readthedocs.io/en/latest/)

## Automatic virtualenv based installation on Linux

```bash
git clone https://github.com/snexus/llm-search.git
cd llm-search

# Create a new environment
python3 -m venv .venv

# Activate new environment
source .venv/bin/activate

./install_linux.sh
```

## Manual virtualenv based installation

```bash
git clone https://github.com/snexus/llm-search.git
cd llm-search

# Create a new environment
python3 -m venv .venv

# Activate new environment
source .venv/bin/activate

# Set variables for llama-cpp to compile with CUDA.

# Assuming Nvidia CUDA Toolkit is installed and pointing to `/usr/local/cuda` on Ubuntu

source ./setvars.sh

# Install newest stable torch for CUDA 11.x
pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu118

# Install the package
pip install . # or `pip install -e .` for development
```

# Quickstart

## 1) Create a configuration file for document base


To create a configuration file in YAML format, you can refer to the example template provided in `sample_templates/generic/config_template.yaml`.


The sample configuration file specifies how to load one of the supported locally hosted models, downloaded from Huggingface -
https://huggingface.co/TheBloke/airoboros-l2-13B-gpt4-1.4.1-GGUF/resolve/main/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf

As an alternative, uncomment the `llm` section for the OpenAI model.

[Sample configuration template](sample_templates/generic/config_template.yaml)


## 2) Create a configuration file for model

To create a configuration file in YAML format, you can refer to the example templates provided in `sample_templates/llm`.

The sample configuration file in [LlamaCpp Model Template](sample_templates/llm/llamacpp.yaml)
specifies how to load one of the supported locally hosted models via LlamaCpp, downloaded from Huggingface -
https://huggingface.co/TheBloke/airoboros-l2-13B-gpt4-1.4.1-GGUF/resolve/main/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf

Alternatively, use one of the other templates provided, for example for OpenAI or LiteLLM.
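
For orientation, here is a minimal sketch of what such a model config might contain. The key names below are illustrative assumptions based on the descriptions in this README; the shipped template `sample_templates/llm/llamacpp.yaml` is the authoritative reference.

```yaml
# Hypothetical sketch, not the verbatim template -- key names are assumptions.
llm:
  type: llamacpp
  params:
    model_path: /path/to/cache/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf
    model_kwargs:
      n_gpu_layers: 30     # layers offloaded to the GPU; tune to your hardware
      temperature: 0.0
```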



## 3) Create document embeddings

To create embeddings from documents, follow these steps:

1. Open the command line interface.
2. Run the following command:

```bash
llmsearch index create -c /path/to/config.yaml
```

Based on the example configuration above, executing this command will scan a folder containing markdown and pdf files (`/path/to/documents`), excluding the files in `subfolder1` and `subfolder2`, and generate a dense embeddings database in the `/path/to/embedding/folder` directory. Additionally, a local cache folder (`/path/to/cache/folder`) will be used to store embedding models, LLM models, and tokenizers.
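
For reference, the sketch below shows roughly what such a configuration could look like. The key names are assumptions for illustration; `sample_templates/generic/config_template.yaml` remains the authoritative schema.

```yaml
# Hypothetical sketch, not the verbatim template -- key names are assumptions.
cache_folder: /path/to/cache/folder          # embedding models, LLMs, tokenizers
embeddings:
  embeddings_path: /path/to/embedding/folder
  # embedding_model: ...                     # optional override of the default model
documents:
  - doc_path: /path/to/documents
    extensions: [md, pdf]
    exclude_paths:
      - subfolder1
      - subfolder2
```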


The default vector database for dense embeddings is ChromaDB, and the default embedding model is `e5-large-v2` (unless specified otherwise in the `embedding_model` section), which is known for its high performance. You can find more information about this and other embedding models at [https://huggingface.co/spaces/mteb/leaderboard](https://huggingface.co/spaces/mteb/leaderboard).

In addition to dense embeddings, sparse embeddings will be generated in `/path/to/embedding/folder/splade` using the SPLADE algorithm. Both dense and sparse embeddings will be used for context search.

## 4) Update document embeddings

When new files are added or existing documents are changed, follow these steps to update the embeddings:

```bash
llmsearch index update -c /path/to/config.yaml
```

Executing this command will detect changed or new files (based on MD5 hashes) and incrementally update only the changes, without the need to rescan the documents from scratch.

## 5) Interact with the documents

To interact with the documents using one of the supported LLMs, follow these steps:

1. Open the command line interface.
2. Run one of the following commands:

* Web interface:

Scans the configs and allows switching between them.

```bash
llmsearch interact webapp -c /path/to/config_folder -m sample_templates/llm/llamacpp.yaml
```

* CLI interface:

```bash
llmsearch interact llm -c ./sample_templates/obsidian_conf.yaml -m ./sample_templates/llm/llamacpp.yaml

```

Based on the example configuration provided in the sample configuration file, the following actions will take place (an illustrative sketch of the relevant settings follows this list):

- The system will load a quantized GGUF model using the LlamaCpp framework. The model file is located at `/storage/llm/cache/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf`.
- Based on the model config, the model will be partially loaded into the GPU (30 layers) and partially into the CPU (remaining layers). The `n_gpu_layers` parameter can be adjusted according to the hardware limitations.
- Additional LlamaCpp specific parameters specified in `model_kwargs` from the `llm->params` section will be passed to the model.
- The system will query the embeddings database using a hybrid search algorithm that combines sparse and dense embeddings. It will provide the most relevant context from different documents, up to a maximum context size of 4096 characters (`max_char_size` in `semantic_search`).
- When displaying paths to relevant documents, the system will replace the part of the path `/home/snexus/projects/knowledge-base` with `obsidian://open?vault=knowledge-base&file=`. This replacement is based on the settings `substring_search` and `substring_replace` in `semantic_search->replace_output_path`.
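
The settings described in this list map onto the configuration roughly as sketched below; the nesting and key names are assumptions based on the parameter names mentioned above, so consult the sample templates for the exact schema.

```yaml
# Hypothetical sketch -- nesting is assumed; see the sample templates.
semantic_search:
  max_char_size: 4096   # maximum characters of context passed to the LLM
  replace_output_path:
    - substring_search: /home/snexus/projects/knowledge-base
      substring_replace: "obsidian://open?vault=knowledge-base&file="
```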

## API (experimental)

To launch the API, supply the path to a config file in the `FASTAPI_LLM_CONFIG` environment variable and launch `llmsearchapi`:

```bash
FASTAPI_LLM_CONFIG="/path/to/config.yaml" llmsearchapi
```
20 changes: 20 additions & 0 deletions docs/Makefile
@@ -0,0 +1,20 @@
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
38 changes: 38 additions & 0 deletions docs/conf.py
@@ -0,0 +1,38 @@
# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

import os
import sys

sys.path.insert(0, os.path.abspath('../src'))

project = 'LLM Search'
copyright = '2024, Denis L.'
author = 'Denis L.'

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = ["sphinx.ext.duration", "sphinx.ext.autodoc", "sphinxcontrib.autodoc_pydantic" ]

templates_path = ['_templates']
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']



# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = "sphinx_rtd_theme"
html_static_path = ['_static']

autodoc_pydantic_model_show_config_summary = False
autodoc_pydantic_show_json = False
autodoc_pydantic_model_show_validator_members = False
autodoc_pydantic_model_show_validator_summary = False
autodoc_pydantic_field_list_validators = False
20 changes: 20 additions & 0 deletions docs/configure_doc.rst
@@ -0,0 +1,20 @@

Documents and Embedding Config
==============================


Configuration example
-----------------------

.. literalinclude:: ../sample_templates/generic/config_template.yaml
:language: yaml


Document Config Reference
-------------------------

.. automodule:: llmsearch.config
:members:
:exclude-members: CustomDocumentExtension, get_doc_with_model_config, SuffixAppend, LLMConfig


43 changes: 43 additions & 0 deletions docs/configure_model.rst
@@ -0,0 +1,43 @@

LLM Model Config
================


Configuration examples
----------------------

OpenAI
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

For the OpenAI configuration, create a ``.env`` file containing the API key.

.. literalinclude:: ../sample_templates/llm/openai.yaml
:language: yaml

llamacpp
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. literalinclude:: ../sample_templates/llm/llamacpp.yaml
:language: yaml

Ollama + LiteLLM
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. literalinclude:: ../sample_templates/llm/litellm.yaml
:language: yaml


Huggingface
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. literalinclude:: ../sample_templates/llm/huggingface.yaml
:language: yaml


Reference
---------


.. automodule:: llmsearch.models.config
:members:
