Merge pull request #122 from snexus/update-docs

Update configuration template and installation docs
snexus · Oct 28, 2024 · daac0ac · daac0ac
2 parents 772a937 + 124fc9e
commit daac0ac
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 2 deletions.
diff --git a/docs/installation.rst b/docs/installation.rst
@@ -26,12 +26,16 @@ Install Latest Version
     # Install packages using pip
     pip install pyllmsearch
 
-    # Optional - install packages using uv
+    # Optional dependencies for Azure parser
+    pip install "pyllmsearch[azureparser]"
+
+    # Preferred method (much faster) - install packages using uv
     pip install uv
     uv pip install pyllmsearch
 
 
 
+
 Install from source
 ===================
 
@@ -55,3 +59,6 @@ Install from source
 
     # Install the package
     pip install . # or `pip install -e .` for development
+    
+    # For Azure parser, install with optional dependencies
+    pip install ".[azureparser]"
diff --git a/sample_templates/generic/config_template.yaml b/sample_templates/generic/config_template.yaml
@@ -32,6 +32,7 @@ embeddings:
         remove_images: True # Remove image links
 
     # Optional setting
+    # For azuredoc support - pip install "pyllmsearch[azureparser]"
     pdf_table_parser: gmft # azuredoc
 
     # Optional setting
@@ -65,7 +66,12 @@ semantic_search:
 
   # Will ensure that context provided to LLM is less than max_char_size. Useful for locally hosted models and limited hardware. 
   # Reduce if out of CUDA memory.
-  max_char_size: 4096 
+  max_char_size: 16384 # Reduce if necessary for locally hosted LLMs
+
+  # Maximum number of text chunks to retrieve for dense and sparse embeddings
+  # Total number of chunks is max_k * 2
+  max_k: 25
+
   query_prefix: "query: " # Often queries have to be prefixed for embedding models, such as e5
 
   hyde: