Merge pull request #53 from martindevans/xml_docs_fixes
XML docs fixes
AsakusaRinne authored Jul 24, 2023
2 parents ae98fa1 + 2e76b79 commit d17fa99
Showing 11 changed files with 47 additions and 17 deletions.
8 changes: 8 additions & 0 deletions LLama/Abstractions/ILLamaExecutor.cs
@@ -15,6 +15,7 @@ public interface ILLamaExecutor
/// The loaded model for this executor.
/// </summary>
public LLamaModel Model { get; }

/// <summary>
/// Infers a response from the model.
/// </summary>
@@ -24,6 +25,13 @@ public interface ILLamaExecutor
/// <returns></returns>
IEnumerable<string> Infer(string text, InferenceParams? inferenceParams = null, CancellationToken token = default);

/// <summary>
/// Asynchronously infers a response from the model.
/// </summary>
/// <param name="text">Your prompt</param>
/// <param name="inferenceParams">Any additional parameters</param>
/// <param name="token">A cancellation token.</param>
/// <returns></returns>
IAsyncEnumerable<string> InferAsync(string text, InferenceParams? inferenceParams = null, CancellationToken token = default);
}
}
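
For context, a minimal consumption sketch of the interface as documented above, assuming an already-constructed ILLamaExecutor (any of the library's executor implementations) and treating the prompt text and the 30-second timeout as placeholders:

using System;
using System.Threading;
using System.Threading.Tasks;
using LLama.Abstractions;
using LLama.Common;

public static class ExecutorExample
{
    public static async Task StreamAsync(ILLamaExecutor executor)
    {
        // Cancel the generation if it runs longer than 30 seconds.
        using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(30));

        // InferAsync yields output piece by piece as it is generated;
        // the token documented above lets the caller stop a long generation early.
        await foreach (var piece in executor.InferAsync("Hello, how are you?", new InferenceParams(), cts.Token))
        {
            Console.Write(piece);
        }
    }
}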
7 changes: 5 additions & 2 deletions LLama/ChatSession.cs
@@ -134,8 +134,9 @@ public virtual void LoadSession(string path)
/// <summary>
/// Get the response from the LLama model with chat histories.
/// </summary>
/// <param name="prompt"></param>
/// <param name="history"></param>
/// <param name="inferenceParams"></param>
/// <param name="cancellationToken"></param>
/// <returns></returns>
public IEnumerable<string> Chat(ChatHistory history, InferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
{
@@ -156,6 +157,7 @@ public IEnumerable<string> Chat(ChatHistory history, InferenceParams? inferenceP
/// </summary>
/// <param name="prompt"></param>
/// <param name="inferenceParams"></param>
/// <param name="cancellationToken"></param>
/// <returns></returns>
public IEnumerable<string> Chat(string prompt, InferenceParams? inferenceParams = null, CancellationToken cancellationToken = default)
{
@@ -176,8 +178,9 @@ public IEnumerable<string> Chat(string prompt, InferenceParams? inferenceParams
/// <summary>
/// Get the response from the LLama model with chat histories.
/// </summary>
/// <param name="prompt"></param>
/// <param name="history"></param>
/// <param name="inferenceParams"></param>
/// <param name="cancellationToken"></param>
/// <returns></returns>
public async IAsyncEnumerable<string> ChatAsync(ChatHistory history, InferenceParams? inferenceParams = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
{
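As a usage sketch of the overloads whose parameter docs were completed above, assuming a ChatSession already constructed around an executor, with the prompt and history contents as placeholders:

using System;
using System.Threading;
using System.Threading.Tasks;
using LLama;
using LLama.Common;

public static class ChatExample
{
    public static async Task RunAsync(ChatSession session, ChatHistory history)
    {
        using var cts = new CancellationTokenSource();

        // Synchronous streaming, now with a documented cancellation token.
        foreach (var piece in session.Chat("What is the capital of France?", new InferenceParams(), cts.Token))
            Console.Write(piece);

        // The history-driven async overload streams the same way.
        await foreach (var piece in session.ChatAsync(history, new InferenceParams(), cts.Token))
            Console.Write(piece);
    }
}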
23 changes: 20 additions & 3 deletions LLama/Common/ChatHistory.cs
@@ -1,16 +1,33 @@
using System;
using System.Collections.Generic;
using System.Text;
using System.Collections.Generic;

namespace LLama.Common
{
/// <summary>
/// Role of the message author, e.g. user/assistant/system
/// </summary>
public enum AuthorRole
{
/// <summary>
/// Role is unknown
/// </summary>
Unknown = -1,

/// <summary>
/// Message comes from a "system" prompt, not written by a user or language model
/// </summary>
System = 0,

/// <summary>
/// Message comes from the user
/// </summary>
User = 1,

/// <summary>
/// Message was generated by the language model
/// </summary>
Assistant = 2,
}

// copy from semantic-kernel
/// <summary>
/// The chat history class
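To illustrate the newly documented enum, a short sketch that assembles a history; AddMessage is assumed from the semantic-kernel-derived ChatHistory class mentioned above:

using LLama.Common;

var history = new ChatHistory();
history.AddMessage(AuthorRole.System, "You are a concise assistant.");
history.AddMessage(AuthorRole.User, "Explain quantization in one sentence.");
// The model's reply would be recorded as AuthorRole.Assistant;
// AuthorRole.Unknown (-1) is reserved for messages of undetermined origin.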
2 changes: 1 addition & 1 deletion LLama/Common/FixedSizeQueue.cs
@@ -24,7 +24,7 @@ public FixedSizeQueue(int size)
}

/// <summary>
/// Fill the queue with the data. Please ensure that data.Count <= size
/// Fill the queue with the data. Please ensure that data.Count &lt;= size
/// </summary>
/// <param name="size"></param>
/// <param name="data"></param>
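The escape matters because XML doc comments are parsed as XML: a raw '<' opens a tag, so the compiler rejects the comment. In miniature:

// Escaped form (valid XML): renders as "data.Count <= size" in IntelliSense.
/// <summary>
/// Fill the queue with the data. Please ensure that data.Count &lt;= size
/// </summary>

// Unescaped form (malformed XML): the raw '<' is parsed as the start of a tag
// and triggers compiler warning CS1570 ("XML comment has badly formed XML").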
2 changes: 0 additions & 2 deletions LLama/Exceptions/RuntimeError.cs
@@ -1,6 +1,4 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace LLama.Exceptions
{
8 changes: 6 additions & 2 deletions LLama/LLamaQuantizer.cs
@@ -18,10 +18,12 @@ public static class LLamaQuantizer
/// <param name="dstFilename">The path to save the quantized model.</param>
/// <param name="ftype">The type of quantization.</param>
/// <param name="nthread">Thread to be used during the quantization. By default it's the physical core number.</param>
/// <param name="allowRequantize"></param>
/// <param name="quantizeOutputTensor"></param>
/// <returns>Whether the quantization is successful.</returns>
/// <exception cref="ArgumentException"></exception>
public static unsafe bool Quantize(string srcFileName, string dstFilename, LLamaFtype ftype, int nthread = -1, bool allowRequantize = true,
bool quantizeOutputTensor = false)
bool quantizeOutputTensor = false)
{
if (!ValidateFtype(ftype))
{
@@ -45,10 +47,12 @@ public static unsafe bool Quantize(string srcFileName, string dstFilename, LLama
/// <param name="dstFilename">The path to save the quantized model.</param>
/// <param name="ftype">The type of quantization.</param>
/// <param name="nthread">Thread to be used during the quantization. By default it's the physical core number.</param>
/// <param name="allowRequantize"></param>
/// <param name="quantizeOutputTensor"></param>
/// <returns>Whether the quantization is successful.</returns>
/// <exception cref="ArgumentException"></exception>
public static bool Quantize(string srcFileName, string dstFilename, string ftype, int nthread = -1, bool allowRequantize = true,
bool quantizeOutputTensor = false)
bool quantizeOutputTensor = false)
{
return Quantize(srcFileName, dstFilename, StringToFtype(ftype), nthread, allowRequantize, quantizeOutputTensor);
}
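A usage sketch for the overloads above; the file paths and the "q4_0" ftype string are placeholders, and as the diff shows, the string overload simply forwards to the LLamaFtype one via StringToFtype:

using System;
using LLama;

bool ok = LLamaQuantizer.Quantize(
    "model-f16.bin",    // srcFileName
    "model-q4_0.bin",   // dstFilename
    "q4_0",             // ftype, converted internally
    nthread: -1,        // default: physical core count
    allowRequantize: true,
    quantizeOutputTensor: false);

if (!ok)
    Console.Error.WriteLine("Quantization failed.");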
4 changes: 2 additions & 2 deletions LLama/LLamaTransforms.cs
@@ -159,8 +159,8 @@ public class KeywordTextOutputStreamTransform : ITextStreamTransform
/// <param name="keywords">Keywords that you want to remove from the response.</param>
/// <param name="redundancyLength">The extra length when searching for the keyword. For example, if your only keyword is "highlight",
/// maybe the token you get is "\r\nhighligt". In this condition, if redundancyLength=0, the token cannot be successfully matched because the length of "\r\nhighligt" (10)
/// has already exceeded the maximum length of the keywords (8). On the contrary, setting redundancyLengyh >= 2 leads to successful match.
/// The larger the redundancyLength is, the lower the processing speed. But as an experience, it won't introduce too much performance impact when redundancyLength <= 5 </param>
/// has already exceeded the maximum length of the keywords (8). On the contrary, setting redundancyLengyh &gt;= 2 leads to successful match.
/// The larger the redundancyLength is, the lower the processing speed. But as an experience, it won't introduce too much performance impact when redundancyLength &lt;= 5 </param>
/// <param name="removeAllMatchedTokens">If set to true, when getting a matched keyword, all the related tokens will be removed. Otherwise only the part of keyword will be removed.</param>
public KeywordTextOutputStreamTransform(IEnumerable<string> keywords, int redundancyLength = 3, bool removeAllMatchedTokens = false)
{
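For instance, a sketch of the transform documented above, stripping an assumed "User:" echo from streamed output (the class is assumed to be nested in LLamaTransforms, per the file it lives in):

var transform = new LLamaTransforms.KeywordTextOutputStreamTransform(
    new[] { "User:" },
    redundancyLength: 3,            // the default shown in the signature
    removeAllMatchedTokens: false); // remove only the keyword itself, not the whole matched token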
2 changes: 1 addition & 1 deletion LLama/Native/LLamaModelQuantizeParams.cs
@@ -8,7 +8,7 @@ namespace LLama.Native
public struct LLamaModelQuantizeParams
{
/// <summary>
/// number of threads to use for quantizing, if <=0 will use std::thread::hardware_concurrency()
/// number of threads to use for quantizing, if &lt;=0 will use std::thread::hardware_concurrency()
/// </summary>
public int nthread;
/// <summary>
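A small sketch populating the struct; only nthread appears in this excerpt, so the commented ftype field is an assumption mirrored from the native llama.cpp header:

using LLama.Native;

var qparams = new LLamaModelQuantizeParams
{
    // <= 0 defers to std::thread::hardware_concurrency() on the native side.
    nthread = 0,
    // ftype = LLamaFtype.LLAMA_FTYPE_MOSTLY_Q4_0, // assumed field name
};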
3 changes: 1 addition & 2 deletions LLama/Native/NativeApi.Quantize.cs
@@ -12,8 +12,7 @@ public partial class NativeApi
/// </summary>
/// <param name="fname_inp"></param>
/// <param name="fname_out"></param>
/// <param name="ftype"></param>
/// <param name="nthread">how many threads to use. If <=0, will use std::thread::hardware_concurrency(), else the number given</param>
/// <param name="param"></param>
/// <remarks>not great API - very likely to change</remarks>
/// <returns>Returns 0 on success</returns>
[DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
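Given the parameter list above, the native call is shaped roughly as below; the exact marshalling of param (by ref versus by value) is an assumption, and RuntimeError comes from the exceptions file touched earlier in this commit:

using LLama.Exceptions;
using LLama.Native;

var qparams = new LLamaModelQuantizeParams { nthread = 0 };
var status = NativeApi.llama_model_quantize("model-f16.bin", "model-q4_0.bin", ref qparams);
if (status != 0)
    throw new RuntimeError("llama_model_quantize returned a non-zero status.");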
4 changes: 2 additions & 2 deletions LLama/OldVersion/ChatSession.cs
@@ -39,9 +39,9 @@ public ChatSession<T> WithPromptFile(string promptFilename, string encoding = "U
}

/// <summary>
/// Set the keyword to split the return value of chat AI.
/// Set the keywords to split the return value of chat AI.
/// </summary>
/// <param name="humanName"></param>
/// <param name="antiprompt"></param>
/// <returns></returns>
public ChatSession<T> WithAntiprompt(string[] antiprompt)
{
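A fluent-configuration sketch for the legacy ChatSession<T> shown above, chaining the documented WithPromptFile and WithAntiprompt setters; the model variable and file path are placeholders:

using LLama.OldVersion;

var chat = new ChatSession<LLamaModel>(model)
    .WithPromptFile("prompt.txt")
    .WithAntiprompt(new[] { "User:" });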
1 change: 1 addition & 0 deletions LLama/OldVersion/LLamaModel.cs
@@ -796,6 +796,7 @@ public IEnumerable<string> Call(string text, string encoding = "UTF-8")
}
}

/// <inheritdoc />
public void Dispose()
{
_ctx.Dispose();
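Because the inheritdoc'd Dispose releases the native context, the legacy model fits a using statement; the LLamaParams constructor arguments below are placeholders:

using System;
using LLama.OldVersion;

using (var model = new LLamaModel(new LLamaParams(model: "ggml-model.bin")))
{
    // Call streams the response, as the method shown above does.
    foreach (var piece in model.Call("Hello!", "UTF-8"))
        Console.Write(piece);
}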
