June 2024 Binary Update #751

Merged: 3 commits, merged on Jun 3, 2024.

Changes from all commits:
2 changes: 1 addition & 1 deletion LLama.Benchmark/LLamaExecutorBenchmark/Prefill.cs
@@ -103,7 +103,7 @@ public void GlobalSetup()
     {
         var showLLamaCppLogs = true;
         NativeLibraryConfig
-            .Instance
+            .All
             .WithLogCallback((level, message) =>
             {
                 if (showLLamaCppLogs)
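Note on this hunk: `NativeLibraryConfig.Instance` is replaced by `NativeLibraryConfig.All`. A minimal sketch of the updated call site as standalone code (hedged: assumes the 0.13.0 API, where `All` applies the configuration to every native library LLamaSharp loads):

    using System;
    using LLama.Native;

    // Forward native llama.cpp log output to the console.
    NativeLibraryConfig
        .All                                   // 0.13.0 accessor; replaces `.Instance`
        .WithLogCallback((level, message) =>
            Console.Write($"[{level}] {message}"));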
2 changes: 1 addition & 1 deletion LLama.Experimental/LLama.Experimental.csproj
@@ -7,7 +7,7 @@
     <langversion>12</langversion>
     <RootNamespace>LLama</RootNamespace>
 
-    <Version>0.12.0</Version>
+    <Version>0.13.0</Version>
     <Authors>Rinne</Authors>
     <Company>SciSharp STACK</Company>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
2 changes: 1 addition & 1 deletion LLama.KernelMemory/LLamaSharp.KernelMemory.csproj
@@ -4,7 +4,7 @@
     <TargetFrameworks>net6.0;net8.0</TargetFrameworks>
     <ImplicitUsings>enable</ImplicitUsings>
     <Nullable>enable</Nullable>
-    <Version>0.12.0</Version>
+    <Version>0.13.0</Version>
     <Authors>Xbotter</Authors>
     <Company>SciSharp STACK</Company>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
7 changes: 1 addition & 6 deletions LLama.KernelMemory/LlamaSharpConfig.cs
@@ -1,10 +1,5 @@
-using LLama.Common;
+using LLama.Common;
 using LLama.Native;
-using System;
-using System.Collections.Generic;
-using System.Linq;
-using System.Text;
-using System.Threading.Tasks;
 
 namespace LLamaSharp.KernelMemory
 {

(The deleted and re-added `using LLama.Common;` lines differ only in invisible characters, most likely a BOM or line-ending normalization; the same pattern recurs in several files below.)
2 changes: 1 addition & 1 deletion LLama.KernelMemory/LlamaSharpTextGenerator.cs
@@ -1,4 +1,4 @@
-using LLama;
+using LLama;
 using LLama.Common;
 using LLama.Native;
 using Microsoft.KernelMemory.AI;
4 changes: 2 additions & 2 deletions LLama.SemanticKernel/LLamaSharp.SemanticKernel.csproj
@@ -10,7 +10,7 @@
     <ImplicitUsings>enable</ImplicitUsings>
     <Nullable>enable</Nullable>
 
-    <Version>0.12.0</Version>
+    <Version>0.13.0</Version>
     <Authors>Tim Miller, Xbotter</Authors>
     <Company>SciSharp STACK</Company>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
@@ -45,7 +45,7 @@
     <ProjectReference Include="..\LLama\LLamaSharp.csproj" />
   </ItemGroup>
   <ItemGroup>
-    <InternalsVisibleTo Include="Llama.Unittest"/>
+    <InternalsVisibleTo Include="Llama.Unittest" />
   </ItemGroup>
 
 </Project>
1 change: 0 additions & 1 deletion LLama/Abstractions/IContextParams.cs
@@ -116,7 +116,6 @@ public interface IContextParams
     /// <summary>
-    /// defragment the KV cache if holes/size &gt; defrag_threshold, Set to &lt; 0 to disable (default)
     /// defragment the KV cache if holes/size &gt; defrag_threshold, Set to <see langword="null"/> or &lt; 0 to disable (default)
     /// </summary>
     float? DefragThreshold { get; }
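For reference, a hedged sketch of how this nullable threshold is set from user code (assumes `ModelParams`, the stock implementation of `IContextParams`; the model path is a placeholder):

    using LLama.Common;

    var @params = new ModelParams("model.gguf")    // placeholder path
    {
        // Defragment the KV cache when holes/size > 0.1;
        // null or a negative value leaves defragmentation disabled.
        DefragThreshold = 0.1f,
    };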
2 changes: 1 addition & 1 deletion LLama/Abstractions/IModelParams.cs
@@ -1,4 +1,4 @@
-using System;
+using System;
 using System.Buffers;
 using System.Collections;
 using System.Collections.Generic;
24 changes: 4 additions & 20 deletions LLama/LLamaContext.cs
@@ -9,7 +9,6 @@
 using LLama.Common;
 using System.Runtime.InteropServices;
 using System.Threading.Tasks;
-using LLama.Extensions;
 using LLama.Abstractions;
 using LLama.Sampling;
 using Microsoft.Extensions.Logging;
@@ -56,33 +55,22 @@ public sealed class LLamaContext
     /// </summary>
     public Encoding Encoding { get; }
 
-    private uint _generationThreads;
-    private uint _batchThreads;
-
     /// <summary>
     /// Get or set the number of threads to use for generation
     /// </summary>
     public uint GenerationThreads
     {
-        get => _generationThreads;
-        set
-        {
-            _generationThreads = value;
-            NativeHandle.SetThreads(_generationThreads, _batchThreads);
-        }
+        get => NativeHandle.GenerationThreads;
+        set => NativeHandle.GenerationThreads = value;
     }
 
     /// <summary>
     /// Get or set the number of threads to use for batch processing
     /// </summary>
     public uint BatchThreads
     {
-        get => _batchThreads;
-        set
-        {
-            _batchThreads = value;
-            NativeHandle.SetThreads(_generationThreads, _batchThreads);
-        }
+        get => NativeHandle.BatchThreads;
+        set => NativeHandle.BatchThreads = value;
     }
 
     /// <summary>
@@ -111,10 +99,6 @@ public LLamaContext(LLamaWeights model, IContextParams @params, ILogger? logger
 
         @params.ToLlamaContextParams(out var lparams);
         NativeHandle = SafeLLamaContextHandle.Create(model.NativeHandle, lparams);
-
-        // It's not possible to get these values from llama.cpp, store a copy of them here.
-        _generationThreads = lparams.n_threads;
-        _batchThreads = lparams.n_threads_batch;
     }
 
     /// <summary>
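The thread-count properties above are now plain pass-throughs to the native handle instead of cached copies, so they can no longer drift from llama.cpp's own state. A hedged usage sketch (the model path is a placeholder; `LoadFromFile` and `CreateContext` are the existing LLamaWeights helpers):

    using LLama;
    using LLama.Common;

    var @params = new ModelParams("model.gguf");   // placeholder path
    using var weights = LLamaWeights.LoadFromFile(@params);
    using var context = weights.CreateContext(@params);

    // Both of these now read and write llama.cpp state directly
    // through SafeLLamaContextHandle.
    context.GenerationThreads = 8;
    context.BatchThreads = 16;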
2 changes: 1 addition & 1 deletion LLama/LLamaSharp.csproj
@@ -7,7 +7,7 @@
     <Platforms>AnyCPU;x64;Arm64</Platforms>
     <AllowUnsafeBlocks>True</AllowUnsafeBlocks>
 
-    <Version>0.12.0</Version>
+    <Version>0.13.0</Version>
     <Authors>Rinne, Martin Evans, jlsantiago and all the other contributors in https://github.com/SciSharp/LLamaSharp/graphs/contributors.</Authors>
     <Company>SciSharp STACK</Company>
     <GeneratePackageOnBuild>true</GeneratePackageOnBuild>
2 changes: 1 addition & 1 deletion LLama/Native/GPUSplitMode.cs
@@ -1,4 +1,4 @@
-namespace LLama.Native;
+namespace LLama.Native;
 
 /// <summary>
 ///
9 changes: 7 additions & 2 deletions LLama/Native/LLamaModelParams.cs
@@ -1,4 +1,4 @@
-using System;
+using System;
 using System.Runtime.InteropServices;
 
 namespace LLama.Native
@@ -27,7 +27,12 @@ public unsafe struct LLamaModelParams
     /// <summary>
     /// how to split layers across multiple GPUs (size: <see cref="NativeApi.llama_max_devices"/>)
     /// </summary>
-    public float* tensor_split;
+    public float* tensor_split;
+
+    /// <summary>
+    /// comma separated list of RPC servers to use for offloading
+    /// </summary>
+    public byte* rpc_servers;
 
     /// <summary>
     /// called with a progress value between 0 and 1, pass NULL to disable. If the provided progress_callback
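A hedged interop sketch for the new field (the field name comes from this diff; the marshalling pattern is standard .NET interop, not something this PR adds). The native side presumably expects a NUL-terminated UTF-8 string, so the buffer must stay alive until the model has been created:

    using System;
    using System.Runtime.InteropServices;

    // Assumes an unsafe context and a populated LLamaModelParams `p`.
    IntPtr rpc = Marshal.StringToCoTaskMemUTF8("192.168.0.10:50052,192.168.0.11:50052");
    p.rpc_servers = (byte*)rpc;
    // ... create the model with `p`, then release the buffer:
    Marshal.FreeCoTaskMem(rpc);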
25 changes: 16 additions & 9 deletions LLama/Native/LLamaVocabPreType.cs
@@ -1,17 +1,24 @@
-namespace LLama.Native;
+namespace LLama.Native;
 
 /// <summary>
 ///
 /// </summary>
 /// <remarks>llama_vocab_pre_type</remarks>
 internal enum LLamaVocabPreType
 {
-    LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0,
-    LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1,
-    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2,
-    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,
-    LLAMA_VOCAB_PRE_TYPE_FALCON = 4,
-    LLAMA_VOCAB_PRE_TYPE_MPT = 5,
-    LLAMA_VOCAB_PRE_TYPE_STARCODER = 6,
-    LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
+    Default = 0,
+
+    LLAMA3 = 1,
+    DEEPSEEK_LLM = 2,
+    DEEPSEEK_CODER = 3,
+    FALCON = 4,
+    MPT = 5,
+    STARCODER = 6,
+    GPT2 = 7,
+    REFACT = 8,
+    COMMAND_R = 9,
+    STABLELM2 = 10,
+    QWEN2 = 11,
+    OLMO = 12,
+    DBRX = 13,
 }
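The renamed members keep their explicit numeric values, so they still line up with llama.cpp's `llama_vocab_pre_type` constants, which matters because the value crosses the P/Invoke boundary as a plain integer. A small hypothetical guard (usable only inside the LLama assembly, since the enum is internal):

    // Hypothetical sanity check against the native constant values.
    System.Diagnostics.Debug.Assert((int)LLamaVocabPreType.GPT2 == 7);
    System.Diagnostics.Debug.Assert((int)LLamaVocabPreType.DBRX == 13);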
2 changes: 1 addition & 1 deletion LLama/Native/Load/NativeLibraryMetadata.cs
@@ -1,4 +1,4 @@
-
+
 namespace LLama.Native
 {
     /// <summary>
37 changes: 36 additions & 1 deletion LLama/Native/SafeLLamaContextHandle.cs
@@ -41,6 +41,24 @@ public sealed class SafeLLamaContextHandle
     /// </summary>
     public uint UBatchSize => llama_n_ubatch(this);
 
+    /// <summary>
+    /// Get or set the number of threads used for generation of a single token.
+    /// </summary>
+    public uint GenerationThreads
+    {
+        get => llama_n_threads(this);
+        set => llama_set_n_threads(this, value, BatchThreads);
+    }
+
+    /// <summary>
+    /// Get or set the number of threads used for prompt and batch processing (multiple tokens).
+    /// </summary>
+    public uint BatchThreads
+    {
+        get => llama_n_threads_batch(this);
+        set => llama_set_n_threads(this, GenerationThreads, value);
+    }
+
     /// <summary>
     /// Get the model which this context is using
     /// </summary>
@@ -157,6 +175,22 @@ static SafeLLamaContextHandle()
     [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
     private static extern void llama_set_n_threads(SafeLLamaContextHandle ctx, uint n_threads, uint n_threads_batch);
 
+    /// <summary>
+    /// Get the number of threads used for generation of a single token.
+    /// </summary>
+    /// <param name="ctx"></param>
+    /// <returns></returns>
+    [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+    private static extern uint llama_n_threads(SafeLLamaContextHandle ctx);
+
+    /// <summary>
+    /// Get the number of threads used for prompt and batch processing (multiple tokens).
+    /// </summary>
+    /// <param name="ctx"></param>
+    /// <returns></returns>
+    [DllImport(NativeApi.libraryName, CallingConvention = CallingConvention.Cdecl)]
+    private static extern uint llama_n_threads_batch(SafeLLamaContextHandle ctx);
+
     /// <summary>
     /// Token logits obtained from the last call to llama_decode
     /// The logits for the last token are stored in the last row
@@ -538,6 +572,7 @@ public void SetSeed(uint seed)
     /// </summary>
     /// <param name="threads">n_threads is the number of threads used for generation (single token)</param>
     /// <param name="threadsBatch">n_threads_batch is the number of threads used for prompt and batch processing (multiple tokens)</param>
+    [Obsolete("Use `GenerationThreads` and `BatchThreads` properties")]
     public void SetThreads(uint threads, uint threadsBatch)
     {
         llama_set_n_threads(this, threads, threadsBatch);
@@ -613,7 +648,7 @@ public int KvCacheCountTokens()
     }
 
     /// <summary>
-    /// Clear the KV cache
+    /// Clear the KV cache - both cell info is erased and KV data is zeroed
     /// </summary>
     public void KvCacheClear()
     {
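Migration note for the now-obsolete method, as a hedged sketch (`ctx` stands for any live SafeLLamaContextHandle):

    // Before (now [Obsolete]): one native call sets both counts.
    ctx.SetThreads(8, 16);

    // After: each property setter re-reads the other count and calls
    // llama_set_n_threads, so assigning both makes two native calls.
    ctx.GenerationThreads = 8;
    ctx.BatchThreads = 16;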
27 changes: 0 additions & 27 deletions LLama/Native/llama_vocab_pre_type.cs

This file was deleted (its contents appear to have been consolidated into the updated LLamaVocabPreType.cs above).
