diff --git a/docs/Examples/BatchDecoding.md b/docs/Examples/BatchDecoding.md
new file mode 100644
index 000000000..0b03ed049
--- /dev/null
+++ b/docs/Examples/BatchDecoding.md
@@ -0,0 +1,170 @@
+# Batch decoding
+
+This example decodes several sequences in parallel from a single prompt, sharing the prompt's KV cache across all sequences and sampling each stream with its own pipeline.
+
+```cs
+using System.Diagnostics;
+using System.Text;
+using LLama.Common;
+using LLama.Native;
+using LLama.Sampling;
+
+public class BatchedDecoding
+{
+    private const int n_parallel = 8;
+    private const int n_len = 32;
+
+    public static async Task Run()
+    {
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+
+        Console.WriteLine("Prompt (leave blank to select automatically):");
+        var prompt = Console.ReadLine();
+        if (string.IsNullOrWhiteSpace(prompt))
+            prompt = "Not many people know that";
+
+        // Load model
+        var parameters = new ModelParams(modelPath);
+
+        using var model = LLamaWeights.LoadFromFile(parameters);
+
+        // Tokenize prompt
+        var prompt_tokens = model.Tokenize(prompt, true, false, Encoding.UTF8);
+        var n_kv_req = prompt_tokens.Length + (n_len - prompt_tokens.Length) * n_parallel;
+
+        // Create a context
+        parameters.ContextSize = (uint)model.ContextSize;
+        parameters.BatchSize = (uint)Math.Max(n_len, n_parallel);
+        using var context = model.CreateContext(parameters);
+
+        var n_ctx = context.ContextSize;
+
+        // make sure the KV cache is big enough to hold all the prompt and generated tokens
+        if (n_kv_req > n_ctx)
+        {
+            await Console.Error.WriteLineAsync($"error: n_kv_req ({n_kv_req}) > n_ctx, the required KV cache size is not big enough\n");
+            await Console.Error.WriteLineAsync("        either reduce n_parallel or increase n_ctx\n");
+            return;
+        }
+
+        var batch = new LLamaBatch();
+
+        // evaluate the initial prompt
+        batch.AddRange(prompt_tokens, 0, LLamaSeqId.Zero, true);
+
+        if (await context.DecodeAsync(batch) != DecodeResult.Ok)
+        {
+            await Console.Error.WriteLineAsync("llama_decode failed");
+            return;
+        }
+
+        // assign the system KV cache to all parallel sequences
+        // this way, the parallel sequences will "reuse" the prompt tokens without having to copy them
+        for (var i = 1; i < n_parallel; ++i)
+        {
+            context.NativeHandle.KvCacheSequenceCopy((LLamaSeqId)0, (LLamaSeqId)i, 0, batch.TokenCount);
+        }
+
+        if (n_parallel > 1)
+        {
+            Console.WriteLine();
+            Console.WriteLine($"generating {n_parallel} sequences...");
+        }
+
+        // remember the batch index of the last token for each parallel sequence
+        // we need this to determine which logits to sample from
+        List<int> i_batch = new();
+        for (var i = 0; i < n_parallel; i++)
+            i_batch.Add(batch.TokenCount - 1);
+
+        // Create per-stream decoder and sampler
+        var decoders = new StreamingTokenDecoder[n_parallel];
+        var samplers = new ISamplingPipeline[n_parallel];
+        for (var i = 0; i < n_parallel; i++)
+        {
+            decoders[i] = new StreamingTokenDecoder(context);
+            samplers[i] = new DefaultSamplingPipeline
+            {
+                Temperature = 0.1f + (float)i / n_parallel,
+                MinP = 0.25f,
+            };
+        }
+
+        var n_cur = batch.TokenCount;
+        var n_decode = 0;
+
+        var timer = new Stopwatch();
+        timer.Start();
+        while (n_cur <= n_len)
+        {
+            batch.Clear();
+
+            for (var i = 0; i < n_parallel; i++)
+            {
+                // Skip completed streams
+                if (i_batch[i] < 0)
+                    continue;
+
+                // Use the sampling pipeline to select a token
+                var new_token_id = samplers[i].Sample(
+                    context.NativeHandle,
+                    context.NativeHandle.GetLogitsIth(i_batch[i]),
+                    Array.Empty<LLamaToken>()
+                );
+
+                // Finish this stream early if necessary
+                if (new_token_id == model.EndOfSentenceToken || new_token_id == model.NewlineToken)
+                {
+                    i_batch[i] = -1;
+                    Console.WriteLine($"Completed Stream {i} early");
+                    continue;
+                }
+
+                // Add this token to the decoder, so it will be turned into text
+                decoders[i].Add(new_token_id);
+
+                i_batch[i] = batch.TokenCount;
+
+                // push this new token for next evaluation
+                batch.Add(new_token_id, n_cur, (LLamaSeqId)i, true);
+
+                n_decode++;
+            }
+
+            // Check if all streams are finished
+            if (batch.TokenCount == 0)
+            {
+                break;
+            }
+
+            n_cur++;
+
+            // evaluate the current batch with the transformer model
+            if (await context.DecodeAsync(batch) != DecodeResult.Ok)
+            {
+                await Console.Error.WriteLineAsync("failed to eval");
+                return;
+            }
+        }
+
+        timer.Stop();
+        Console.ForegroundColor = ConsoleColor.Yellow;
+        Console.WriteLine();
+        Console.WriteLine($"Decoded {n_decode} tokens in {timer.ElapsedMilliseconds}ms");
+        Console.WriteLine($"Rate: {n_decode / timer.Elapsed.TotalSeconds:##.000} tokens/second");
+
+        var index = 0;
+        foreach (var stream in decoders)
+        {
+            var text = stream.Read();
+
+            Console.ForegroundColor = ConsoleColor.Green;
+            Console.Write($"{index++}. {prompt}");
+            Console.ForegroundColor = ConsoleColor.Red;
+            Console.WriteLine(text);
+        }
+
+        Console.WriteLine("Press any key to exit demo");
+        Console.ReadKey(true);
+    }
+}
+```
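+
+As a rough sanity check of the KV cache maths above: if the prompt happens to tokenize to 6 tokens, then `n_kv_req = 6 + (32 - 6) * 8 = 214`, so the context must be able to hold at least 214 tokens for 8 parallel streams of 32 tokens each (the prompt tokens are shared between all sequences via `KvCacheSequenceCopy`). The exact numbers depend on your prompt and tokenizer.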
\ No newline at end of file
diff --git a/docs/Examples/ChatChineseGB2312.md b/docs/Examples/ChatChineseGB2312.md
new file mode 100644
index 000000000..a5a0defef
--- /dev/null
+++ b/docs/Examples/ChatChineseGB2312.md
@@ -0,0 +1,125 @@
+# Chat Chinese
+
+This example runs an interactive chat in Chinese on a GB2312 console, converting user input to UTF-8 for the model and converting the model's UTF-8 output back to GB2312 for display.
+
+```cs
+using System.Text;
+using LLama.Common;
+
+public class ChatChineseGB2312
+{
+    private static string ConvertEncoding(string input, Encoding original, Encoding target)
+    {
+        byte[] bytes = original.GetBytes(input);
+        var convertedBytes = Encoding.Convert(original, target, bytes);
+        return target.GetString(convertedBytes);
+    }
+
+    public static async Task Run()
+    {
+        // Register provider for GB2312 encoding
+        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
+
+        Console.ForegroundColor = ConsoleColor.Yellow;
+        Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common in Windows. It's recommended" +
+                          " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
+        Console.ForegroundColor = ConsoleColor.White;
+
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+
+        var parameters = new ModelParams(modelPath)
+        {
+            ContextSize = 1024,
+            Seed = 1337,
+            GpuLayerCount = 5,
+            Encoding = Encoding.UTF8
+        };
+        using var model = LLamaWeights.LoadFromFile(parameters);
+        using var context = model.CreateContext(parameters);
+        var executor = new InteractiveExecutor(context);
+
+        ChatSession session;
+        if (Directory.Exists("Assets/chat-with-kunkun-chinese"))
+        {
+            Console.ForegroundColor = ConsoleColor.Yellow;
+            Console.WriteLine("Loading session from disk.");
+            Console.ForegroundColor = ConsoleColor.White;
+
+            session = new ChatSession(executor);
+            session.LoadSession("Assets/chat-with-kunkun-chinese");
+        }
+        else
+        {
+            var chatHistoryJson = File.ReadAllText("Assets/chat-with-kunkun-chinese.json");
+            ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();
+
+            session = new ChatSession(executor, chatHistory);
+        }
+
+        session
+            .WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户", "坤坤"));
+
+        InferenceParams inferenceParams = new InferenceParams()
+        {
+            Temperature = 0.9f,
+            AntiPrompts = new List<string> { "用户:" }
+        };
+
+        Console.ForegroundColor = ConsoleColor.Yellow;
+        Console.WriteLine("The chat session has started.");
+
+        // show the prompt
+        Console.ForegroundColor = ConsoleColor.White;
+        Console.Write("用户:");
+        Console.ForegroundColor = ConsoleColor.Green;
+        string userInput = Console.ReadLine() ?? "";
+
+        while (userInput != "exit")
+        {
+            // Convert the encoding from gb2312 to utf8 for the language model
+            // and later saving to the history json file.
+            userInput = ConvertEncoding(userInput, Encoding.GetEncoding("gb2312"), Encoding.UTF8);
+
+            if (userInput == "save")
+            {
+                session.SaveSession("Assets/chat-with-kunkun-chinese");
+                Console.ForegroundColor = ConsoleColor.Yellow;
+                Console.WriteLine("Session saved.");
+            }
+            else if (userInput == "regenerate")
+            {
+                Console.ForegroundColor = ConsoleColor.Yellow;
+                Console.WriteLine("Regenerating last response ...");
+
+                await foreach (
+                    var text
+                    in session.RegenerateAssistantMessageAsync(
+                        inferenceParams))
+                {
+                    Console.ForegroundColor = ConsoleColor.White;
+
+                    // Convert the encoding from utf8 to gb2312 for the console output.
+                    Console.Write(ConvertEncoding(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
+                }
+            }
+            else
+            {
+                await foreach (
+                    var text
+                    in session.ChatAsync(
+                        new ChatHistory.Message(AuthorRole.User, userInput),
+                        inferenceParams))
+                {
+                    Console.ForegroundColor = ConsoleColor.White;
+                    Console.Write(text);
+                }
+            }
+
+            Console.ForegroundColor = ConsoleColor.Green;
+            userInput = Console.ReadLine() ?? "";
+
+            Console.ForegroundColor = ConsoleColor.White;
+        }
+    }
+}
+
+```
\ No newline at end of file
diff --git a/docs/Examples/ChatSessionStripRoleName.md b/docs/Examples/ChatSessionStripRoleName.md
index 785db83aa..030001541 100644
--- a/docs/Examples/ChatSessionStripRoleName.md
+++ b/docs/Examples/ChatSessionStripRoleName.md
@@ -13,24 +13,54 @@ public class ChatSessionStripRoleName
     public static void Run()
     {
         Console.Write("Please input your model path: ");
-        string modelPath = Console.ReadLine();
-        var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
-        InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
-        ChatSession session = new ChatSession(ex).WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { "User:", "Bob:" }, redundancyLength: 8));
+        var modelPath = Console.ReadLine();
+
+        var parameters = new ModelParams(modelPath)
+        {
+            ContextSize = 1024,
+            Seed = 1337,
+            GpuLayerCount = 5
+        };
+        using var model = LLamaWeights.LoadFromFile(parameters);
+        using var context = model.CreateContext(parameters);
+        var executor = new InteractiveExecutor(context);
+
+        var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
+        ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();
+
+        ChatSession session = new(executor, chatHistory);
+        session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
+            new string[] { "User:", "Assistant:" },
+            redundancyLength: 8));
+
+        InferenceParams inferenceParams = new InferenceParams()
+        {
+            Temperature = 0.9f,
+            AntiPrompts = new List<string> { "User:" }
+        };
 
         Console.ForegroundColor = ConsoleColor.Yellow;
-        Console.WriteLine("The chat session has started. The role names won't be printed.");
-        Console.ForegroundColor = ConsoleColor.White;
+        Console.WriteLine("The chat session has started.");
 
-        while (true)
+        // show the prompt
+        Console.ForegroundColor = ConsoleColor.Green;
+        string userInput = Console.ReadLine() ?? "";
+
+        while (userInput != "exit")
         {
-            foreach (var text in session.Chat(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
+            await foreach (
+                var text
+                in session.ChatAsync(
+                    new ChatHistory.Message(AuthorRole.User, userInput),
+                    inferenceParams))
             {
+                Console.ForegroundColor = ConsoleColor.White;
                 Console.Write(text);
             }
 
             Console.ForegroundColor = ConsoleColor.Green;
-            prompt = Console.ReadLine();
+            userInput = Console.ReadLine() ?? "";
+            Console.ForegroundColor = ConsoleColor.White;
         }
     }
diff --git a/docs/Examples/ChatSessionWithHistory.md b/docs/Examples/ChatSessionWithHistory.md
new file mode 100644
index 000000000..a07a269e3
--- /dev/null
+++ b/docs/Examples/ChatSessionWithHistory.md
@@ -0,0 +1,104 @@
+# Chat session with history
+
+This example starts a chat session from a JSON chat history (or reloads a previously saved session from disk) and supports `save` and `regenerate` commands at the prompt.
+
+```cs
+using LLama.Common;
+
+namespace LLama.Examples.Examples;
+
+public class ChatSessionWithHistory
+{
+    public static async Task Run()
+    {
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+
+        var parameters = new ModelParams(modelPath)
+        {
+            ContextSize = 1024,
+            Seed = 1337,
+            GpuLayerCount = 5
+        };
+        using var model = LLamaWeights.LoadFromFile(parameters);
+        using var context = model.CreateContext(parameters);
+        var executor = new InteractiveExecutor(context);
+
+        ChatSession session;
+        if (Directory.Exists("Assets/chat-with-bob"))
+        {
+            Console.ForegroundColor = ConsoleColor.Yellow;
+            Console.WriteLine("Loading session from disk.");
+            Console.ForegroundColor = ConsoleColor.White;
+
+            session = new ChatSession(executor);
+            session.LoadSession("Assets/chat-with-bob");
+        }
+        else
+        {
+            var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
+            ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();
+
+            session = new ChatSession(executor, chatHistory);
+        }
+
+        session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
+            new string[] { "User:", "Assistant:" },
+            redundancyLength: 8));
+
+        InferenceParams inferenceParams = new InferenceParams()
+        {
+            Temperature = 0.9f,
+            AntiPrompts = new List<string> { "User:" }
+        };
+
+        Console.ForegroundColor = ConsoleColor.Yellow;
+        Console.WriteLine("The chat session has started.");
+
+        // show the prompt
+        Console.ForegroundColor = ConsoleColor.Green;
+        string userInput = Console.ReadLine() ?? "";
+
+        while (userInput != "exit")
+        {
+            if (userInput == "save")
+            {
+                session.SaveSession("Assets/chat-with-bob");
+                Console.ForegroundColor = ConsoleColor.Yellow;
+                Console.WriteLine("Session saved.");
+            }
+            else if (userInput == "regenerate")
+            {
+                Console.ForegroundColor = ConsoleColor.Yellow;
+                Console.WriteLine("Regenerating last response ...");
+
+                await foreach (
+                    var text
+                    in session.RegenerateAssistantMessageAsync(
+                        inferenceParams))
+                {
+                    Console.ForegroundColor = ConsoleColor.White;
+                    Console.Write(text);
+                }
+            }
+            else
+            {
+                await foreach (
+                    var text
+                    in session.ChatAsync(
+                        new ChatHistory.Message(AuthorRole.User, userInput),
+                        inferenceParams))
+                {
+                    Console.ForegroundColor = ConsoleColor.White;
+                    Console.Write(text);
+                }
+            }
+
+            Console.ForegroundColor = ConsoleColor.Green;
+            userInput = Console.ReadLine() ?? "";
+
+            Console.ForegroundColor = ConsoleColor.White;
+        }
+    }
+}
+
+```
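+
+If the `Assets/chat-with-bob.json` history file is not available, the starting history can be seeded in code instead. This is a minimal sketch built on the classes used above (it assumes `ChatHistory.AddMessage(AuthorRole, string)` is available in your LLamaSharp version; adjust to the API you are using):
+
+```cs
+// Build a small starting history in code instead of loading it from JSON
+var chatHistory = new ChatHistory();
+chatHistory.AddMessage(AuthorRole.System, "Transcript of a dialog where the User interacts with an Assistant named Bob.");
+chatHistory.AddMessage(AuthorRole.User, "Hello, Bob.");
+chatHistory.AddMessage(AuthorRole.Assistant, "Hello. How may I help you today?");
+
+var session = new ChatSession(executor, chatHistory);
+```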
""; + + while (userInput != "exit") + { + if (userInput == "save") + { + session.SaveSession("Assets/chat-with-bob"); + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("Session saved."); + } + else if (userInput == "regenerate") + { + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("Regenerating last response ..."); + + await foreach ( + var text + in session.RegenerateAssistantMessageAsync( + inferenceParams)) + { + Console.ForegroundColor = ConsoleColor.White; + Console.Write(text); + } + } + else + { + await foreach ( + var text + in session.ChatAsync( + new ChatHistory.Message(AuthorRole.User, userInput), + inferenceParams)) + { + Console.ForegroundColor = ConsoleColor.White; + Console.Write(text); + } + } + + Console.ForegroundColor = ConsoleColor.Green; + userInput = Console.ReadLine() ?? ""; + + Console.ForegroundColor = ConsoleColor.White; + } + } +} + + +``` \ No newline at end of file diff --git a/docs/Examples/ChatSessionWithRoleName.md b/docs/Examples/ChatSessionWithRoleName.md index 553327553..4f15ade45 100644 --- a/docs/Examples/ChatSessionWithRoleName.md +++ b/docs/Examples/ChatSessionWithRoleName.md @@ -13,26 +13,51 @@ public class ChatSessionWithRoleName public static void Run() { Console.Write("Please input your model path: "); - string modelPath = Console.ReadLine(); - var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim(); - InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5))); - ChatSession session = new ChatSession(ex); // The only change is to remove the transform for the output text stream. + var modelPath = Console.ReadLine(); + + var parameters = new ModelParams(modelPath) + { + ContextSize = 1024, + Seed = 1337, + GpuLayerCount = 5 + }; + using var model = LLamaWeights.LoadFromFile(parameters); + using var context = model.CreateContext(parameters); + var executor = new InteractiveExecutor(context); + + var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json"); + ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory(); + + ChatSession session = new(executor, chatHistory); + + InferenceParams inferenceParams = new InferenceParams() + { + Temperature = 0.9f, + AntiPrompts = new List { "User:" } + }; Console.ForegroundColor = ConsoleColor.Yellow; - Console.WriteLine("The chat session has started. In this example, the prompt is printed for better visual result."); - Console.ForegroundColor = ConsoleColor.White; + Console.WriteLine("The chat session has started."); // show the prompt - Console.Write(prompt); - while (true) + Console.ForegroundColor = ConsoleColor.Green; + string userInput = Console.ReadLine() ?? ""; + + while (userInput != "exit") { - foreach (var text in session.Chat(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List { "User:" } })) + await foreach ( + var text + in session.ChatAsync( + new ChatHistory.Message(AuthorRole.User, userInput), + inferenceParams)) { + Console.ForegroundColor = ConsoleColor.White; Console.Write(text); } Console.ForegroundColor = ConsoleColor.Green; - prompt = Console.ReadLine(); + userInput = Console.ReadLine() ?? 
""; + Console.ForegroundColor = ConsoleColor.White; } } diff --git a/docs/Examples/CodingAssistant.md b/docs/Examples/CodingAssistant.md new file mode 100644 index 000000000..3069f3017 --- /dev/null +++ b/docs/Examples/CodingAssistant.md @@ -0,0 +1,97 @@ +# Coding Assistant + +```cs +using LLama.Common; +using System; +using System.Reflection; + +internal class CodingAssistant +{ + const string DefaultModelUri = "https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_S.gguf"; + + // Source paper with example prompts: + // https://doi.org/10.48550/arXiv.2308.12950 + const string InstructionPrefix = "[INST]"; + const string InstructionSuffix = "[/INST]"; + const string SystemInstruction = "You're an intelligent, concise coding assistant. Wrap code in ``` for readability. Don't repeat yourself. Use best practice and good coding standards."; + private static string ModelsDirectory = Path.Combine(Directory.GetParent(Assembly.GetExecutingAssembly().Location)!.FullName, "Models"); + + public static async Task Run() + { + Console.Write("Please input your model path (if left empty, a default model will be downloaded for you): "); + var modelPath = Console.ReadLine(); + + if(string.IsNullOrWhiteSpace(modelPath) ) + { + modelPath = await GetDefaultModel(); + } + + var parameters = new ModelParams(modelPath) + { + ContextSize = 4096 + }; + using var model = LLamaWeights.LoadFromFile(parameters); + using var context = model.CreateContext(parameters); + var executor = new InstructExecutor(context, InstructionPrefix, InstructionSuffix, null); + + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions." + + "\nIt's a 7B Code Llama, so it's trained for programming tasks like \"Write a C# function reading a file name from a given URI\" or \"Write some programming interview questions\"." + + "\nWrite 'exit' to exit"); + Console.ForegroundColor = ConsoleColor.White; + + var inferenceParams = new InferenceParams() { + Temperature = 0.8f, + MaxTokens = -1, + }; + + string instruction = $"{SystemInstruction}\n\n"; + await Console.Out.WriteAsync("Instruction: "); + instruction += Console.ReadLine() ?? "Ask me for instructions."; + while (instruction != "exit") + { + + Console.ForegroundColor = ConsoleColor.Green; + await foreach (var text in executor.InferAsync(instruction + System.Environment.NewLine, inferenceParams)) + { + Console.Write(text); + } + Console.ForegroundColor = ConsoleColor.White; + + await Console.Out.WriteAsync("Instruction: "); + instruction = Console.ReadLine() ?? 
"Ask me for instructions."; + } + } + + private static async Task GetDefaultModel() + { + var uri = new Uri(DefaultModelUri); + var modelName = uri.Segments[^1]; + await Console.Out.WriteLineAsync($"The following model will be used: {modelName}"); + var modelPath = Path.Combine(ModelsDirectory, modelName); + if(!Directory.Exists(ModelsDirectory)) + { + Directory.CreateDirectory(ModelsDirectory); + } + + if (File.Exists(modelPath)) + { + await Console.Out.WriteLineAsync($"Existing model found, using {modelPath}"); + } + else + { + await Console.Out.WriteLineAsync($"Model not found locally, downloading {DefaultModelUri}..."); + using var http = new HttpClient(); + await using var downloadStream = await http.GetStreamAsync(uri); + await using var fileStream = new FileStream(modelPath, FileMode.Create, FileAccess.Write); + await downloadStream.CopyToAsync(fileStream); + await Console.Out.WriteLineAsync($"Model downloaded and saved to {modelPath}"); + } + + + return modelPath; + } +} + + +``` \ No newline at end of file diff --git a/docs/Examples/GrammerJsonResponse.md b/docs/Examples/GrammerJsonResponse.md new file mode 100644 index 000000000..59672d68f --- /dev/null +++ b/docs/Examples/GrammerJsonResponse.md @@ -0,0 +1,55 @@ +# Grammer json response + +```cs +using LLama.Common; +using LLama.Grammars; + +public class GrammarJsonResponse +{ + public static async Task Run() + { + var gbnf = (await File.ReadAllTextAsync("Assets/json.gbnf")).Trim(); + var grammar = Grammar.Parse(gbnf, "root"); + + Console.Write("Please input your model path: "); + var modelPath = Console.ReadLine(); + + var parameters = new ModelParams(modelPath) + { + ContextSize = 1024, + Seed = 1337, + GpuLayerCount = 5 + }; + using var model = LLamaWeights.LoadFromFile(parameters); + var ex = new StatelessExecutor(model, parameters); + + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions and always respond in a JSON format. 
\ No newline at end of file
diff --git a/docs/Examples/KernelMemory.md b/docs/Examples/KernelMemory.md
new file mode 100644
index 000000000..76ac266a8
--- /dev/null
+++ b/docs/Examples/KernelMemory.md
@@ -0,0 +1,62 @@
+# Kernel memory
+
+This example plugs LLamaSharp into Microsoft Kernel Memory, imports a PDF document, and answers a question grounded in that document.
+
+```cs
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using LLamaSharp.KernelMemory;
+using Microsoft.KernelMemory;
+using Microsoft.KernelMemory.Configuration;
+using Microsoft.KernelMemory.Handlers;
+
+public class KernelMemory
+{
+    public static async Task Run()
+    {
+        Console.WriteLine("Example from: https://github.com/microsoft/kernel-memory/blob/main/examples/101-using-core-nuget/Program.cs");
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+        var searchClientConfig = new SearchClientConfig
+        {
+            MaxMatchesCount = 1,
+            AnswerTokens = 100,
+        };
+        var memory = new KernelMemoryBuilder()
+            .WithLLamaSharpDefaults(new LLamaSharpConfig(modelPath)
+            {
+                DefaultInferenceParams = new Common.InferenceParams
+                {
+                    AntiPrompts = new List<string> { "\n\n" }
+                }
+            })
+            .WithSearchClientConfig(searchClientConfig)
+            .With(new TextPartitioningOptions
+            {
+                MaxTokensPerParagraph = 300,
+                MaxTokensPerLine = 100,
+                OverlappingTokens = 30
+            })
+            .Build();
+
+        await memory.ImportDocumentAsync(@"./Assets/sample-SK-Readme.pdf", steps: Constants.PipelineWithoutSummary);
+
+        var question = "What's Semantic Kernel?";
+
+        Console.WriteLine($"\n\nQuestion: {question}");
+
+        var answer = await memory.AskAsync(question);
+
+        Console.WriteLine($"\nAnswer: {answer.Result}");
+
+        Console.WriteLine("\n\n  Sources:\n");
+
+        foreach (var x in answer.RelevantSources)
+        {
+            Console.WriteLine($"  - {x.SourceName}  - {x.Link} [{x.Partitions.First().LastUpdate:D}]");
+        }
+    }
+}
+
+```
\ No newline at end of file
diff --git a/docs/Examples/SemanticKernelMemory.md b/docs/Examples/SemanticKernelMemory.md
new file mode 100644
index 000000000..6ea7bd485
--- /dev/null
+++ b/docs/Examples/SemanticKernelMemory.md
@@ -0,0 +1,170 @@
+# Semantic kernel memory
+
+This example uses LLamaSharp embeddings with Semantic Kernel's volatile memory store to index a handful of GitHub links and search them by semantic similarity.
+
+```cs
+using LLama.Common;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.Memory;
+using LLamaSharp.SemanticKernel.TextEmbedding;
+using Microsoft.SemanticKernel.AI.Embeddings;
+using Microsoft.SemanticKernel.Plugins.Memory;
+
+public class SemanticKernelMemory
+{
+    private const string MemoryCollectionName = "SKGitHub";
+
+    public static async Task Run()
+    {
+        var loggerFactory = ConsoleLogger.LoggerFactory;
+        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example14_SemanticMemory.cs");
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+
+        var seed = 1337u;
+        // Load weights into memory
+        var parameters = new ModelParams(modelPath)
+        {
+            Seed = seed,
+            EmbeddingMode = true
+        };
+
+        using var model = LLamaWeights.LoadFromFile(parameters);
+        var embedding = new LLamaEmbedder(model, parameters);
+
+        Console.WriteLine("====================================================");
+        Console.WriteLine("======== Semantic Memory (volatile, in RAM) ========");
+        Console.WriteLine("====================================================");
+
+        /* You can build your own semantic memory combining an Embedding Generator
+         * with a Memory storage that supports search by similarity (i.e. semantic search).
+         *
+         * In this example we use a volatile memory, a local simulation of a vector DB.
+         *
+         * You can replace VolatileMemoryStore with Qdrant (see QdrantMemoryStore connector)
+         * or implement your connectors for Pinecone, Vespa, Postgres + pgvector, SQLite VSS, etc.
+         */
+
+        var memory = new MemoryBuilder()
+            .WithTextEmbeddingGeneration(new LLamaSharpEmbeddingGeneration(embedding))
+            .WithMemoryStore(new VolatileMemoryStore())
+            .Build();
+
+        await RunExampleAsync(memory);
+    }
+
+    private static async Task RunExampleAsync(ISemanticTextMemory memory)
+    {
+        await StoreMemoryAsync(memory);
+
+        await SearchMemoryAsync(memory, "How do I get started?");
+
+        /*
+        Output:
+
+        Query: How do I get started?
+
+        Result 1:
+          URL:     : https://github.com/microsoft/semantic-kernel/blob/main/README.md
+          Title    : README: Installation, getting started, and how to contribute
+
+        Result 2:
+          URL:     : https://github.com/microsoft/semantic-kernel/blob/main/samples/dotnet-jupyter-notebooks/00-getting-started.ipynb
+          Title    : Jupyter notebook describing how to get started with the Semantic Kernel
+
+        */
+
+        await SearchMemoryAsync(memory, "Can I build a chat with SK?");
+
+        /*
+        Output:
+
+        Query: Can I build a chat with SK?
+
+        Result 1:
+          URL:     : https://github.com/microsoft/semantic-kernel/tree/main/samples/skills/ChatSkill/ChatGPT
+          Title    : Sample demonstrating how to create a chat skill interfacing with ChatGPT
+
+        Result 2:
+          URL:     : https://github.com/microsoft/semantic-kernel/blob/main/samples/apps/chat-summary-webapp-react/README.md
+          Title    : README: README associated with a sample chat summary react-based webapp
+
+        */
+
+        await SearchMemoryAsync(memory, "Jupyter notebook");
+
+        await SearchMemoryAsync(memory, "README: README associated with a sample chat summary react-based webapp");
+
+        await SearchMemoryAsync(memory, "Jupyter notebook describing how to pass prompts from a file to a semantic skill or function");
+    }
+
+    private static async Task SearchMemoryAsync(ISemanticTextMemory memory, string query)
+    {
+        Console.WriteLine("\nQuery: " + query + "\n");
+
+        var memories = memory.SearchAsync(MemoryCollectionName, query, limit: 10, minRelevanceScore: 0.5);
+
+        int i = 0;
+        await foreach (MemoryQueryResult result in memories)
+        {
+            Console.WriteLine($"Result {++i}:");
+            Console.WriteLine("  URL:     : " + result.Metadata.Id);
+            Console.WriteLine("  Title    : " + result.Metadata.Description);
+            Console.WriteLine("  Relevance: " + result.Relevance);
+            Console.WriteLine();
+        }
+
+        Console.WriteLine("----------------------");
+    }
+
+    private static async Task StoreMemoryAsync(ISemanticTextMemory memory)
+    {
+        /* Store some data in the semantic memory.
+         *
+         * When using Azure Cognitive Search the data is automatically indexed on write.
+         *
+         * When using the combination of VolatileStore and Embedding generation, SK takes
+         * care of creating and storing the index
+         */
+
+        Console.WriteLine("\nAdding some GitHub file URLs and their descriptions to the semantic memory.");
+        var githubFiles = SampleData();
+        var i = 0;
+        foreach (var entry in githubFiles)
+        {
+            var result = await memory.SaveReferenceAsync(
+                collection: MemoryCollectionName,
+                externalSourceName: "GitHub",
+                externalId: entry.Key,
+                description: entry.Value,
+                text: entry.Value);
+
+            Console.WriteLine($"#{++i} saved.");
+            Console.WriteLine(result);
+        }
+
+        Console.WriteLine("\n----------------------");
+    }
+
+    private static Dictionary<string, string> SampleData()
+    {
+        return new Dictionary<string, string>
+        {
+            ["https://github.com/microsoft/semantic-kernel/blob/main/README.md"]
+                = "README: Installation, getting started, and how to contribute",
+            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/notebooks/02-running-prompts-from-file.ipynb"]
+                = "Jupyter notebook describing how to pass prompts from a file to a semantic skill or function",
+            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/notebooks//00-getting-started.ipynb"]
+                = "Jupyter notebook describing how to get started with the Semantic Kernel",
+            ["https://github.com/microsoft/semantic-kernel/tree/main/samples/skills/ChatSkill/ChatGPT"]
+                = "Sample demonstrating how to create a chat skill interfacing with ChatGPT",
+            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel/Memory/VolatileMemoryStore.cs"]
+                = "C# class that defines a volatile embedding store",
+            ["https://github.com/microsoft/semantic-kernel/blob/main/samples/dotnet/KernelHttpServer/README.md"]
+                = "README: How to set up a Semantic Kernel Service API using Azure Function Runtime v4",
+            ["https://github.com/microsoft/semantic-kernel/blob/main/samples/apps/chat-summary-webapp-react/README.md"]
+                = "README: README associated with a sample chat summary react-based webapp",
+        };
+    }
+}
+
+```
\ No newline at end of file
diff --git a/docs/Examples/SemanticKernelPrompt.md b/docs/Examples/SemanticKernelPrompt.md
new file mode 100644
index 000000000..f7589a44e
--- /dev/null
+++ b/docs/Examples/SemanticKernelPrompt.md
@@ -0,0 +1,55 @@
+# Semantic kernel prompt
+
+This example registers a LLamaSharp `StatelessExecutor` as a Semantic Kernel text generation service and uses a prompt function to produce one-line summaries.
+
+```cs
+using System.Security.Cryptography;
+using LLama.Common;
+using LLamaSharp.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel;
+using LLamaSharp.SemanticKernel.TextCompletion;
+using Microsoft.SemanticKernel.TextGeneration;
+using Microsoft.Extensions.DependencyInjection;
+
+public class SemanticKernelPrompt
+{
+    public static async Task Run()
+    {
+        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+
+        // Load weights into memory
+        var parameters = new ModelParams(modelPath);
+        using var model = LLamaWeights.LoadFromFile(parameters);
+        var ex = new StatelessExecutor(model, parameters);
+
+        var builder = Kernel.CreateBuilder();
+        builder.Services.AddKeyedSingleton<ITextGenerationService>("local-llama", new LLamaSharpTextCompletion(ex));
+
+        var kernel = builder.Build();
+
+        var prompt = @"{{$input}}
+
+One line TLDR with the fewest words.";
+
+        ChatRequestSettings settings = new() { MaxTokens = 100 };
+        var summarize = kernel.CreateFunctionFromPrompt(prompt, settings);
+
+        string text1 = @"
+1st Law of Thermodynamics - Energy cannot be created or destroyed.
+2nd Law of Thermodynamics - For a spontaneous process, the entropy of the universe increases.
+3rd Law of Thermodynamics - A perfect crystal at zero Kelvin has zero entropy.";
+
+        string text2 = @"
+1. An object at rest remains at rest, and an object in motion remains in motion at constant speed and in a straight line unless acted on by an unbalanced force.
+2. The acceleration of an object depends on the mass of the object and the amount of force applied.
+3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite force on the first.";
+
+        Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text1 })).GetValue<string>());
+
+        Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text2 })).GetValue<string>());
+    }
+}
+
+```
\ No newline at end of file
diff --git a/docs/Examples/TalkToYourself.md b/docs/Examples/TalkToYourself.md
new file mode 100644
index 000000000..a45676b14
--- /dev/null
+++ b/docs/Examples/TalkToYourself.md
@@ -0,0 +1,73 @@
+# Talk to yourself
+
+This example creates two executors that share one set of model weights and lets them converse with each other until a key is pressed.
+
+```cs
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+
+public class TalkToYourself
+{
+    public static async Task Run()
+    {
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+
+        // Load weights into memory
+        var @params = new ModelParams(modelPath);
+        using var weights = LLamaWeights.LoadFromFile(@params);
+
+        // Create 2 contexts sharing the same weights
+        using var aliceCtx = weights.CreateContext(@params);
+        var alice = new InteractiveExecutor(aliceCtx);
+        using var bobCtx = weights.CreateContext(@params);
+        var bob = new InteractiveExecutor(bobCtx);
+
+        // Initial alice prompt
+        var alicePrompt = "Transcript of a dialog, where the Alice interacts a person named Bob. Alice is friendly, kind, honest and good at writing.\nAlice: Hello";
+        var aliceResponse = await Prompt(alice, ConsoleColor.Green, alicePrompt, false, false);
+
+        // Initial bob prompt
+        var bobPrompt = $"Transcript of a dialog, where the Bob interacts a person named Alice. Bob is smart, intellectual and good at writing.\nAlice: Hello{aliceResponse}";
+        var bobResponse = await Prompt(bob, ConsoleColor.Red, bobPrompt, true, true);
+
+        // swap back and forth from Alice to Bob
+        while (true)
+        {
+            aliceResponse = await Prompt(alice, ConsoleColor.Green, bobResponse, false, true);
+            bobResponse = await Prompt(bob, ConsoleColor.Red, aliceResponse, false, true);
+
+            if (Console.KeyAvailable)
+                break;
+        }
+    }
+
+    private static async Task<string> Prompt(ILLamaExecutor executor, ConsoleColor color, string prompt, bool showPrompt, bool showResponse)
+    {
+        var inferenceParams = new InferenceParams
+        {
+            Temperature = 0.9f,
+            AntiPrompts = new List<string> { "Alice:", "Bob:", "User:" },
+            MaxTokens = 128,
+            Mirostat = MirostatType.Mirostat2,
+            MirostatTau = 10,
+        };
+
+        Console.ForegroundColor = ConsoleColor.White;
+        if (showPrompt)
+            Console.Write(prompt);
+
+        Console.ForegroundColor = color;
+        var builder = new StringBuilder();
+        await foreach (var text in executor.InferAsync(prompt, inferenceParams))
+        {
+            builder.Append(text);
+            if (showResponse)
+                Console.Write(text);
+        }
+
+        return builder.ToString();
+    }
+}
+
+```
\ No newline at end of file