diff --git a/docs/Examples/BatchDecoding.md b/docs/Examples/BatchDecoding.md
new file mode 100644
index 000000000..0b03ed049
--- /dev/null
+++ b/docs/Examples/BatchDecoding.md
@@ -0,0 +1,170 @@
+# Batch decoding
+
+This example decodes several sequences in parallel from a single prompt, sharing the prompt's KV cache across all sequences and sampling each stream with its own pipeline.
+
+```cs
+using System.Diagnostics;
+using System.Text;
+using LLama.Common;
+using LLama.Native;
+using LLama.Sampling;
+
+public class BatchedDecoding
+{
+    private const int n_parallel = 8;
+    private const int n_len = 32;
+
+    public static async Task Run()
+    {
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+
+        Console.WriteLine("Prompt (leave blank to select automatically):");
+        var prompt = Console.ReadLine();
+        if (string.IsNullOrWhiteSpace(prompt))
+            prompt = "Not many people know that";
+
+        // Load model
+        var parameters = new ModelParams(modelPath);
+
+        using var model = LLamaWeights.LoadFromFile(parameters);
+
+        // Tokenize prompt
+        var prompt_tokens = model.Tokenize(prompt, true, false, Encoding.UTF8);
+        var n_kv_req = prompt_tokens.Length + (n_len - prompt_tokens.Length) * n_parallel;
+
+        // Create a context
+        parameters.ContextSize = (uint)model.ContextSize;
+        parameters.BatchSize = (uint)Math.Max(n_len, n_parallel);
+        using var context = model.CreateContext(parameters);
+
+        var n_ctx = context.ContextSize;
+
+        // make sure the KV cache is big enough to hold all the prompt and generated tokens
+        if (n_kv_req > n_ctx)
+        {
+            await Console.Error.WriteLineAsync($"error: n_kv_req ({n_kv_req}) > n_ctx, the required KV cache size is not big enough\n");
+            await Console.Error.WriteLineAsync("        either reduce n_parallel or increase n_ctx\n");
+            return;
+        }
+
+        var batch = new LLamaBatch();
+
+        // evaluate the initial prompt
+        batch.AddRange(prompt_tokens, 0, LLamaSeqId.Zero, true);
+
+        if (await context.DecodeAsync(batch) != DecodeResult.Ok)
+        {
+            await Console.Error.WriteLineAsync("llama_decode failed");
+            return;
+        }
+
+        // assign the system KV cache to all parallel sequences
+        // this way, the parallel sequences will "reuse" the prompt tokens without having to copy them
+        for (var i = 1; i < n_parallel; ++i)
+        {
+            context.NativeHandle.KvCacheSequenceCopy((LLamaSeqId)0, (LLamaSeqId)i, 0, batch.TokenCount);
+        }
+
+        if (n_parallel > 1)
+        {
+            Console.WriteLine();
+            Console.WriteLine($"generating {n_parallel} sequences...");
+        }
+
+        // remember the batch index of the last token for each parallel sequence
+        // we need this to determine which logits to sample from
+        List<int> i_batch = new();
+        for (var i = 0; i < n_parallel; i++)
+            i_batch.Add(batch.TokenCount - 1);
+
+        // Create per-stream decoder and sampler
+        var decoders = new StreamingTokenDecoder[n_parallel];
+        var samplers = new ISamplingPipeline[n_parallel];
+        for (var i = 0; i < n_parallel; i++)
+        {
+            decoders[i] = new StreamingTokenDecoder(context);
+            samplers[i] = new DefaultSamplingPipeline
+            {
+                Temperature = 0.1f + (float)i / n_parallel,
+                MinP = 0.25f,
+            };
+        }
+
+        var n_cur = batch.TokenCount;
+        var n_decode = 0;
+
+        var timer = new Stopwatch();
+        timer.Start();
+        while (n_cur <= n_len)
+        {
+            batch.Clear();
+
+            for (var i = 0; i < n_parallel; i++)
+            {
+                // Skip completed streams
+                if (i_batch[i] < 0)
+                    continue;
+
+                // Use the sampling pipeline to select a token
+                var new_token_id = samplers[i].Sample(
+                    context.NativeHandle,
+                    context.NativeHandle.GetLogitsIth(i_batch[i]),
+                    Array.Empty<LLamaToken>()
+                );
+
+                // Finish this stream early if necessary
+                if (new_token_id == model.EndOfSentenceToken || new_token_id == model.NewlineToken)
+                {
+                    i_batch[i] = -1;
+                    Console.WriteLine($"Completed Stream {i} early");
+                    continue;
+                }
+
+                // Add this token to the decoder, so it will be turned into text
+                decoders[i].Add(new_token_id);
+
+                i_batch[i] = batch.TokenCount;
+
+                // push this new token for next evaluation
+                batch.Add(new_token_id, n_cur, (LLamaSeqId)i, true);
+
+                n_decode++;
+            }
+
+            // Check if all streams are finished
+            if (batch.TokenCount == 0)
+            {
+                break;
+            }
+
+            n_cur++;
+
+            // evaluate the current batch with the transformer model
+            if (await context.DecodeAsync(batch) != DecodeResult.Ok)
+            {
+                await Console.Error.WriteLineAsync("failed to eval");
+                return;
+            }
+        }
+
+        timer.Stop();
+        Console.ForegroundColor = ConsoleColor.Yellow;
+        Console.WriteLine();
+        Console.WriteLine($"Decoded {n_decode} tokens in {timer.ElapsedMilliseconds}ms");
+        Console.WriteLine($"Rate: {n_decode / timer.Elapsed.TotalSeconds:##.000} tokens/second");
+
+        var index = 0;
+        foreach (var stream in decoders)
+        {
+            var text = stream.Read();
+
+            Console.ForegroundColor = ConsoleColor.Green;
+            Console.Write($"{index++}. {prompt}");
+            Console.ForegroundColor = ConsoleColor.Red;
+            Console.WriteLine(text);
+        }
+
+        Console.WriteLine("Press any key to exit demo");
+        Console.ReadKey(true);
+    }
+}
+```
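+
+As a rough sanity check of the KV cache maths above: if the prompt happens to tokenize to 6 tokens, then `n_kv_req = 6 + (32 - 6) * 8 = 214`, so the context must be able to hold at least 214 tokens for 8 parallel streams of 32 tokens each (the prompt tokens are shared between all sequences via `KvCacheSequenceCopy`). The exact numbers depend on your prompt and tokenizer.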
\ No newline at end of file
diff --git a/docs/Examples/ChatChineseGB2312.md b/docs/Examples/ChatChineseGB2312.md
new file mode 100644
index 000000000..a5a0defef
--- /dev/null
+++ b/docs/Examples/ChatChineseGB2312.md
@@ -0,0 +1,125 @@
+# Chat Chinese
+
+This example runs an interactive chat in Chinese on a GB2312 console, converting user input to UTF-8 for the model and converting the model's UTF-8 output back to GB2312 for display.
+
+```cs
+using System.Text;
+using LLama.Common;
+
+public class ChatChineseGB2312
+{
+    private static string ConvertEncoding(string input, Encoding original, Encoding target)
+    {
+        byte[] bytes = original.GetBytes(input);
+        var convertedBytes = Encoding.Convert(original, target, bytes);
+        return target.GetString(convertedBytes);
+    }
+
+    public static async Task Run()
+    {
+        // Register provider for GB2312 encoding
+        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
+
+        Console.ForegroundColor = ConsoleColor.Yellow;
+        Console.WriteLine("This example shows how to use Chinese with gb2312 encoding, which is common in Windows. It's recommended" +
+                          " to use https://huggingface.co/hfl/chinese-alpaca-2-7b-gguf/blob/main/ggml-model-q5_0.gguf, which has been verified by LLamaSharp developers.");
+        Console.ForegroundColor = ConsoleColor.White;
+
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+
+        var parameters = new ModelParams(modelPath)
+        {
+            ContextSize = 1024,
+            Seed = 1337,
+            GpuLayerCount = 5,
+            Encoding = Encoding.UTF8
+        };
+        using var model = LLamaWeights.LoadFromFile(parameters);
+        using var context = model.CreateContext(parameters);
+        var executor = new InteractiveExecutor(context);
+
+        ChatSession session;
+        if (Directory.Exists("Assets/chat-with-kunkun-chinese"))
+        {
+            Console.ForegroundColor = ConsoleColor.Yellow;
+            Console.WriteLine("Loading session from disk.");
+            Console.ForegroundColor = ConsoleColor.White;
+
+            session = new ChatSession(executor);
+            session.LoadSession("Assets/chat-with-kunkun-chinese");
+        }
+        else
+        {
+            var chatHistoryJson = File.ReadAllText("Assets/chat-with-kunkun-chinese.json");
+            ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();
+
+            session = new ChatSession(executor, chatHistory);
+        }
+
+        session
+            .WithHistoryTransform(new LLamaTransforms.DefaultHistoryTransform("用户", "坤坤"));
+
+        InferenceParams inferenceParams = new InferenceParams()
+        {
+            Temperature = 0.9f,
+            AntiPrompts = new List<string> { "用户:" }
+        };
+
+        Console.ForegroundColor = ConsoleColor.Yellow;
+        Console.WriteLine("The chat session has started.");
+
+        // show the prompt
+        Console.ForegroundColor = ConsoleColor.White;
+        Console.Write("用户:");
+        Console.ForegroundColor = ConsoleColor.Green;
+        string userInput = Console.ReadLine() ?? "";
+
+        while (userInput != "exit")
+        {
+            // Convert the encoding from gb2312 to utf8 for the language model
+            // and later saving to the history json file.
+            userInput = ConvertEncoding(userInput, Encoding.GetEncoding("gb2312"), Encoding.UTF8);
+
+            if (userInput == "save")
+            {
+                session.SaveSession("Assets/chat-with-kunkun-chinese");
+                Console.ForegroundColor = ConsoleColor.Yellow;
+                Console.WriteLine("Session saved.");
+            }
+            else if (userInput == "regenerate")
+            {
+                Console.ForegroundColor = ConsoleColor.Yellow;
+                Console.WriteLine("Regenerating last response ...");
+
+                await foreach (
+                    var text
+                    in session.RegenerateAssistantMessageAsync(
+                        inferenceParams))
+                {
+                    Console.ForegroundColor = ConsoleColor.White;
+
+                    // Convert the encoding from utf8 to gb2312 for the console output.
+                    Console.Write(ConvertEncoding(text, Encoding.UTF8, Encoding.GetEncoding("gb2312")));
+                }
+            }
+            else
+            {
+                await foreach (
+                    var text
+                    in session.ChatAsync(
+                        new ChatHistory.Message(AuthorRole.User, userInput),
+                        inferenceParams))
+                {
+                    Console.ForegroundColor = ConsoleColor.White;
+                    Console.Write(text);
+                }
+            }
+
+            Console.ForegroundColor = ConsoleColor.Green;
+            userInput = Console.ReadLine() ?? "";
+
+            Console.ForegroundColor = ConsoleColor.White;
+        }
+    }
+}
+
+```
\ No newline at end of file
diff --git a/docs/Examples/ChatSessionStripRoleName.md b/docs/Examples/ChatSessionStripRoleName.md
index 785db83aa..030001541 100644
--- a/docs/Examples/ChatSessionStripRoleName.md
+++ b/docs/Examples/ChatSessionStripRoleName.md
@@ -13,24 +13,54 @@ public class ChatSessionStripRoleName
     public static void Run()
     {
         Console.Write("Please input your model path: ");
-        string modelPath = Console.ReadLine();
-        var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim();
-        InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5)));
-        ChatSession session = new ChatSession(ex).WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(new string[] { "User:", "Bob:" }, redundancyLength: 8));
+        var modelPath = Console.ReadLine();
+
+        var parameters = new ModelParams(modelPath)
+        {
+            ContextSize = 1024,
+            Seed = 1337,
+            GpuLayerCount = 5
+        };
+        using var model = LLamaWeights.LoadFromFile(parameters);
+        using var context = model.CreateContext(parameters);
+        var executor = new InteractiveExecutor(context);
+
+        var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
+        ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();
+
+        ChatSession session = new(executor, chatHistory);
+        session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
+            new string[] { "User:", "Assistant:" },
+            redundancyLength: 8));
+
+        InferenceParams inferenceParams = new InferenceParams()
+        {
+            Temperature = 0.9f,
+            AntiPrompts = new List<string> { "User:" }
+        };
 
         Console.ForegroundColor = ConsoleColor.Yellow;
-        Console.WriteLine("The chat session has started. The role names won't be printed.");
-        Console.ForegroundColor = ConsoleColor.White;
+        Console.WriteLine("The chat session has started.");
 
-        while (true)
+        // show the prompt
+        Console.ForegroundColor = ConsoleColor.Green;
+        string userInput = Console.ReadLine() ?? "";
+
+        while (userInput != "exit")
         {
-            foreach (var text in session.Chat(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List<string> { "User:" } }))
+            await foreach (
+                var text
+                in session.ChatAsync(
+                    new ChatHistory.Message(AuthorRole.User, userInput),
+                    inferenceParams))
             {
+                Console.ForegroundColor = ConsoleColor.White;
                 Console.Write(text);
             }
 
             Console.ForegroundColor = ConsoleColor.Green;
-            prompt = Console.ReadLine();
+            userInput = Console.ReadLine() ?? "";
+            Console.ForegroundColor = ConsoleColor.White;
         }
     }
diff --git a/docs/Examples/ChatSessionWithHistory.md b/docs/Examples/ChatSessionWithHistory.md
new file mode 100644
index 000000000..a07a269e3
--- /dev/null
+++ b/docs/Examples/ChatSessionWithHistory.md
@@ -0,0 +1,104 @@
+# Chat session with history
+
+This example starts a chat session from a JSON chat history (or reloads a previously saved session from disk) and supports `save` and `regenerate` commands at the prompt.
+
+```cs
+using LLama.Common;
+
+namespace LLama.Examples.Examples;
+
+public class ChatSessionWithHistory
+{
+    public static async Task Run()
+    {
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+
+        var parameters = new ModelParams(modelPath)
+        {
+            ContextSize = 1024,
+            Seed = 1337,
+            GpuLayerCount = 5
+        };
+        using var model = LLamaWeights.LoadFromFile(parameters);
+        using var context = model.CreateContext(parameters);
+        var executor = new InteractiveExecutor(context);
+
+        ChatSession session;
+        if (Directory.Exists("Assets/chat-with-bob"))
+        {
+            Console.ForegroundColor = ConsoleColor.Yellow;
+            Console.WriteLine("Loading session from disk.");
+            Console.ForegroundColor = ConsoleColor.White;
+
+            session = new ChatSession(executor);
+            session.LoadSession("Assets/chat-with-bob");
+        }
+        else
+        {
+            var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json");
+            ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory();
+
+            session = new ChatSession(executor, chatHistory);
+        }
+
+        session.WithOutputTransform(new LLamaTransforms.KeywordTextOutputStreamTransform(
+            new string[] { "User:", "Assistant:" },
+            redundancyLength: 8));
+
+        InferenceParams inferenceParams = new InferenceParams()
+        {
+            Temperature = 0.9f,
+            AntiPrompts = new List<string> { "User:" }
+        };
+
+        Console.ForegroundColor = ConsoleColor.Yellow;
+        Console.WriteLine("The chat session has started.");
+
+        // show the prompt
+        Console.ForegroundColor = ConsoleColor.Green;
+        string userInput = Console.ReadLine() ?? "";
+
+        while (userInput != "exit")
+        {
+            if (userInput == "save")
+            {
+                session.SaveSession("Assets/chat-with-bob");
+                Console.ForegroundColor = ConsoleColor.Yellow;
+                Console.WriteLine("Session saved.");
+            }
+            else if (userInput == "regenerate")
+            {
+                Console.ForegroundColor = ConsoleColor.Yellow;
+                Console.WriteLine("Regenerating last response ...");
+
+                await foreach (
+                    var text
+                    in session.RegenerateAssistantMessageAsync(
+                        inferenceParams))
+                {
+                    Console.ForegroundColor = ConsoleColor.White;
+                    Console.Write(text);
+                }
+            }
+            else
+            {
+                await foreach (
+                    var text
+                    in session.ChatAsync(
+                        new ChatHistory.Message(AuthorRole.User, userInput),
+                        inferenceParams))
+                {
+                    Console.ForegroundColor = ConsoleColor.White;
+                    Console.Write(text);
+                }
+            }
+
+            Console.ForegroundColor = ConsoleColor.Green;
+            userInput = Console.ReadLine() ?? "";
+
+            Console.ForegroundColor = ConsoleColor.White;
+        }
+    }
+}
+
+```
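+
+If the `Assets/chat-with-bob.json` history file is not available, the starting history can be seeded in code instead. This is a minimal sketch built on the classes used above (it assumes `ChatHistory.AddMessage(AuthorRole, string)` is available in your LLamaSharp version; adjust to the API you are using):
+
+```cs
+// Build a small starting history in code instead of loading it from JSON
+var chatHistory = new ChatHistory();
+chatHistory.AddMessage(AuthorRole.System, "Transcript of a dialog where the User interacts with an Assistant named Bob.");
+chatHistory.AddMessage(AuthorRole.User, "Hello, Bob.");
+chatHistory.AddMessage(AuthorRole.Assistant, "Hello. How may I help you today?");
+
+var session = new ChatSession(executor, chatHistory);
+```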
""; + + while (userInput != "exit") + { + if (userInput == "save") + { + session.SaveSession("Assets/chat-with-bob"); + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("Session saved."); + } + else if (userInput == "regenerate") + { + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("Regenerating last response ..."); + + await foreach ( + var text + in session.RegenerateAssistantMessageAsync( + inferenceParams)) + { + Console.ForegroundColor = ConsoleColor.White; + Console.Write(text); + } + } + else + { + await foreach ( + var text + in session.ChatAsync( + new ChatHistory.Message(AuthorRole.User, userInput), + inferenceParams)) + { + Console.ForegroundColor = ConsoleColor.White; + Console.Write(text); + } + } + + Console.ForegroundColor = ConsoleColor.Green; + userInput = Console.ReadLine() ?? ""; + + Console.ForegroundColor = ConsoleColor.White; + } + } +} + + +``` \ No newline at end of file diff --git a/docs/Examples/ChatSessionWithRoleName.md b/docs/Examples/ChatSessionWithRoleName.md index 553327553..4f15ade45 100644 --- a/docs/Examples/ChatSessionWithRoleName.md +++ b/docs/Examples/ChatSessionWithRoleName.md @@ -13,26 +13,51 @@ public class ChatSessionWithRoleName public static void Run() { Console.Write("Please input your model path: "); - string modelPath = Console.ReadLine(); - var prompt = File.ReadAllText("Assets/chat-with-bob.txt").Trim(); - InteractiveExecutor ex = new(new LLamaModel(new ModelParams(modelPath, contextSize: 1024, seed: 1337, gpuLayerCount: 5))); - ChatSession session = new ChatSession(ex); // The only change is to remove the transform for the output text stream. + var modelPath = Console.ReadLine(); + + var parameters = new ModelParams(modelPath) + { + ContextSize = 1024, + Seed = 1337, + GpuLayerCount = 5 + }; + using var model = LLamaWeights.LoadFromFile(parameters); + using var context = model.CreateContext(parameters); + var executor = new InteractiveExecutor(context); + + var chatHistoryJson = File.ReadAllText("Assets/chat-with-bob.json"); + ChatHistory chatHistory = ChatHistory.FromJson(chatHistoryJson) ?? new ChatHistory(); + + ChatSession session = new(executor, chatHistory); + + InferenceParams inferenceParams = new InferenceParams() + { + Temperature = 0.9f, + AntiPrompts = new List { "User:" } + }; Console.ForegroundColor = ConsoleColor.Yellow; - Console.WriteLine("The chat session has started. In this example, the prompt is printed for better visual result."); - Console.ForegroundColor = ConsoleColor.White; + Console.WriteLine("The chat session has started."); // show the prompt - Console.Write(prompt); - while (true) + Console.ForegroundColor = ConsoleColor.Green; + string userInput = Console.ReadLine() ?? ""; + + while (userInput != "exit") { - foreach (var text in session.Chat(prompt, new InferenceParams() { Temperature = 0.6f, AntiPrompts = new List { "User:" } })) + await foreach ( + var text + in session.ChatAsync( + new ChatHistory.Message(AuthorRole.User, userInput), + inferenceParams)) { + Console.ForegroundColor = ConsoleColor.White; Console.Write(text); } Console.ForegroundColor = ConsoleColor.Green; - prompt = Console.ReadLine(); + userInput = Console.ReadLine() ?? 
""; + Console.ForegroundColor = ConsoleColor.White; } } diff --git a/docs/Examples/CodingAssistant.md b/docs/Examples/CodingAssistant.md new file mode 100644 index 000000000..3069f3017 --- /dev/null +++ b/docs/Examples/CodingAssistant.md @@ -0,0 +1,97 @@ +# Coding Assistant + +```cs +using LLama.Common; +using System; +using System.Reflection; + +internal class CodingAssistant +{ + const string DefaultModelUri = "https://huggingface.co/TheBloke/CodeLlama-7B-Instruct-GGUF/resolve/main/codellama-7b-instruct.Q4_K_S.gguf"; + + // Source paper with example prompts: + // https://doi.org/10.48550/arXiv.2308.12950 + const string InstructionPrefix = "[INST]"; + const string InstructionSuffix = "[/INST]"; + const string SystemInstruction = "You're an intelligent, concise coding assistant. Wrap code in ``` for readability. Don't repeat yourself. Use best practice and good coding standards."; + private static string ModelsDirectory = Path.Combine(Directory.GetParent(Assembly.GetExecutingAssembly().Location)!.FullName, "Models"); + + public static async Task Run() + { + Console.Write("Please input your model path (if left empty, a default model will be downloaded for you): "); + var modelPath = Console.ReadLine(); + + if(string.IsNullOrWhiteSpace(modelPath) ) + { + modelPath = await GetDefaultModel(); + } + + var parameters = new ModelParams(modelPath) + { + ContextSize = 4096 + }; + using var model = LLamaWeights.LoadFromFile(parameters); + using var context = model.CreateContext(parameters); + var executor = new InstructExecutor(context, InstructionPrefix, InstructionSuffix, null); + + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions." + + "\nIt's a 7B Code Llama, so it's trained for programming tasks like \"Write a C# function reading a file name from a given URI\" or \"Write some programming interview questions\"." + + "\nWrite 'exit' to exit"); + Console.ForegroundColor = ConsoleColor.White; + + var inferenceParams = new InferenceParams() { + Temperature = 0.8f, + MaxTokens = -1, + }; + + string instruction = $"{SystemInstruction}\n\n"; + await Console.Out.WriteAsync("Instruction: "); + instruction += Console.ReadLine() ?? "Ask me for instructions."; + while (instruction != "exit") + { + + Console.ForegroundColor = ConsoleColor.Green; + await foreach (var text in executor.InferAsync(instruction + System.Environment.NewLine, inferenceParams)) + { + Console.Write(text); + } + Console.ForegroundColor = ConsoleColor.White; + + await Console.Out.WriteAsync("Instruction: "); + instruction = Console.ReadLine() ?? 
"Ask me for instructions."; + } + } + + private static async Task GetDefaultModel() + { + var uri = new Uri(DefaultModelUri); + var modelName = uri.Segments[^1]; + await Console.Out.WriteLineAsync($"The following model will be used: {modelName}"); + var modelPath = Path.Combine(ModelsDirectory, modelName); + if(!Directory.Exists(ModelsDirectory)) + { + Directory.CreateDirectory(ModelsDirectory); + } + + if (File.Exists(modelPath)) + { + await Console.Out.WriteLineAsync($"Existing model found, using {modelPath}"); + } + else + { + await Console.Out.WriteLineAsync($"Model not found locally, downloading {DefaultModelUri}..."); + using var http = new HttpClient(); + await using var downloadStream = await http.GetStreamAsync(uri); + await using var fileStream = new FileStream(modelPath, FileMode.Create, FileAccess.Write); + await downloadStream.CopyToAsync(fileStream); + await Console.Out.WriteLineAsync($"Model downloaded and saved to {modelPath}"); + } + + + return modelPath; + } +} + + +``` \ No newline at end of file diff --git a/docs/Examples/GrammerJsonResponse.md b/docs/Examples/GrammerJsonResponse.md new file mode 100644 index 000000000..59672d68f --- /dev/null +++ b/docs/Examples/GrammerJsonResponse.md @@ -0,0 +1,55 @@ +# Grammer json response + +```cs +using LLama.Common; +using LLama.Grammars; + +public class GrammarJsonResponse +{ + public static async Task Run() + { + var gbnf = (await File.ReadAllTextAsync("Assets/json.gbnf")).Trim(); + var grammar = Grammar.Parse(gbnf, "root"); + + Console.Write("Please input your model path: "); + var modelPath = Console.ReadLine(); + + var parameters = new ModelParams(modelPath) + { + ContextSize = 1024, + Seed = 1337, + GpuLayerCount = 5 + }; + using var model = LLamaWeights.LoadFromFile(parameters); + var ex = new StatelessExecutor(model, parameters); + + Console.ForegroundColor = ConsoleColor.Yellow; + Console.WriteLine("The executor has been enabled. In this example, the LLM will follow your instructions and always respond in a JSON format. 
\ No newline at end of file
diff --git a/docs/Examples/KernelMemory.md b/docs/Examples/KernelMemory.md
new file mode 100644
index 000000000..76ac266a8
--- /dev/null
+++ b/docs/Examples/KernelMemory.md
@@ -0,0 +1,62 @@
+# Kernel memory
+
+This example plugs LLamaSharp into Microsoft Kernel Memory, imports a PDF document, and answers a question grounded in that document.
+
+```cs
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using LLamaSharp.KernelMemory;
+using Microsoft.KernelMemory;
+using Microsoft.KernelMemory.Configuration;
+using Microsoft.KernelMemory.Handlers;
+
+public class KernelMemory
+{
+    public static async Task Run()
+    {
+        Console.WriteLine("Example from: https://github.com/microsoft/kernel-memory/blob/main/examples/101-using-core-nuget/Program.cs");
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+        var searchClientConfig = new SearchClientConfig
+        {
+            MaxMatchesCount = 1,
+            AnswerTokens = 100,
+        };
+        var memory = new KernelMemoryBuilder()
+            .WithLLamaSharpDefaults(new LLamaSharpConfig(modelPath)
+            {
+                DefaultInferenceParams = new Common.InferenceParams
+                {
+                    AntiPrompts = new List<string> { "\n\n" }
+                }
+            })
+            .WithSearchClientConfig(searchClientConfig)
+            .With(new TextPartitioningOptions
+            {
+                MaxTokensPerParagraph = 300,
+                MaxTokensPerLine = 100,
+                OverlappingTokens = 30
+            })
+            .Build();
+
+        await memory.ImportDocumentAsync(@"./Assets/sample-SK-Readme.pdf", steps: Constants.PipelineWithoutSummary);
+
+        var question = "What's Semantic Kernel?";
+
+        Console.WriteLine($"\n\nQuestion: {question}");
+
+        var answer = await memory.AskAsync(question);
+
+        Console.WriteLine($"\nAnswer: {answer.Result}");
+
+        Console.WriteLine("\n\n  Sources:\n");
+
+        foreach (var x in answer.RelevantSources)
+        {
+            Console.WriteLine($"  - {x.SourceName}  - {x.Link} [{x.Partitions.First().LastUpdate:D}]");
+        }
+    }
+}
+
+```
\ No newline at end of file
diff --git a/docs/Examples/SemanticKernelMemory.md b/docs/Examples/SemanticKernelMemory.md
new file mode 100644
index 000000000..6ea7bd485
--- /dev/null
+++ b/docs/Examples/SemanticKernelMemory.md
@@ -0,0 +1,170 @@
+# Semantic kernel memory
+
+This example uses LLamaSharp embeddings with Semantic Kernel's volatile memory store to index a handful of GitHub links and search them by semantic similarity.
+
+```cs
+using LLama.Common;
+using Microsoft.SemanticKernel;
+using Microsoft.SemanticKernel.Memory;
+using LLamaSharp.SemanticKernel.TextEmbedding;
+using Microsoft.SemanticKernel.AI.Embeddings;
+using Microsoft.SemanticKernel.Plugins.Memory;
+
+public class SemanticKernelMemory
+{
+    private const string MemoryCollectionName = "SKGitHub";
+
+    public static async Task Run()
+    {
+        var loggerFactory = ConsoleLogger.LoggerFactory;
+        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/samples/KernelSyntaxExamples/Example14_SemanticMemory.cs");
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+
+        var seed = 1337u;
+        // Load weights into memory
+        var parameters = new ModelParams(modelPath)
+        {
+            Seed = seed,
+            EmbeddingMode = true
+        };
+
+        using var model = LLamaWeights.LoadFromFile(parameters);
+        var embedding = new LLamaEmbedder(model, parameters);
+
+        Console.WriteLine("====================================================");
+        Console.WriteLine("======== Semantic Memory (volatile, in RAM) ========");
+        Console.WriteLine("====================================================");
+
+        /* You can build your own semantic memory combining an Embedding Generator
+         * with a Memory storage that supports search by similarity (i.e. semantic search).
+         *
+         * In this example we use a volatile memory, a local simulation of a vector DB.
+         *
+         * You can replace VolatileMemoryStore with Qdrant (see QdrantMemoryStore connector)
+         * or implement your connectors for Pinecone, Vespa, Postgres + pgvector, SQLite VSS, etc.
+         */
+
+        var memory = new MemoryBuilder()
+            .WithTextEmbeddingGeneration(new LLamaSharpEmbeddingGeneration(embedding))
+            .WithMemoryStore(new VolatileMemoryStore())
+            .Build();
+
+        await RunExampleAsync(memory);
+    }
+
+    private static async Task RunExampleAsync(ISemanticTextMemory memory)
+    {
+        await StoreMemoryAsync(memory);
+
+        await SearchMemoryAsync(memory, "How do I get started?");
+
+        /*
+        Output:
+
+        Query: How do I get started?
+
+        Result 1:
+          URL:     : https://github.com/microsoft/semantic-kernel/blob/main/README.md
+          Title    : README: Installation, getting started, and how to contribute
+
+        Result 2:
+          URL:     : https://github.com/microsoft/semantic-kernel/blob/main/samples/dotnet-jupyter-notebooks/00-getting-started.ipynb
+          Title    : Jupyter notebook describing how to get started with the Semantic Kernel
+
+        */
+
+        await SearchMemoryAsync(memory, "Can I build a chat with SK?");
+
+        /*
+        Output:
+
+        Query: Can I build a chat with SK?
+
+        Result 1:
+          URL:     : https://github.com/microsoft/semantic-kernel/tree/main/samples/skills/ChatSkill/ChatGPT
+          Title    : Sample demonstrating how to create a chat skill interfacing with ChatGPT
+
+        Result 2:
+          URL:     : https://github.com/microsoft/semantic-kernel/blob/main/samples/apps/chat-summary-webapp-react/README.md
+          Title    : README: README associated with a sample chat summary react-based webapp
+
+        */
+
+        await SearchMemoryAsync(memory, "Jupyter notebook");
+
+        await SearchMemoryAsync(memory, "README: README associated with a sample chat summary react-based webapp");
+
+        await SearchMemoryAsync(memory, "Jupyter notebook describing how to pass prompts from a file to a semantic skill or function");
+    }
+
+    private static async Task SearchMemoryAsync(ISemanticTextMemory memory, string query)
+    {
+        Console.WriteLine("\nQuery: " + query + "\n");
+
+        var memories = memory.SearchAsync(MemoryCollectionName, query, limit: 10, minRelevanceScore: 0.5);
+
+        int i = 0;
+        await foreach (MemoryQueryResult result in memories)
+        {
+            Console.WriteLine($"Result {++i}:");
+            Console.WriteLine("  URL:     : " + result.Metadata.Id);
+            Console.WriteLine("  Title    : " + result.Metadata.Description);
+            Console.WriteLine("  Relevance: " + result.Relevance);
+            Console.WriteLine();
+        }
+
+        Console.WriteLine("----------------------");
+    }
+
+    private static async Task StoreMemoryAsync(ISemanticTextMemory memory)
+    {
+        /* Store some data in the semantic memory.
+         *
+         * When using Azure Cognitive Search the data is automatically indexed on write.
+         *
+         * When using the combination of VolatileStore and Embedding generation, SK takes
+         * care of creating and storing the index
+         */
+
+        Console.WriteLine("\nAdding some GitHub file URLs and their descriptions to the semantic memory.");
+        var githubFiles = SampleData();
+        var i = 0;
+        foreach (var entry in githubFiles)
+        {
+            var result = await memory.SaveReferenceAsync(
+                collection: MemoryCollectionName,
+                externalSourceName: "GitHub",
+                externalId: entry.Key,
+                description: entry.Value,
+                text: entry.Value);
+
+            Console.WriteLine($"#{++i} saved.");
+            Console.WriteLine(result);
+        }
+
+        Console.WriteLine("\n----------------------");
+    }
+
+    private static Dictionary<string, string> SampleData()
+    {
+        return new Dictionary<string, string>
+        {
+            ["https://github.com/microsoft/semantic-kernel/blob/main/README.md"]
+                = "README: Installation, getting started, and how to contribute",
+            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/notebooks/02-running-prompts-from-file.ipynb"]
+                = "Jupyter notebook describing how to pass prompts from a file to a semantic skill or function",
+            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/notebooks//00-getting-started.ipynb"]
+                = "Jupyter notebook describing how to get started with the Semantic Kernel",
+            ["https://github.com/microsoft/semantic-kernel/tree/main/samples/skills/ChatSkill/ChatGPT"]
+                = "Sample demonstrating how to create a chat skill interfacing with ChatGPT",
+            ["https://github.com/microsoft/semantic-kernel/blob/main/dotnet/src/SemanticKernel/Memory/VolatileMemoryStore.cs"]
+                = "C# class that defines a volatile embedding store",
+            ["https://github.com/microsoft/semantic-kernel/blob/main/samples/dotnet/KernelHttpServer/README.md"]
+                = "README: How to set up a Semantic Kernel Service API using Azure Function Runtime v4",
+            ["https://github.com/microsoft/semantic-kernel/blob/main/samples/apps/chat-summary-webapp-react/README.md"]
+                = "README: README associated with a sample chat summary react-based webapp",
+        };
+    }
+}
+
+```
\ No newline at end of file
diff --git a/docs/Examples/SemanticKernelPrompt.md b/docs/Examples/SemanticKernelPrompt.md
new file mode 100644
index 000000000..f7589a44e
--- /dev/null
+++ b/docs/Examples/SemanticKernelPrompt.md
@@ -0,0 +1,55 @@
+# Semantic kernel prompt
+
+This example registers a LLamaSharp `StatelessExecutor` as a Semantic Kernel text generation service and uses a prompt function to produce one-line summaries.
+
+```cs
+using System.Security.Cryptography;
+using LLama.Common;
+using LLamaSharp.SemanticKernel.ChatCompletion;
+using Microsoft.SemanticKernel;
+using LLamaSharp.SemanticKernel.TextCompletion;
+using Microsoft.SemanticKernel.TextGeneration;
+using Microsoft.Extensions.DependencyInjection;
+
+public class SemanticKernelPrompt
+{
+    public static async Task Run()
+    {
+        Console.WriteLine("Example from: https://github.com/microsoft/semantic-kernel/blob/main/dotnet/README.md");
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+
+        // Load weights into memory
+        var parameters = new ModelParams(modelPath);
+        using var model = LLamaWeights.LoadFromFile(parameters);
+        var ex = new StatelessExecutor(model, parameters);
+
+        var builder = Kernel.CreateBuilder();
+        builder.Services.AddKeyedSingleton<ITextGenerationService>("local-llama", new LLamaSharpTextCompletion(ex));
+
+        var kernel = builder.Build();
+
+        var prompt = @"{{$input}}
+
+One line TLDR with the fewest words.";
+
+        ChatRequestSettings settings = new() { MaxTokens = 100 };
+        var summarize = kernel.CreateFunctionFromPrompt(prompt, settings);
+
+        string text1 = @"
+1st Law of Thermodynamics - Energy cannot be created or destroyed.
+2nd Law of Thermodynamics - For a spontaneous process, the entropy of the universe increases.
+3rd Law of Thermodynamics - A perfect crystal at zero Kelvin has zero entropy.";
+
+        string text2 = @"
+1. An object at rest remains at rest, and an object in motion remains in motion at constant speed and in a straight line unless acted on by an unbalanced force.
+2. The acceleration of an object depends on the mass of the object and the amount of force applied.
+3. Whenever one object exerts a force on another object, the second object exerts an equal and opposite force on the first.";
+
+        Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text1 })).GetValue<string>());
+
+        Console.WriteLine((await kernel.InvokeAsync(summarize, new() { ["input"] = text2 })).GetValue<string>());
+    }
+}
+
+```
\ No newline at end of file
diff --git a/docs/Examples/TalkToYourself.md b/docs/Examples/TalkToYourself.md
new file mode 100644
index 000000000..a45676b14
--- /dev/null
+++ b/docs/Examples/TalkToYourself.md
@@ -0,0 +1,73 @@
+# Talk to yourself
+
+This example creates two executors that share one set of model weights and lets them converse with each other until a key is pressed.
+
+```cs
+using System.Security.Cryptography;
+using System.Text;
+using LLama.Abstractions;
+using LLama.Common;
+
+public class TalkToYourself
+{
+    public static async Task Run()
+    {
+        Console.Write("Please input your model path: ");
+        var modelPath = Console.ReadLine();
+
+        // Load weights into memory
+        var @params = new ModelParams(modelPath);
+        using var weights = LLamaWeights.LoadFromFile(@params);
+
+        // Create 2 contexts sharing the same weights
+        using var aliceCtx = weights.CreateContext(@params);
+        var alice = new InteractiveExecutor(aliceCtx);
+        using var bobCtx = weights.CreateContext(@params);
+        var bob = new InteractiveExecutor(bobCtx);
+
+        // Initial alice prompt
+        var alicePrompt = "Transcript of a dialog, where the Alice interacts a person named Bob. Alice is friendly, kind, honest and good at writing.\nAlice: Hello";
+        var aliceResponse = await Prompt(alice, ConsoleColor.Green, alicePrompt, false, false);
+
+        // Initial bob prompt
+        var bobPrompt = $"Transcript of a dialog, where the Bob interacts a person named Alice. Bob is smart, intellectual and good at writing.\nAlice: Hello{aliceResponse}";
+        var bobResponse = await Prompt(bob, ConsoleColor.Red, bobPrompt, true, true);
+
+        // swap back and forth from Alice to Bob
+        while (true)
+        {
+            aliceResponse = await Prompt(alice, ConsoleColor.Green, bobResponse, false, true);
+            bobResponse = await Prompt(bob, ConsoleColor.Red, aliceResponse, false, true);
+
+            if (Console.KeyAvailable)
+                break;
+        }
+    }
+
+    private static async Task<string> Prompt(ILLamaExecutor executor, ConsoleColor color, string prompt, bool showPrompt, bool showResponse)
+    {
+        var inferenceParams = new InferenceParams
+        {
+            Temperature = 0.9f,
+            AntiPrompts = new List<string> { "Alice:", "Bob:", "User:" },
+            MaxTokens = 128,
+            Mirostat = MirostatType.Mirostat2,
+            MirostatTau = 10,
+        };
+
+        Console.ForegroundColor = ConsoleColor.White;
+        if (showPrompt)
+            Console.Write(prompt);
+
+        Console.ForegroundColor = color;
+        var builder = new StringBuilder();
+        await foreach (var text in executor.InferAsync(prompt, inferenceParams))
+        {
+            builder.Append(text);
+            if (showResponse)
+                Console.Write(text);
+        }
+
+        return builder.ToString();
+    }
+}
+
+```
\ No newline at end of file