diff --git a/1_instruction_tuning/student_examples/KevinMenpara/additional_requirements.sh b/1_instruction_tuning/student_examples/KevinMenpara/additional_requirements.sh
new file mode 100644
index 00000000..4504db4f
--- /dev/null
+++ b/1_instruction_tuning/student_examples/KevinMenpara/additional_requirements.sh
@@ -0,0 +1 @@
+pip install ipywidgets  # NOTE(review): presumably needed so the notebook's progress bars render as Jupyter widgets — confirm
\ No newline at end of file
diff --git a/1_instruction_tuning/student_examples/KevinMenpara/chat_templates_example.ipynb b/1_instruction_tuning/student_examples/KevinMenpara/chat_templates_example.ipynb
new file mode 100644
index 00000000..556c1511
--- /dev/null
+++ b/1_instruction_tuning/student_examples/KevinMenpara/chat_templates_example.ipynb
@@ -0,0 +1,501 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "vZAvFVIAtFlq"
+ },
+ "source": [
+ "# Exploring Chat Templates with SmolLM2\n",
+ "\n",
+ "This notebook demonstrates how to use chat templates with the `SmolLM2` model. Chat templates help structure interactions between users and AI models, ensuring consistent and contextually appropriate responses."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "id": "K-lZu8JvtwUN"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/vnd.jupyter.widget-view+json": {
+ "model_id": "534ba01e4dff442987849c62c8dde24e",
+ "version_major": 2,
+ "version_minor": 0
+ },
+ "text/plain": [
+ "VBox(children=(HTML(value='
user\n",
+ "Hello, how are you?<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "I'm doing well, thank you! How can I assist you today?<|im_end|>\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Render the whole conversation as a single templated string (no token ids).\n",
+ "input_text = tokenizer.apply_chat_template(\n",
+ "    messages,\n",
+ "    tokenize=False,\n",
+ ")\n",
+ "print(\"Conversation with template:\", input_text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sfvdglOqtFls"
+ },
+ "source": [
+ "# Decode the conversation\n",
+ "\n",
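+ "Note that the decoded conversation matches the one above, but ends with an extra assistant header (`<|im_start|>assistant`): `add_generation_prompt=True` appends it so the model knows to generate the next assistant reply.\n"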
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "mXUVdPeytFls",
+ "outputId": "16c52bee-aa58-4a4c-bd9e-fe0cf3def3e9"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Conversation decoded: <|im_start|>user\n",
+ "Hello, how are you?<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "I'm doing well, thank you! How can I assist you today?<|im_end|>\n",
+ "<|im_start|>assistant\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Tokenize with the assistant header appended, then decode the ids back to text.\n",
+ "input_text = tokenizer.apply_chat_template(\n",
+ "    messages,\n",
+ "    add_generation_prompt=True,\n",
+ "    tokenize=True,\n",
+ ")\n",
+ "print(\n",
+ "    \"Conversation decoded:\",\n",
+ "    tokenizer.decode(token_ids=input_text),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "UcZQpspEtFlt"
+ },
+ "source": [
+ "# Tokenize the conversation\n",
+ "\n",
+ "Of course, the tokenizer also converts the conversation and its special tokens into IDs that map to entries in the model's vocabulary.\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "jc2PLxAMtFlt",
+ "outputId": "450fab64-ad81-4aae-b180-697edc3312ad"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Conversation tokenized: [1, 4093, 198, 19556, 28, 638, 359, 346, 47, 2, 198, 1, 520, 9531, 198, 57, 5248, 2567, 876, 28, 9984, 346, 17, 1073, 416, 339, 4237, 346, 1834, 47, 2, 198, 1, 520, 9531, 198]\n"
+ ]
+ }
+ ],
+ "source": [
+ "# `tokenize` is left at its default, so the result printed below is a list of token ids.\n",
+ "input_text = tokenizer.apply_chat_template(\n",
+ "    messages,\n",
+ "    add_generation_prompt=True,\n",
+ ")\n",
+ "print(\"Conversation tokenized:\", input_text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "m3eNp9a0tFlt"
+ },
+ "source": [
+ "\n",
+ "
Exercise: Process a dataset for SFT
\n",
+ "
Take a dataset from the Hugging Face hub and process it for SFT.
\n",
+ "
Difficulty Levels
\n",
+ "
🐢 Convert the `HuggingFaceTB/smoltalk` dataset into chatml format.
\n",
+ "
🐕 Convert the `openai/gsm8k` dataset into chatml format.
\n",
+ "
"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "collapsed": true,
+ "id": "qbkXV2_ItFlt"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\kevin menpara\\AppData\\Local\\Temp\\ipykernel_26164\\4068687702.py:1: DeprecationWarning: Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython.display\n",
+ " from IPython.core.display import display, HTML\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "# `IPython.core.display` is deprecated for these names (since IPython 7.14);\n",
+ "# `IPython.display` is the supported import location.\n",
+ "from IPython.display import HTML, display\n",
+ "\n",
+ "display(\n",
+ " HTML(\n",
+ " \"\"\"\n",
+ "\"\"\"\n",
+ " )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "4p3atw4_tFlu",
+ "outputId": "c90a92f0-2bb7-4691-e1e8-a5d2a8d641af"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Map: 100%|██████████| 2260/2260 [00:00<00:00, 10706.27 examples/s]\n",
+ "Map: 100%|██████████| 119/119 [00:00<00:00, 2495.12 examples/s]\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'full_topic': 'Work/Career development/Mentorship',\n",
+ " 'messages': [{'content': 'Hi', 'role': 'user'},\n",
+ " {'content': 'Hello! How can I help you today?', 'role': 'assistant'},\n",
+ " {'content': \"I'm looking for career advice. I want to find a new job, but I'm not sure what I want to do.\",\n",
+ " 'role': 'user'},\n",
+ " {'content': 'Career development can be challenging. What are your current skills and interests that might help narrow down some options?',\n",
+ " 'role': 'assistant'},\n",
+ " {'content': \"I have experience in marketing and enjoy working with people. I'm also interested in learning more about data analysis.\",\n",
+ " 'role': 'user'},\n",
+ " {'content': \"That's a great combination. You might consider roles like marketing analyst or business development, which combine people skills with data analysis. I can provide more information on those careers if you'd like.\",\n",
+ " 'role': 'assistant'},\n",
+ " {'content': 'That sounds helpful. Can you also suggest some resources for learning data analysis?',\n",
+ " 'role': 'user'},\n",
+ " {'content': 'Absolutely. Online courses like Coursera, edX, and LinkedIn Learning offer a wide range of data analysis courses. Additionally, you can explore professional networks like LinkedIn groups or attend industry events to learn from others in the field.',\n",
+ " 'role': 'assistant'}]}"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from datasets import load_dataset\n",
+ "\n",
+ "ds = load_dataset(\"HuggingFaceTB/smoltalk\", \"everyday-conversations\")\n",
+ "\n",
+ "\n",
+ "def process_dataset(sample):\n",
+ "    \"\"\"Render one smoltalk sample as a ChatML-formatted string.\n",
+ "\n",
+ "    The sample already carries a `messages` list of role/content dicts, so the\n",
+ "    conversion is a single `apply_chat_template` call on that list.\n",
+ "\n",
+ "    Args:\n",
+ "        sample: one dataset row with a `messages` column.\n",
+ "\n",
+ "    Returns:\n",
+ "        A dict holding the templated conversation under `text`. `Dataset.map`\n",
+ "        adds it as a new column next to the existing ones.\n",
+ "    \"\"\"\n",
+ "    # 🐢 tokenize=False returns the templated text instead of token ids. No\n",
+ "    # generation prompt is requested: these are finished conversations used as\n",
+ "    # training text, not prompts awaiting a reply.\n",
+ "    chatml_text = tokenizer.apply_chat_template(sample[\"messages\"], tokenize=False)\n",
+ "    return {\"text\": chatml_text}\n",
+ "\n",
+ "\n",
+ "ds = ds.map(process_dataset)\n",
+ "ds[\"train\"][1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "id": "81fQeazltFlu"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "display(\n",
+ " HTML(\n",
+ " \"\"\"\n",
+ "\"\"\"\n",
+ " )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "collapsed": true,
+ "id": "bWUSv7NMtFlu",
+ "outputId": "e3d5bba4-4356-44fd-bad8-57f56099d7fe"
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Map: 100%|██████████| 7473/7473 [00:10<00:00, 705.25 examples/s]\n",
+ "Map: 100%|██████████| 1319/1319 [00:01<00:00, 741.44 examples/s]\n"
+ ]
+ }
+ ],
+ "source": [
+ "ds = load_dataset(\"openai/gsm8k\", \"main\")\n",
+ "# print(ds['train'][0])\n",
+ "# Dataset layout: two splits, 'train' and 'test',\n",
+ "# each with the features 'question' and 'answer'.\n",
+ "\n",
+ "\n",
+ "def process_dataset(sample):\n",
+ "    \"\"\"Convert one GSM8K row into chat messages plus their token ids.\n",
+ "\n",
+ "    Args:\n",
+ "        sample: one dataset row with `question` and `answer` fields.\n",
+ "\n",
+ "    Returns:\n",
+ "        A dict with two new columns: `messages` (the user/assistant turns) and\n",
+ "        `tokenized_messages` (the chat-templated conversation as token ids).\n",
+ "    \"\"\"\n",
+ "    # Build the turns; an empty question or answer is skipped.\n",
+ "    messages = []\n",
+ "    if sample['question']:\n",
+ "        messages.append({'role': 'user', 'content': sample['question']})\n",
+ "    if sample['answer']:\n",
+ "        messages.append({'role': 'assistant', 'content': sample['answer']})\n",
+ "\n",
+ "    # The conversation already ends with the assistant's answer, so no\n",
+ "    # generation prompt is appended: add_generation_prompt=True would leave a\n",
+ "    # dangling, empty `<|im_start|>assistant` header at the end of every\n",
+ "    # training example.\n",
+ "    tokenized_data = tokenizer.apply_chat_template(\n",
+ "        messages,\n",
+ "        tokenize=True,\n",
+ "        add_generation_prompt=False,\n",
+ "    )\n",
+ "\n",
+ "    # Return `messages` explicitly rather than mutating `sample` in place, so\n",
+ "    # the new column does not depend on how `map` treats in-place edits.\n",
+ "    return {\n",
+ "        'messages': messages,\n",
+ "        'tokenized_messages': tokenized_data,\n",
+ "    }\n",
+ "\n",
+ "# Apply the process_dataset function to the training set\n",
+ "ds = ds.map(process_dataset)\n",
+ "\n",
+ "# Access the first tokenized message\n",
+ "# print(ds['train'][0]['tokenized_messages'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "qlXCuRKotFlu"
+ },
+ "source": [
+ "## Conclusion\n",
+ "\n",
+ "This notebook demonstrated how to apply chat templates with the `SmolLM2` model. By structuring interactions with chat templates, we can ensure that AI models provide consistent and contextually relevant responses.\n",
+ "\n",
+ "In the exercise you tried out converting a dataset into chatml format. Luckily, TRL will do this for you, but it's useful to understand what's going on under the hood."
+ ]
+ }
+ ],
+ "metadata": {
+ "accelerator": "GPU",
+ "colab": {
+ "gpuType": "T4",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}