diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..4c3fa87
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,44 @@
+
+FROM python:3.8-slim-buster
+
+# Install the security updates and the toolchain needed to build the Python
+# dependencies. Everything runs in a single layer: a cached `apt-get update`
+# layer can never go stale, and the cleanup actually shrinks the image
+# (files deleted in a later layer still exist in the earlier one).
+# libc6-dev is listed explicitly because --no-install-recommends would
+# otherwise leave gcc without the C library headers.
+RUN apt-get update \
+    && apt-get -y upgrade \
+    && apt-get -y install --no-install-recommends \
+        gcc \
+        libc6-dev \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create an unprivileged user that owns the application directory.
+RUN groupadd --system app \
+    && useradd --system --gid app --create-home app \
+    && mkdir -p /opt/app \
+    && chown app:app /opt/app
+
+WORKDIR /opt/app
+
+# Install the app libraries from the manifest alone, so this layer is only
+# rebuilt when requirements.txt changes (not on every source edit).
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Install SpaCy small model
+RUN python -m spacy download en_core_web_sm --no-cache-dir
+
+# Copy the application.
+COPY --chown=app:app . /opt/app
+
+EXPOSE 3978
+
+# Drop root privileges for the running container.
+USER app
+
+# Start the app.
+ENTRYPOINT [ "python" ]
+CMD [ "main.py" ]
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..69d5e44
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Joffrey Bienvenu
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..63a9fee
--- /dev/null
+++ b/README.md
@@ -0,0 +1,80 @@
+# chatbot
+
+
+
+
+
+## Cross-platform implementation
+
+
+## Fonctionnalités du Bot :
+
+### Traitement des inputs "utilisateurs"
+
+- Bert ?
+
+### Réponses possibles
+
+- Accueillir
+  - Décrire les fonctionnalités du bot
+  - Répondre
+- Donner les heures d'ouverture
+- Afficher une liste d'objets, de produits
+
+### Nice to have
+
+- Réserver un service / un produit
+  - Gérer un agenda
+  - Vérifier la disponibilité
+  - 
+- 
+
+**In English**, the bot should be able to:
+
+- Understand phrases related to a room reservation.
+  Examples that the bot will have to understand:
+
+        > I wish to reserve a room for 2 people.
+        > I wish to reserve a room for 4 days
+        > Do you have rooms available from July 23rd?
+        > I would like to reserve a room for two days and for two people
+
+- Understand phrases related to a table reservation for the restaurant.
+
+        > I would like to make a reservation for tonight.
+        > I'd like to reserve a table for four people.
+
+- Must ensure a continuous and ongoing conversation. Example of a complete conversation:
+
+        > Customer : Hello !
+        > Bot : Hello, how can I help you?
+        > Customer: I would like to reserve a table for 4 people ?
+        > Bot : For which date would you like to reserve your table?
+        > Customer : Today at 7:00 pm
+        > Bot : What name should I make the reservation under?
+        > Customer  : My name is Mr. Dupont!
+        > Bot : Very well Mr Dupont, I confirm you the reservation of a table for 4 people tonight at 7:00 pm.
+        > Bot : Can I help you with something else?
+        > Customer : No thanks
+        > Bot: Have a nice day.
+
+- Understand when the client is angry. In this case, the bot will indicate that it is transmitting the conversation to a human.
+
+        > You're incompetent!
+        > My room is dirty! This is outrageous!
+        > I want to talk to a human.
+
+### Nice-to-have features
+- Create an API of your bot to make it cross-platform
+- Use Docker
+
+
+
+
+## Hébergement du Bot
+
+Timeline:
+- Établir l'objectif (déployer bot cross-plateforme + créer propre modèle)
+- Trouver un framework >> MSBotFramework
+- Créer un dataset
+- Déployer dummy bot
\ No newline at end of file
diff --git a/assets/conversation_simple.svg b/assets/conversation_simple.svg
new file mode 100644
index 0000000..4bc7af6
--- /dev/null
+++ b/assets/conversation_simple.svg
@@ -0,0 +1,3 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" width="401px" height="451px" viewBox="-0.5 -0.5 401 451" content="&lt;mxfile host=&quot;app.diagrams.net&quot; modified=&quot;2021-02-06T15:46:22.019Z&quot; agent=&quot;5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36&quot; etag=&quot;V9uvSWWVMhQvaqV5nhh1&quot; version=&quot;14.2.9&quot; type=&quot;github&quot;&gt;&lt;diagram id=&quot;7fwvKr51Cng7JdYTOsqx&quot; name=&quot;Page-1&quot;&gt;1ZbBcpswEIafpQcdM4Mkm9jHhibpoe1M40PaU0dGi1EiWEYIY/fpKwUZTEg67TTjJjM+SP+uEPr2txbCk2J3bUSVf0YJmrBI7gj/QBijM8aI/0Vy3ylLGnfCxigZkgZhpX5CEKOgNkpCPUq0iNqqaiymWJaQ2pEmjMF2nJahHu9aiQ1MhFUq9FS9VdLmnbpg54P+EdQmP+xM42UXKcQhOZykzoXE9kjil4QnBtF2o2KXgPbwDly6dVfPRPsXM1DaP1mQffka3/64TuTN2adv2zy+/07Pz8JTtkI34cAXWN5hYwhLnH4HPoiNNELVbmhIwsn7ZQ1mCy4lakqfkOaiWBsIx7T7AzuDTSnBbx8RftHmysKqEqmPts4tTsttod2MumF4ETAWds+ekPbcnOEAC7Bm71LCglkgvR9P26FuLA5aflSzedBEsMqmf/BA0w0C0L+Ay34PN8ViraD0fw5PsQJTexN7zoRf/RvNTGmdoEbzsJZLAYssdXptDd7DUSROF7DOXoZ/b/VQAMqeqAA7ZQX4pAL8tbmURVNI81Mymk0YJWNjlo2yb9iUy0e8F1Pe9KTA5xPg7LWZksf/2ZTxhNEN+K4jrEJvTMgy1+2brh1Bd5+u/e3pm5a7Ycs+Er17g6alj0w7e6qXvVBB3HT4CHmIHX3K8ctf&lt;/diagram&gt;&lt;/mxfile&gt;" resource="https://app.diagrams.net/#HJoffreybvn%2Fresa-chatbot%2Fmaster%2Fassets%2Fconversation_simple.svg"><defs/><g><rect x="0" y="0" width="260" height="50" fill="#ffffff" stroke="#000000" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject style="overflow: visible; text-align: left;" pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 258px; height: 1px; padding-top: 25px; margin-left: 1px;"><div style="box-sizing: border-box; font-size: 0; text-align: center; "><div style="display: inline-block; font-size: 12px; 
font-family: Helvetica; color: #000000; line-height: 1.2; pointer-events: all; white-space: normal; word-wrap: normal; ">Bonjour, je voudrais réserver une chambre</div></div></div></foreignObject><text x="130" y="29" fill="#000000" font-family="Helvetica" font-size="12px" text-anchor="middle">Bonjour, je voudrais réserver une chambre</text></switch></g><rect x="180" y="80" width="220" height="50" fill="#dae8fc" stroke="#6c8ebf" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject style="overflow: visible; text-align: left;" pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 218px; height: 1px; padding-top: 105px; margin-left: 181px;"><div style="box-sizing: border-box; font-size: 0; text-align: center; "><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: #000000; line-height: 1.2; pointer-events: all; white-space: normal; word-wrap: normal; ">Bonjour, combien de personnes ?</div></div></div></foreignObject><text x="290" y="109" fill="#000000" font-family="Helvetica" font-size="12px" text-anchor="middle">Bonjour, combien de personnes ?</text></switch></g><rect x="0" y="160" width="50" height="50" fill="#ffffff" stroke="#000000" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject style="overflow: visible; text-align: left;" pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 48px; height: 1px; padding-top: 185px; margin-left: 1px;"><div style="box-sizing: border-box; font-size: 0; text-align: center; "><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: #000000; 
line-height: 1.2; pointer-events: all; white-space: normal; word-wrap: normal; ">3</div></div></div></foreignObject><text x="25" y="189" fill="#000000" font-family="Helvetica" font-size="12px" text-anchor="middle">3</text></switch></g><rect x="250" y="240" width="150" height="50" fill="#dae8fc" stroke="#6c8ebf" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject style="overflow: visible; text-align: left;" pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 148px; height: 1px; padding-top: 265px; margin-left: 251px;"><div style="box-sizing: border-box; font-size: 0; text-align: center; "><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: #000000; line-height: 1.2; pointer-events: all; white-space: normal; word-wrap: normal; ">Combien de nuits ?</div></div></div></foreignObject><text x="325" y="269" fill="#000000" font-family="Helvetica" font-size="12px" text-anchor="middle">Combien de nuits ?</text></switch></g><rect x="0" y="320" width="50" height="50" fill="#ffffff" stroke="#000000" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject style="overflow: visible; text-align: left;" pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 48px; height: 1px; padding-top: 345px; margin-left: 1px;"><div style="box-sizing: border-box; font-size: 0; text-align: center; "><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: #000000; line-height: 1.2; pointer-events: all; white-space: normal; word-wrap: normal; ">2</div></div></div></foreignObject><text x="25" y="349" 
fill="#000000" font-family="Helvetica" font-size="12px" text-anchor="middle">2</text></switch></g><rect x="150" y="400" width="250" height="50" fill="#dae8fc" stroke="#6c8ebf" pointer-events="all"/><g transform="translate(-0.5 -0.5)"><switch><foreignObject style="overflow: visible; text-align: left;" pointer-events="none" width="100%" height="100%" requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility"><div xmlns="http://www.w3.org/1999/xhtml" style="display: flex; align-items: unsafe center; justify-content: unsafe center; width: 248px; height: 1px; padding-top: 425px; margin-left: 151px;"><div style="box-sizing: border-box; font-size: 0; text-align: center; "><div style="display: inline-block; font-size: 12px; font-family: Helvetica; color: #000000; line-height: 1.2; pointer-events: all; white-space: normal; word-wrap: normal; ">Reservation effectuée, bonne journée !</div></div></div></foreignObject><text x="275" y="429" fill="#000000" font-family="Helvetica" font-size="12px" text-anchor="middle">Reservation effectuée, bonne journée !</text></switch></g></g><switch><g requiredFeatures="http://www.w3.org/TR/SVG11/feature#Extensibility"/><a transform="translate(0,-5)" xlink:href="https://www.diagrams.net/doc/faq/svg-export-text-problems" target="_blank"><text text-anchor="middle" font-size="10px" x="50%" y="100%">Viewer does not support full SVG 1.1</text></a></switch></svg>
\ No newline at end of file
diff --git a/assets/images/profile_large.png b/assets/images/profile_large.png
new file mode 100644
index 0000000..d1f5247
Binary files /dev/null and b/assets/images/profile_large.png differ
diff --git a/assets/images/profile_small.png b/assets/images/profile_small.png
new file mode 100644
index 0000000..d86ebfb
Binary files /dev/null and b/assets/images/profile_small.png differ
diff --git a/assets/model/.empty b/assets/model/.empty
new file mode 100644
index 0000000..e69de29
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..09437ea
--- /dev/null
+++ b/config.py
@@ -0,0 +1,32 @@
+
+from os import environ
+
+
+class Config:
+    """Bot configuration class."""
+
+    # Deployment
+    PORT = int(environ.get("PORT", 3978))
+
+    # Azure deployment
+    APP_ID = environ.get("MS_APP_ID", "")
+    APP_PASSWORD = environ.get("MS_APP_PASSWORD", "")
+
+    # Models
+    MODEL_PREPROCESS = "en_core_web_sm"  # SpaCy smallest model - For preprocess
+    MODEL_MATCHING = "TF-IDF"  # PolyFuzz lightest model - Optimized for matching
+    MODEL_CLASSIFIER = "bert-base-uncased"  # HuggingFace smallest BERT model - For tokenization and classifying
+
+    # Remote files
+    s3_base_url = environ.get("S3_BASE_URL", "")
+
+    weight_file = "resa_BERT_model.pt"
+    MODEL_WEIGHT_URL = f"{s3_base_url}/{weight_file}"  # Fine-tuned weights for BERT model
+    MODEL_WEIGHT_LOCAL_COPY = f"./assets/model/{weight_file}"
+
+    classes_file = "labels.pickle"
+    MODEL_CLASSES_URL = f"{s3_base_url}/{classes_file}"
+    MODEL_CLASSES_LOCAL_COPY = f"./assets/model/{classes_file}"
+
+    # Filters
+    FILTERS_TOML = "./filters.toml"
diff --git a/filters.toml b/filters.toml
new file mode 100644
index 0000000..3c02fa0
--- /dev/null
+++ b/filters.toml
@@ -0,0 +1,15 @@
+# TOML document to store filters
+
+[longtalk_make_reservation]
+    # NOTE(review): "%s" in each regex is presumably replaced by the matched word before use - confirm in the NLU code.
+    # Size of the room: How many people ?
+    [longtalk_make_reservation.people]
+    words = ["person", "people"]
+    regex = '''(?P<people>\d)\W%s'''
+    threshold = 0.85
+
+    # Duration of the booking: How long ?
+    [longtalk_make_reservation.duration]
+    words = ["day", "night"]
+    regex = '''(?P<duration>\d)\W%s'''
+    threshold = 0.85
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..6a24ddc
--- /dev/null
+++ b/main.py
@@ -0,0 +1,114 @@
+
+import sys
+import traceback
+from datetime import datetime
+from http import HTTPStatus
+
+from aiohttp import web
+from aiohttp.web import Request, Response, json_response
+from botbuilder.core import BotFrameworkAdapterSettings, TurnContext, BotFrameworkAdapter, ConversationState, MemoryStorage, UserState
+from botbuilder.core.integration import aiohttp_error_middleware
+from botbuilder.schema import Activity, ActivityTypes
+
+from src.dialogs import MainDialog, BookingRoomDialog
+from src.nlu import NLU
+from src import Bot
+from config import Config
+
+# Load the configuration (the values come from config.Config)
+config = Config()
+
+# Init a Bot adapter https://aka.ms/about-bot-adapter
+settings = BotFrameworkAdapterSettings(config.APP_ID, config.APP_PASSWORD)
+ADAPTER = BotFrameworkAdapter(settings)
+
+
+# Catch-all for errors
+async def on_error(context: TurnContext, error_: Exception):
+    """
+    Catch-all functions to write out errors on console log.
+    NOTE: In production environment, logging should be done
+    to Azure application insights.
+    """
+
+    # Print the error into the logs
+    print(f"\n [on_turn_error] unhandled error: {error_}", file=sys.stderr)
+    traceback.print_exc()
+
+    # Send a message to the user
+    await context.send_activity("The bot encountered an error or bug.")
+
+    # If the bot is run from the Bot Framework Emulator (dev environment),
+    # print a more complete error log.
+    if context.activity.channel_id == "emulator":
+
+        trace_activity = Activity(
+            label="TurnError",
+            name="on_turn_error Trace",
+            timestamp=datetime.utcnow(),
+            type=ActivityTypes.trace,
+            value=f"{error_}",
+            value_type="https://www.botframework.com/schemas/error",
+        )
+        await context.send_activity(trace_activity)
+
+        # Clear out state
+        await CONVERSATION_STATE.delete(context)
+
+
+# Set the error handler on the Adapter.
+ADAPTER.on_turn_error = on_error
+
+# Create MemoryStorage, UserState and ConversationState (both states share the same storage object)
+MEMORY = MemoryStorage()
+CONVERSATION_STATE = ConversationState(MEMORY)
+USER_STATE = UserState(MEMORY)
+
+# Load the NLU recognizer (a single instance, passed to every dialog below)
+nlu = NLU()
+
+# Create the dialogs: the main dialog receives the room-booking dialog as a sub-dialog
+dialog_room_reservation = BookingRoomDialog(nlu, USER_STATE)
+dialog_main = MainDialog(nlu, USER_STATE, dialog_room_reservation)
+
+# Create the bot, with the main dialog as the dialog it runs
+bot = Bot(CONVERSATION_STATE, USER_STATE, dialog_main)
+
+
+# Direct message API
+async def messages(req: Request) -> Response:
+    """
+    Main bot function: Listen for incoming API request.
+    Route: '/api/messages'.
+    """
+
+    # Filter only JSON requests
+    if "application/json" in req.headers["Content-Type"]:
+        body = await req.json()
+    else:
+        return Response(status=HTTPStatus.UNSUPPORTED_MEDIA_TYPE)
+
+    # Deserialize the JSON
+    activity = Activity().deserialize(body)
+
+    # Retrieve the authorization code if sent
+    auth_header = ""
+    if "Authorization" in req.headers:
+        auth_header = req.headers["Authorization"]
+
+    # Call the bot and send back its response
+    response = await ADAPTER.process_activity(activity, auth_header, bot.on_turn)
+    if response:
+        return json_response(data=response.body, status=response.status)
+
+    # Return HTTP-200 if no response is send back
+    return Response(status=HTTPStatus.OK)
+
+# Init and open routes for direct API call
+app = web.Application(middlewares=[aiohttp_error_middleware])
+app.router.add_post("/api/messages", messages)
+
+
+if __name__ == "__main__":
+    # Serve on every network interface, on the port taken from the configuration.
+    web.run_app(app, host="0.0.0.0", port=config.PORT)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..2d23dee
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,26 @@
+
+# MS Bot Framework
+botbuilder-core==4.11.0
+botbuilder-integration-aiohttp==4.11.0
+botbuilder-schema==4.11.0
+botframework-connector==4.11.0
+botbuilder-dialogs==4.11.0
+aiohttp==3.6.2
+
+# Preprocessing
+beautifulsoup4==4.9.3
+spacy==3.0.1
+Unidecode==1.1.2
+word2number==1.1
+contractions==0.0.45
+
+# Classification
+transformers==4.2.2
+torch==1.7.1
+requests==2.23.0
+
+# Matching
+polyfuzz==0.2.2
+toml==0.10.2
+pandas==1.2.1
+
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..b00d0e0
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1,4 @@
+
+from .bot import Bot
+
+__all__ = ["Bot"]
diff --git a/src/bot.py b/src/bot.py
new file mode 100644
index 0000000..e486862
--- /dev/null
+++ b/src/bot.py
@@ -0,0 +1,39 @@
+
+from botbuilder.schema import ChannelAccount
+from botbuilder.core import ActivityHandler, TurnContext, ConversationState, UserState
+from botbuilder.dialogs import Dialog
+
+from .dialogs.utils import Emoji
+from .dialogs.helpers import DialogHelper
+
+
+class Bot(ActivityHandler):
+
+    def __init__(self, conversation_state: ConversationState, user_state: UserState, dialog: Dialog):
+
+        self.conversation_state = conversation_state
+        self.user_state = user_state
+        self.dialog = dialog
+
+    async def on_members_added_activity(self, members_added: [ChannelAccount], turn_context: TurnContext):
+
+        # Send an "Hello" to any new user connected to the bot
+        for member in members_added:
+            if member.id != turn_context.activity.recipient.id:
+                await turn_context.send_activity(f"Hello {Emoji.WAVING_HAND.value}")
+
+    async def on_turn(self, turn_context: TurnContext):
+
+        await super().on_turn(turn_context)
+
+        # Save any state changes that might have occurred during the turn.
+        await self.conversation_state.save_changes(turn_context)
+        await self.user_state.save_changes(turn_context)
+
+    async def on_message_activity(self, turn_context: TurnContext):
+
+        await DialogHelper.run_dialog(
+            self.dialog,
+            turn_context,
+            self.conversation_state.create_property("DialogState"),
+        )
diff --git a/src/bot/dialog.py b/src/bot/dialog.py
new file mode 100644
index 0000000..0efb20c
--- /dev/null
+++ b/src/bot/dialog.py
@@ -0,0 +1,49 @@
+
+from botbuilder.core import ActivityHandler, ConversationState, TurnContext, UserState
+from botbuilder.dialogs import Dialog
+from helpers.dialog_helper import DialogHelper
+
+
+class DialogBot(ActivityHandler):
+    """
+    This Bot implementation can run any type of Dialog. The use of type parameterization is to allows multiple
+    different bots to be run at different endpoints within the same project. This can be achieved by defining distinct
+    Controller types each with dependency on distinct Bot types. The ConversationState is used by the Dialog system. The
+    UserState isn't, however, it might have been used in a Dialog implementation, and the requirement is that all
+    BotState objects are saved at the end of a turn.
+    """
+
+    def __init__(
+        self,
+        conversation_state: ConversationState,
+        user_state: UserState,
+        dialog: Dialog,
+    ):
+        if conversation_state is None:
+            raise TypeError(
+                "[DialogBot]: Missing parameter. conversation_state is required but None was given"
+            )
+        if user_state is None:
+            raise TypeError(
+                "[DialogBot]: Missing parameter. user_state is required but None was given"
+            )
+        if dialog is None:
+            raise Exception("[DialogBot]: Missing parameter. dialog is required")
+
+        self.conversation_state = conversation_state
+        self.user_state = user_state
+        self.dialog = dialog
+
+    async def on_turn(self, turn_context: TurnContext):
+        await super().on_turn(turn_context)
+
+        # Save any state changes that might have ocurred during the turn.
+        await self.conversation_state.save_changes(turn_context)
+        await self.user_state.save_changes(turn_context)
+
+    async def on_message_activity(self, turn_context: TurnContext):
+        await DialogHelper.run_dialog(
+            self.dialog,
+            turn_context,
+            self.conversation_state.create_property("DialogState"),
+        )
\ No newline at end of file
diff --git a/src/dialogs/__init__.py b/src/dialogs/__init__.py
new file mode 100644
index 0000000..a928908
--- /dev/null
+++ b/src/dialogs/__init__.py
@@ -0,0 +1,5 @@
+
+from .booking_room_dialog import BookingRoomDialog
+from .main_dialog import MainDialog
+
+__all__ = ["BookingRoomDialog", "MainDialog"]
diff --git a/src/dialogs/booking_room_dialog.py b/src/dialogs/booking_room_dialog.py
new file mode 100644
index 0000000..d24867e
--- /dev/null
+++ b/src/dialogs/booking_room_dialog.py
@@ -0,0 +1,179 @@
+
+from botbuilder.schema import ChannelAccount, CardAction, ActionTypes, SuggestedActions, Activity, ActivityTypes
+from botbuilder.dialogs import ComponentDialog, WaterfallDialog, WaterfallStepContext, DialogTurnResult
+from botbuilder.dialogs.prompts import TextPrompt, NumberPrompt, ChoicePrompt, ConfirmPrompt, AttachmentPrompt, PromptOptions, PromptValidatorContext
+from botbuilder.dialogs.choices import Choice
+from botbuilder.core import MessageFactory, UserState
+
+from src.nlu import Intent, NLU
+from .utils import Emoji
+from .helpers import NLUHelper
+from .data_models import RoomReservation
+
+
+class BookingRoomDialog(ComponentDialog):
+
+    def __init__(self, nlu_recognizer: NLU, user_state: UserState):
+        super(BookingRoomDialog, self).__init__(BookingRoomDialog.__name__)
+
+        # Load the NLU module
+        self._nlu_recognizer = nlu_recognizer
+
+        # Load the RoomReservation class
+        self.room_reservation_accessor = user_state.create_property("RoomReservation")
+
+        # Setup the waterfall dialog
+        self.add_dialog(WaterfallDialog("WFBookingDialog", [
+            self.people_step,
+            self.duration_step,
+            self.breakfast_step,
+            self.summary_step,
+        ]))
+
+        # Append the prompts and custom prompts
+        self.add_dialog(NumberPrompt("PeoplePrompt", BookingRoomDialog.people_prompt_validator))
+        self.add_dialog(NumberPrompt("DurationPrompt", BookingRoomDialog.duration_prompt_validator))
+        self.add_dialog(ConfirmPrompt("IsTakingBreakfastPrompt"))
+
+        self.initial_dialog_id = "WFBookingDialog"
+
+    @staticmethod
+    async def people_step(step_context: WaterfallStepContext) -> DialogTurnResult:
+        """Ask the user: how many people to make the reservation?"""
+
+        # Retrieve the booking keywords
+        booking_keywords: dict = step_context.options
+        step_context.values['booking_keywords'] = booking_keywords
+
+        # If the keyword 'people' exists and is filled, pass the question
+        if 'people' in booking_keywords and booking_keywords['people'] is not None:
+            return await step_context.next(booking_keywords['people'])
+
+        # Give user suggestions (1 or 2 people).
+        # The user can still write a custom number of people [1, 4].
+        options = PromptOptions(
+            prompt=Activity(
+
+                type=ActivityTypes.message,
+                text="Would you like a single or a double room?",
+
+                suggested_actions=SuggestedActions(
+                    actions=[
+                        CardAction(
+                            title="Single",
+                            type=ActionTypes.im_back,
+                            value="Single room (1 people)"
+                        ),
+                        CardAction(
+                            title="Double",
+                            type=ActionTypes.im_back,
+                            value="Double room (2 peoples)"
+                        )
+                    ]
+                )
+            ),
+            retry_prompt=MessageFactory.text(
+                "Reservations can be made for one to four people only."
+            )
+        )
+
+        # NumberPrompt - How many people ?
+        return await step_context.prompt(
+            "PeoplePrompt",
+            options
+        )
+
+    @staticmethod
+    async def duration_step(step_context: WaterfallStepContext) -> DialogTurnResult:
+        """Ask the user: how many night to reserve?"""
+
+        # Save the number of people
+        step_context.values["people"] = step_context.result
+
+        # Retrieve the keywords
+        booking_keywords: dict = step_context.values["booking_keywords"]
+
+        # If the keyword 'duration' exists and is filled, pass the question
+        if 'duration' in booking_keywords and booking_keywords['duration'] is not None:
+            return await step_context.next(booking_keywords['duration'])
+
+        # NumberPrompt - How many nights ? (duration)
+        return await step_context.prompt(
+            "DurationPrompt",
+            PromptOptions(
+                prompt=MessageFactory.text("How long do you want to stay?"),
+                retry_prompt=MessageFactory.text(
+                    "It is only possible to book from 1 to 7 nights"
+                ),
+            ),
+        )
+
+    @staticmethod
+    async def breakfast_step(step_context: WaterfallStepContext) -> DialogTurnResult:
+
+        # Save the number of nights
+        step_context.values["duration"] = step_context.result
+
+        # Confirm people and duration
+        await step_context.context.send_activity(
+            MessageFactory.text(
+                f"Okay, so {step_context.values['people']} people for {step_context.values['duration']} nights"
+            )
+        )
+
+        # ConfirmPrompt - Is taking breakfast ?
+        return await step_context.prompt(
+            "IsTakingBreakfastPrompt",
+            PromptOptions(
+                prompt=MessageFactory.text("Will you be having breakfast?")
+            ),
+        )
+
+    async def summary_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
+
+        # Save if the user take the breakfast (bool)
+        step_context.values["breakfast"] = step_context.result
+
+        # If the user said "Yes":
+        if step_context.result:
+
+            # Confirm breakfast hour
+            await step_context.context.send_activity(
+                MessageFactory.text(f"Perfect, breakfast is from 6am to 10am")
+            )
+
+        # Save information to Reservation object
+        room_reservation = await self.room_reservation_accessor.get(
+            step_context.context, RoomReservation
+        )
+
+        room_reservation.people = step_context.values["people"]
+        room_reservation.duration = step_context.values["duration"]
+        room_reservation.breakfast = step_context.values["breakfast"]
+
+        # End the dialog
+        await step_context.context.send_activity(
+            MessageFactory.text("Your booking has been made !")
+        )
+
+        return await step_context.end_dialog()
+
+    @staticmethod
+    async def people_prompt_validator(prompt_context: PromptValidatorContext) -> bool:
+        """Validate the number of people entered by the user."""
+
+        # Restrict people between [1 and 4].
+        return (
+                prompt_context.recognized.succeeded
+                and 1 <= prompt_context.recognized.value <= 4
+        )
+
+    @staticmethod
+    async def duration_prompt_validator(prompt_context: PromptValidatorContext) -> bool:
+        """Validate the number of nights entered by the user."""
+
+        # Restrict nights between [1 and 7].
+        return (
+                prompt_context.recognized.succeeded
+                and 1 <= prompt_context.recognized.value <= 7
+        )
diff --git a/src/dialogs/data_models/__init__.py b/src/dialogs/data_models/__init__.py
new file mode 100644
index 0000000..6b200e2
--- /dev/null
+++ b/src/dialogs/data_models/__init__.py
@@ -0,0 +1,4 @@
+
+from .room_reservation import RoomReservation
+
+__all__ = ["RoomReservation"]
diff --git a/src/dialogs/data_models/room_reservation.py b/src/dialogs/data_models/room_reservation.py
new file mode 100644
index 0000000..1c31644
--- /dev/null
+++ b/src/dialogs/data_models/room_reservation.py
@@ -0,0 +1,9 @@
+
+class RoomReservation:
+    """Hotel's room reservation state."""
+
+    def __init__(self, people: int = None, duration: int = None, breakfast: bool = None):
+        """Store the three reservation fields; each one defaults to None when not provided."""
+        self.people: int = people  # Number of people
+        self.duration: int = duration  # Number of nights
+        self.breakfast: bool = breakfast  # If they take breakfast
diff --git a/src/dialogs/helpers/__init__.py b/src/dialogs/helpers/__init__.py
new file mode 100644
index 0000000..62e19ca
--- /dev/null
+++ b/src/dialogs/helpers/__init__.py
@@ -0,0 +1,5 @@
+
+from .dialogs_helper import DialogHelper
+from .nlu_helper import NLUHelper
+
+__all__ = ["DialogHelper", "NLUHelper"]
diff --git a/src/dialogs/helpers/dialogs_helper.py b/src/dialogs/helpers/dialogs_helper.py
new file mode 100644
index 0000000..531b325
--- /dev/null
+++ b/src/dialogs/helpers/dialogs_helper.py
@@ -0,0 +1,18 @@
+
+from botbuilder.core import StatePropertyAccessor, TurnContext
+from botbuilder.dialogs import Dialog, DialogSet, DialogTurnStatus
+
+
+class DialogHelper:
+    """Helper to drive a dialog during a turn."""
+
+    @staticmethod
+    async def run_dialog(dialog: Dialog, turn_context: TurnContext, accessor: StatePropertyAccessor):
+        """
+        Continue the dialog in progress, or begin `dialog` when none is in progress.
+
+        :param dialog: Dialog to register and, if needed, to begin.
+        :param turn_context: Context of the current turn.
+        :param accessor: State property accessor given to the dialog set.
+        """
+
+        dialogs = DialogSet(accessor)
+        dialogs.add(dialog)
+
+        context = await dialogs.create_context(turn_context)
+        turn_result = await context.continue_dialog()
+
+        # Any status other than Empty means there was something to continue.
+        if turn_result.status != DialogTurnStatus.Empty:
+            return
+
+        await context.begin_dialog(dialog.id)
diff --git a/src/dialogs/helpers/nlu_helper.py b/src/dialogs/helpers/nlu_helper.py
new file mode 100644
index 0000000..0ece14a
--- /dev/null
+++ b/src/dialogs/helpers/nlu_helper.py
@@ -0,0 +1,10 @@
+
+from src.nlu import Intent, NLU
+
+
+class NLUHelper:
+    """Awaitable wrapper around the NLU module, for use from dialog steps."""
+
+    @staticmethod
+    async def execute_nlu_query(nlu_recognizer: NLU, message: str) -> (Intent, dict):
+        """
+        Ask the recognizer for the intent and the keywords of a message.
+
+        :param nlu_recognizer: NLU instance to query.
+        :param message: Text to analyse.
+        :return: Whatever ``nlu_recognizer.get_intent(message)`` returns.
+        """
+
+        recognition = nlu_recognizer.get_intent(message)
+        return recognition
diff --git a/src/dialogs/main_dialog.py b/src/dialogs/main_dialog.py
new file mode 100644
index 0000000..51706ac
--- /dev/null
+++ b/src/dialogs/main_dialog.py
@@ -0,0 +1,98 @@
+
+from botbuilder.schema import InputHints
+from botbuilder.dialogs import ComponentDialog, WaterfallDialog, WaterfallStepContext, DialogTurnResult
+from botbuilder.dialogs.prompts import TextPrompt, PromptOptions
+from botbuilder.core import MessageFactory, UserState
+
+from src.nlu import Intent, NLU
+from . import BookingRoomDialog
+from .utils import Emoji
+from .helpers import NLUHelper
+
+
+class MainDialog(ComponentDialog):
+    """
+    Root dialog of the bot: ask the user what they want, infer the intent
+    of the answer, dispatch to the matching sub-dialog, then loop.
+    """
+
+    def __init__(self, nlu_recognizer: NLU, user_state: UserState,
+                 booking_room_dialog: BookingRoomDialog):
+        """
+        :param nlu_recognizer: NLU module queried in the act step.
+        :param user_state: Accepted for the caller's convenience; not used here.
+        :param booking_room_dialog: Sub-dialog started for a room booking.
+        """
+
+        super().__init__(MainDialog.__name__)
+
+        # Keep the NLU module and the id of the sub-dialog to dispatch to
+        self._nlu_recognizer = nlu_recognizer
+        self._booking_dialog_id = booking_room_dialog.id
+
+        waterfall_id = WaterfallDialog.__name__
+        steps = [self.intro_step, self.act_step, self.final_step]
+
+        # Register, in order: the waterfall, its text prompt, the sub-dialog
+        for child in (
+                WaterfallDialog(waterfall_id, steps),
+                TextPrompt("ActPrompt"),
+                booking_room_dialog,
+        ):
+            self.add_dialog(child)
+
+        self.initial_dialog_id = waterfall_id
+
+    @staticmethod
+    async def intro_step(step_context: WaterfallStepContext) -> DialogTurnResult:
+        """
+        Intro step. Prompt the user with the text received as options, or
+        with a default question when no options were given.
+        """
+
+        if step_context.options:
+            message = str(step_context.options)
+        else:
+            message = "What can I help you with today?"
+
+        # TextPrompt - How can I help you?
+        options = PromptOptions(prompt=MessageFactory.text(message))
+        return await step_context.prompt("ActPrompt", options)
+
+    async def act_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
+        """
+        Act step. Infer the intent of the user's answer and dispatch to the
+        matching sub-dialog, or apologise when the intent is not handled.
+        """
+
+        intent, keywords = await NLUHelper.execute_nlu_query(
+            self._nlu_recognizer, step_context.result
+        )
+
+        # Booking intent: hand over to the BookingRoomDialog with the NLU keywords
+        if intent == Intent.BOOK_ROOM:
+            return await step_context.begin_dialog(self._booking_dialog_id, keywords)
+
+        # Any other intent: tell the user, then move on to the next step
+        didnt_understand_text = "Sorry, I didn't get that. Please try asking in a different way"
+        reply = MessageFactory.text(
+            didnt_understand_text, didnt_understand_text, InputHints.ignoring_input
+        )
+        await step_context.context.send_activity(reply)
+
+        return await step_context.next(None)
+
+    async def final_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
+        """
+        Final step. Replace the current dialog by this main dialog, passing a
+        follow-up question as options, to start a new loop of conversation.
+        """
+
+        follow_up = "What else can I do for you?"
+        return await step_context.replace_dialog(self.id, follow_up)
diff --git a/src/dialogs/utils/__init__.py b/src/dialogs/utils/__init__.py
new file mode 100644
index 0000000..d43a4a5
--- /dev/null
+++ b/src/dialogs/utils/__init__.py
@@ -0,0 +1,4 @@
+
+from .emoji import Emoji
+
+__all__ = ['Emoji']
diff --git a/src/dialogs/utils/emoji.py b/src/dialogs/utils/emoji.py
new file mode 100644
index 0000000..3d1dae2
--- /dev/null
+++ b/src/dialogs/utils/emoji.py
@@ -0,0 +1,7 @@
+
+from enum import Enum
+
+
+class Emoji(Enum):
+    """Emoji characters, each stored as its Unicode string."""
+
+    # Code point U+1F44B
+    WAVING_HAND = "\U0001F44B"
diff --git a/src/nlu/__init__.py b/src/nlu/__init__.py
new file mode 100644
index 0000000..8e8fbe6
--- /dev/null
+++ b/src/nlu/__init__.py
@@ -0,0 +1,5 @@
+
+from .intent import Intent
+from .nlu import NLU
+
+__all__ = ["NLU", "Intent"]
diff --git a/src/nlu/classifying/__init__.py b/src/nlu/classifying/__init__.py
new file mode 100644
index 0000000..453d49a
--- /dev/null
+++ b/src/nlu/classifying/__init__.py
@@ -0,0 +1,4 @@
+
+from .classifier import Classifier
+
+__all__ = ["Classifier"]
diff --git a/src/nlu/classifying/classifier.py b/src/nlu/classifying/classifier.py
new file mode 100644
index 0000000..26c0278
--- /dev/null
+++ b/src/nlu/classifying/classifier.py
@@ -0,0 +1,86 @@
+
+import pickle
+
+import requests
+import torch
+from transformers import BertTokenizer, BertForSequenceClassification
+
+from src.nlu import Intent
+from config import Config
+# Module-level settings object; the classifier below reads its MODEL_* attributes.
+config = Config()
+
+# Torch device the model is moved to and the weights are mapped onto: always the CPU.
+device = torch.device("cpu")
+
+
+class Classifier:
+    """
+    Intent classifier built on ``BertForSequenceClassification``.
+
+    On construction, the label dictionary and the model weights are
+    downloaded from the URLs found in the configuration, saved locally,
+    then loaded.
+    """
+
+    def __init__(self):
+        """Load the labels first: the model needs their count."""
+
+        # Load the classes and the model
+        self.labels = self._load_labels()
+        self.model = self._load_model()
+
+    @staticmethod
+    def __load_remote_file(url: str, local: str):
+        """
+        Stream the content found at `url` into the local file `local`.
+
+        :param url: Remote location of the file.
+        :param local: Path of the local copy (opened in 'wb' mode).
+        :raises requests.HTTPError: If the server answers with an error status.
+        """
+
+        with requests.get(url, stream=True) as response:
+
+            # Fail before touching the local file: otherwise the body of an
+            # HTTP error page would be saved, then parsed as a pickle or as
+            # model weights.
+            response.raise_for_status()
+
+            with open(local, 'wb') as handle:
+
+                # Stream the content to the local file, chunk by chunk
+                for chunk in response.iter_content(chunk_size=8192):
+                    handle.write(chunk)
+
+    def _load_labels(self) -> dict:
+        """
+        Load the dictionary of labels from a remote pickle file and return it.
+
+        :return: The unpickled object; ``predict`` indexes it with the
+            predicted class index to get a label string.
+        """
+
+        # Download and save the pickle locally
+        self.__load_remote_file(config.MODEL_CLASSES_URL, config.MODEL_CLASSES_LOCAL_COPY)
+
+        # SECURITY: pickle.load can execute arbitrary code. The file comes
+        # from a remote URL, so that URL must be fully trusted.
+        with open(config.MODEL_CLASSES_LOCAL_COPY, 'rb') as handle:
+            return pickle.load(handle)
+
+    def _load_model(self) -> BertForSequenceClassification:
+        """
+        Load the weights of the model from a remote file (reportedly around
+        500 MB), instantiate and return the model.
+
+        :return: The model, on the CPU device, with the downloaded weights.
+        """
+
+        # Download and save the weights locally
+        self.__load_remote_file(config.MODEL_WEIGHT_URL, config.MODEL_WEIGHT_LOCAL_COPY)
+
+        # Instantiate the model with one output per label
+        model = BertForSequenceClassification.from_pretrained(
+            config.MODEL_CLASSIFIER,
+            num_labels=len(self.labels),
+            output_attentions=False,
+            output_hidden_states=False
+        )
+        model.to(device)
+
+        # Replace the pretrained weights by the downloaded ones
+        model.load_state_dict(
+            torch.load(config.MODEL_WEIGHT_LOCAL_COPY, map_location=device)
+        )
+
+        return model
+
+    def predict(self, dataset: BertTokenizer) -> Intent:
+        """
+        Make a prediction and return the class.
+
+        :param dataset: Tokenized input exposing ``input_ids`` and
+            ``attention_mask``.
+            NOTE(review): annotated as BertTokenizer, but only those two
+            attributes are read; confirm the intended type.
+        :return: The Intent whose value is the predicted label.
+        :raises ValueError: If the predicted label is not a value of Intent.
+        """
+
+        # Inference only: no gradient is needed, so skip autograd bookkeeping
+        with torch.no_grad():
+            outputs = self.model(
+                input_ids=dataset.input_ids,
+                token_type_ids=None,
+                attention_mask=dataset.attention_mask
+            )
+
+        # Index of the highest score, for each row of the first output
+        _, predicted_index = torch.max(outputs[0], dim=1)
+
+        # Only the first row is used: map its index to a label, then an Intent
+        return Intent(self.labels[predicted_index[0].item()])
diff --git a/src/nlu/intent.py b/src/nlu/intent.py
new file mode 100644
index 0000000..adce4a1
--- /dev/null
+++ b/src/nlu/intent.py
@@ -0,0 +1,15 @@
+
+from enum import Enum
+
+
+class Intent(Enum):
+    """
+    Intents known to the bot. Each member's value is a label string, so
+    ``Intent(label)`` returns the member matching that label.
+    """
+
+    # Yes/No
+    YES = "smalltalk_confirmation_yes"
+    NO = "smalltalk_confirmation_no"
+
+    # Small talks
+    GREETINGS = "smalltalk_greetings_hello"
+
+    # Hotel long talks
+    BOOK_ROOM = "longtalk_make_reservation"
diff --git a/src/nlu/matching/__init__.py b/src/nlu/matching/__init__.py
new file mode 100644
index 0000000..fa2ea14
--- /dev/null
+++ b/src/nlu/matching/__init__.py
@@ -0,0 +1,5 @@
+
+from .filter import Filter
+from .matcher import Matcher
+
+__all__ = ["Filter", "Matcher"]
diff --git a/src/nlu/matching/filter.py b/src/nlu/matching/filter.py
new file mode 100644
index 0000000..264b073
--- /dev/null
+++ b/src/nlu/matching/filter.py
@@ -0,0 +1,26 @@
+
+from typing import List
+
+
+class Filter:
+    """
+    Filter object, storing the values of a filter. Used in the Matcher
+    class to build a RegEx extracting keywords from a given text.
+    """
+
+    def __init__(self, name: str, words: List[str], regex: str, threshold: float = 0.95):
+        """
+        :param name: Name of the filter.
+        :param words: Words the filter is matched against.
+        :param regex: RegEx template; it is cleaned by `set_regex` before
+            being stored.
+        :param threshold: Similarity threshold of the filter. Default: 0.95.
+        """
+
+        self.name = name
+        self.words = words
+        self.regex = self.set_regex(regex)
+        self.threshold = threshold
+
+    @staticmethod
+    def set_regex(regex: str) -> str:
+        """
+        Clean the regex string: turn every double backslash, due to the TOML
+        file formatting, into a single one.
+
+        :param regex: Raw regex string.
+        :return: The cleaned regex string.
+        """
+
+        # str.replace returns a new string and leaves its input untouched,
+        # so the result is what must be returned.
+        return regex.replace('\\\\', '\\')
diff --git a/src/nlu/matching/matcher.py b/src/nlu/matching/matcher.py
new file mode 100644
index 0000000..1f39ccc
--- /dev/null
+++ b/src/nlu/matching/matcher.py
@@ -0,0 +1,89 @@
+
+import re
+from typing import Dict, List
+
+import toml
+import pandas as pd
+from polyfuzz import PolyFuzz
+
+from . import Filter
+from config import Config
+# Module-level settings object; provides MODEL_MATCHING and FILTERS_TOML below.
+config = Config()
+
+
+class Matcher:
+    """
+    Extract keywords from a text, using the filters declared for an intent.
+    """
+
+    def __init__(self):
+        """Load the matching model and the filters named in the configuration."""
+
+        # Load the PolyFuzz model used for matching
+        self.model = PolyFuzz(config.MODEL_MATCHING)
+
+        # Load the filters
+        self.filters: Dict[str, List[Filter]] = self.__load_filters()
+
+    @staticmethod
+    def __load_filters() -> dict:
+        """
+        Load the filters from the TOML file named in the configuration,
+        create Filter objects, and return a dictionary of these objects
+        classified by intent.
+
+        :return: Dictionary mapping each intent to its list of Filter.
+        :raises KeyError: If a filter lacks its `words` or `regex` entry.
+        """
+        filters = {}
+
+        # Load the raw filters
+        toml_file = toml.load(config.FILTERS_TOML, _dict=dict)
+
+        # Loop over each intent
+        for intent, raw_filters in toml_file.items():
+            filter_list = []
+
+            # Loop over each filter in this intent
+            for name, content in raw_filters.items():
+
+                # `threshold` is optional: Filter has its own default
+                options = {}
+                if 'threshold' in content:
+                    options['threshold'] = content['threshold']
+
+                # Create and append a Filter object
+                filter_list.append(
+                    Filter(
+                        name=name,
+                        words=content['words'],
+                        regex=content['regex'],
+                        **options
+                    )
+                )
+
+            # Save the filters to the main dictionary
+            filters[intent] = filter_list
+
+        return filters
+
+    def get_keywords(self, text: str, intent: str) -> dict:
+        """
+        Extract the keywords of a given text, for a given intent.
+
+        For each filter of the intent, the word of the text most similar to
+        the filter's words is searched; when its similarity reaches the
+        filter's threshold, it is inserted into the filter's regex, and the
+        group named after the filter is captured from the text.
+
+        :param text: Text to search.
+        :param intent: Key of the filters to apply.
+        :return: Dictionary mapping each filter name to the captured value,
+            or to None when nothing was found. Empty when the intent has no
+            filter.
+        """
+
+        keywords = {}
+        if intent not in self.filters:
+            return keywords
+
+        # Split on any whitespace, so that no empty entry reaches the model
+        entries = text.split()
+
+        for filter_ in self.filters[intent]:
+
+            # Until a value is captured, the filter has no keyword
+            keywords[filter_.name] = None
+
+            if not entries:
+                continue
+
+            # Match similarities between the given text and the filter
+            self.model.match(entries, filter_.words)
+            matches: pd.DataFrame = self.model.get_matches()
+
+            # Keep only the matches reaching the threshold
+            candidates = matches[matches['Similarity'] >= filter_.threshold]
+            if candidates.empty:
+                continue
+
+            # Get the word with the maximum similarity (first column of the
+            # first best row)
+            best = candidates[candidates['Similarity'] == candidates['Similarity'].max()]
+            keyword = best.iloc[0, 0]
+
+            # The word comes from the user's text: escape it so that it is
+            # matched literally, whatever characters it contains
+            pattern = filter_.regex % re.escape(keyword)
+
+            # Use the keyword to retrieve and save its chained-data
+            if result := re.search(pattern, text):
+                keywords[filter_.name] = result.group(filter_.name)
+
+        return keywords
diff --git a/src/nlu/nlu.py b/src/nlu/nlu.py
new file mode 100644
index 0000000..c46a4e8
--- /dev/null
+++ b/src/nlu/nlu.py
@@ -0,0 +1,33 @@
+
+from . import Intent
+from .matching import Matcher
+from .preprocessing import Preprocessor, Tokenizer
+from .classifying import Classifier
+
+
+class NLU:
+    """
+    Natural language understanding pipeline: preprocess a message, classify
+    its intent, then extract the keywords attached to that intent.
+    """
+
+    def __init__(self):
+        """Instantiate every stage of the pipeline."""
+
+        # Text cleaning and tokenization
+        self.preprocessor = Preprocessor()
+        self.tokenizer = Tokenizer()
+
+        # Intent classification and keyword matching
+        self.classifier = Classifier()
+        self.matcher = Matcher()
+
+    def get_intent(self, message: str) -> (Intent, dict):
+        """
+        Return the intent and the keywords of a given message.
+
+        :param message: Raw message.
+        :return: The intent predicted by the classifier, and the keywords
+            found by the matcher for that intent's value.
+        """
+
+        cleaned = self.preprocessor.preprocess(message)
+
+        # The classifier works on tokens, the matcher on the cleaned text
+        intent = self.classifier.predict(self.tokenizer.get_dataset(cleaned))
+
+        return intent, self.matcher.get_keywords(cleaned, intent.value)
diff --git a/src/nlu/preprocessing/__init__.py b/src/nlu/preprocessing/__init__.py
new file mode 100644
index 0000000..59813be
--- /dev/null
+++ b/src/nlu/preprocessing/__init__.py
@@ -0,0 +1,5 @@
+
+from .tokenizer import Tokenizer
+from .preprocessor import Preprocessor
+
+__all__ = ["Tokenizer", "Preprocessor"]
diff --git a/src/nlu/preprocessing/preprocessor.py b/src/nlu/preprocessing/preprocessor.py
new file mode 100644
index 0000000..593f429
--- /dev/null
+++ b/src/nlu/preprocessing/preprocessor.py
@@ -0,0 +1,81 @@
+
+from bs4 import BeautifulSoup
+import spacy
+import unidecode
+from word2number import w2n
+import contractions
+
+from config import Config
+# Module-level settings object; provides MODEL_PREPROCESS below.
+config = Config()
+
+
+class Preprocessor:
+    """
+    Text cleaning pipeline applied to a message before classification.
+    """
+
+    def __init__(self):
+        """Load the SpaCy model named in the configuration."""
+
+        # Load the SpaCy model used to tokenize and lemmatize
+        self.nlp = spacy.load(config.MODEL_PREPROCESS)
+
+    @staticmethod
+    def strip_html_tags(text: str) -> str:
+        """Remove html tags from the document, joining the text parts with a space."""
+
+        soup = BeautifulSoup(text, "html.parser")
+        return soup.get_text(separator=" ")
+
+    @staticmethod
+    def expand_contractions(text: str) -> str:
+        """Expand shortened words, e.g. 'don't' to 'do not'."""
+
+        return contractions.fix(text)
+
+    @staticmethod
+    def remove_accented_chars(text: str) -> str:
+        """Remove accented characters from text, e.g. café."""
+
+        return unidecode.unidecode(text)
+
+    @staticmethod
+    def remove_whitespace(text: str) -> str:
+        """Remove leading, trailing and repeated whitespaces from text."""
+
+        text = text.strip()
+        return " ".join(text.split())
+
+    @staticmethod
+    def limit_n_words(text: str, limit: int = 256) -> str:
+        """
+        Limit a text to n-words. Default: 256.
+
+        The kept words are joined by a single space, so any run of
+        whitespace is collapsed as well.
+        """
+
+        words = text.split()[:limit]
+        return " ".join(words)
+
+    @staticmethod
+    def _normalize_token(token):
+        """Return the cleaned form of one SpaCy token: a number, a lemma, or the token itself."""
+
+        # Convert number words to numeric numbers
+        if token.pos_ == 'NUM':
+            try:
+                return w2n.word_to_num(token.text)
+
+            except ValueError:
+                # Not every token tagged as a number can be converted:
+                # keep its text instead of failing the whole message
+                return token.text
+
+        # Convert tokens to base form
+        if token.lemma_ != "-PRON-":
+            return token.lemma_
+
+        # Keep the token if no modification was applied
+        return token
+
+    def preprocess(self, text: str) -> str:
+        """
+        Apply the preprocess pipeline to a given text.
+
+        :param text: Raw text.
+        :return: The cleaned, lowercased text; its tokens are joined by a
+            single space, with numbers as digits and words as lemmas.
+        """
+
+        # Apply all preformatting
+        text = self.strip_html_tags(text)
+        text = self.expand_contractions(text)
+        text = self.remove_accented_chars(text)
+
+        # NOTE(review): contractions are expanded a second time, while
+        # remove_whitespace is never called. Kept as-is: limit_n_words
+        # already collapses the whitespaces. Confirm the intent.
+        text = self.expand_contractions(text)
+        text = self.limit_n_words(text)
+        text = text.lower()
+
+        # Tokenize the text, then clean each token
+        document = self.nlp(text)
+        clean_text = [self._normalize_token(token) for token in document]
+
+        return ' '.join(map(str, clean_text))
diff --git a/src/nlu/preprocessing/tokenizer.py b/src/nlu/preprocessing/tokenizer.py
new file mode 100644
index 0000000..d9b8909
--- /dev/null
+++ b/src/nlu/preprocessing/tokenizer.py
@@ -0,0 +1,17 @@
+
+from transformers import BertTokenizer
+
+from config import Config
+# Module-level settings object; provides MODEL_CLASSIFIER below.
+config = Config()
+
+
+class Tokenizer:
+    """Wrapper around the BertTokenizer matching the classifier model."""
+
+    def __init__(self):
+        """Load the pretrained tokenizer named in the configuration."""
+
+        pretrained_name = config.MODEL_CLASSIFIER
+        self.tokenizer = BertTokenizer.from_pretrained(pretrained_name)
+
+    def get_dataset(self, text: str):
+        """
+        Tokenize a given text.
+
+        :param text: Text to tokenize.
+        :return: The output of the tokenizer, called with
+            ``return_tensors="pt"``.
+        """
+
+        encode = self.tokenizer
+        return encode(text, return_tensors="pt")