diff --git a/.github/scripts/mr_generate_summary.py b/.github/scripts/mr_generate_summary.py index 20d7a7b46e46..e1e039afaf76 100644 --- a/.github/scripts/mr_generate_summary.py +++ b/.github/scripts/mr_generate_summary.py @@ -58,7 +58,13 @@ def read_results(file): with open(file) as json_file: data = json.load(json_file) - keys = ["accuracy", "weighted avg", "macro avg", "micro avg", "conversation_accuracy"] + keys = [ + "accuracy", + "weighted avg", + "macro avg", + "micro avg", + "conversation_accuracy", + ] result = {key: data[key] for key in keys if key in data} return result diff --git a/.github/scripts/mr_publish_results.py b/.github/scripts/mr_publish_results.py index 7f0af454081c..cb301ff3e784 100644 --- a/.github/scripts/mr_publish_results.py +++ b/.github/scripts/mr_publish_results.py @@ -69,7 +69,13 @@ def read_results(file): with open(file) as json_file: data = json.load(json_file) - keys = ["accuracy", "weighted avg", "macro avg", "micro avg", "conversation_accuracy"] + keys = [ + "accuracy", + "weighted avg", + "macro avg", + "micro avg", + "conversation_accuracy", + ] result = {key: data[key] for key in keys if key in data} return result diff --git a/poetry.lock b/poetry.lock index ca1fef01c4a4..247fbf2f32fc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3157,7 +3157,7 @@ transformers = ["transformers"] [metadata] lock-version = "1.1" python-versions = ">=3.6,<3.9" -content-hash = "8319a632c0f0f25e3574887786ba9fb51277f7cc7010a02f8866933065506563" +content-hash = "80436920af7907d20d9d32586e178c2ea66bb31ebb3c85c42a7219f77e6e8301" [metadata.files] absl-py = [ diff --git a/pyproject.toml b/pyproject.toml index 103346159d6d..6fa8e1d22731 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -156,6 +156,7 @@ types-requests = "^2.25.0" types-setuptools = "^57.0.0" memory-profiler = "^0.58.0" psutil = "^5.8.0" +mypy-extensions = "^0.4.3" [tool.poetry.extras] spacy = [ "spacy",] diff --git a/rasa/cli/train.py b/rasa/cli/train.py index a85dbed7b4d0..51b8ad9d4f07 100644 --- a/rasa/cli/train.py +++ b/rasa/cli/train.py @@ -165,6 +165,7 @@ def run_core_training( rasa.utils.common.run_in_loop( do_compare_training(args, story_file, additional_arguments) ) + return None def run_nlu_training( diff --git a/rasa/cli/x.py b/rasa/cli/x.py index 4dad62163880..60fdb139ecf2 100644 --- a/rasa/cli/x.py +++ b/rasa/cli/x.py @@ -2,7 +2,8 @@ import asyncio import importlib.util import logging -from multiprocessing import get_context, Process +from multiprocessing.process import BaseProcess +from multiprocessing import get_context import os import signal import sys @@ -198,7 +199,7 @@ def _is_correct_event_broker(event_broker: EndpointConfig) -> bool: def start_rasa_for_local_rasa_x( args: argparse.Namespace, rasa_x_token: Text -) -> Process: +) -> BaseProcess: """Starts the Rasa X API with Rasa as a background process.""" credentials_path, endpoints_path = _get_credentials_and_endpoints_paths(args) endpoints = AvailableEndpoints.read_endpoints(endpoints_path) diff --git a/rasa/core/actions/action.py b/rasa/core/actions/action.py index 03c8344f0b7c..28eb8568eb6a 100644 --- a/rasa/core/actions/action.py +++ b/rasa/core/actions/action.py @@ -510,7 +510,7 @@ async def run( domain: "Domain", ) -> List[Event]: """Runs action. 
Please see parent class for the full docstring.""" - _events = [SessionStarted(metadata=self.metadata)] + _events: List[Event] = [SessionStarted(metadata=self.metadata)] if domain.session_config.carry_over_slots: _events.extend(self._slot_set_events_from_tracker(tracker)) @@ -690,7 +690,7 @@ async def run( events_json = response.get("events", []) responses = response.get("responses", []) - bot_messages = await self._utter_responses( + bot_messages: List[Event] = await self._utter_responses( responses, output_channel, nlg, tracker ) diff --git a/rasa/core/actions/forms.py b/rasa/core/actions/forms.py index 4adcca6a2520..b1091fb507ba 100644 --- a/rasa/core/actions/forms.py +++ b/rasa/core/actions/forms.py @@ -425,7 +425,7 @@ async def validate_slots( domain: Domain, output_channel: OutputChannel, nlg: NaturalLanguageGenerator, - ) -> List[Event]: + ) -> List[Union[SlotSet, Event]]: """Validate the extracted slots. If a custom action is available for validating the slots, we call it to validate @@ -445,7 +445,7 @@ async def validate_slots( for the validated slots. """ logger.debug(f"Validating extracted slots: {slot_candidates}") - events = [ + events: List[Union[SlotSet, Event]] = [ SlotSet(slot_name, value) for slot_name, value in slot_candidates.items() ] @@ -506,7 +506,7 @@ async def validate( domain: Domain, output_channel: OutputChannel, nlg: NaturalLanguageGenerator, - ) -> List[Event]: + ) -> List[Union[SlotSet, Event]]: """Extract and validate value of requested slot. If nothing was extracted reject execution of the form action. @@ -560,9 +560,9 @@ async def request_next_slot( output_channel: OutputChannel, nlg: NaturalLanguageGenerator, events_so_far: List[Event], - ) -> List[Event]: + ) -> List[Union[SlotSet, Event]]: """Request the next slot and response if needed, else return `None`.""" - request_slot_events = [] + request_slot_events: List[Union[SlotSet, Event]] = [] if await self.is_done(output_channel, nlg, tracker, domain, events_so_far): # The custom action for slot validation decided to stop the form early diff --git a/rasa/core/actions/two_stage_fallback.py b/rasa/core/actions/two_stage_fallback.py index feb845395cec..0d4251c696bf 100644 --- a/rasa/core/actions/two_stage_fallback.py +++ b/rasa/core/actions/two_stage_fallback.py @@ -104,7 +104,8 @@ async def deactivate( return await self._give_up(output_channel, nlg, tracker, domain) # revert fallback events - return [UserUtteranceReverted()] + _message_clarification(tracker) + reverted_event: List[Event] = [UserUtteranceReverted()] + return reverted_event + _message_clarification(tracker) async def _give_up( self, @@ -137,7 +138,7 @@ def _two_fallbacks_in_a_row(tracker: DialogueStateTracker) -> bool: def _last_n_intent_names( tracker: DialogueStateTracker, number_of_last_intent_names: int -) -> List[Text]: +) -> List[Optional[Text]]: intent_names = [] for i in range(number_of_last_intent_names): message = tracker.get_last_event_for( diff --git a/rasa/core/agent.py b/rasa/core/agent.py index 70d9fedf58a8..ab5f0b81adb7 100644 --- a/rasa/core/agent.py +++ b/rasa/core/agent.py @@ -4,7 +4,16 @@ import shutil import tempfile from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Text, Tuple, Union +from typing import ( + Any, + Callable, + Dict, + List, + Optional, + Text, + Tuple, + Union, +) import uuid import aiohttp @@ -52,6 +61,8 @@ from rasa.utils.endpoints import EndpointConfig import rasa.utils.io +from rasa.shared.core.generator import TrackerWithCachedStates + logger = 
logging.getLogger(__name__) @@ -679,7 +690,7 @@ def _are_all_featurizers_using_a_max_history(self) -> bool: """Check if all featurizers are MaxHistoryTrackerFeaturizer.""" def has_max_history_featurizer(policy: Policy) -> bool: - return ( + return bool( policy.featurizer and hasattr(policy.featurizer, "max_history") and policy.featurizer.max_history is not None @@ -700,9 +711,8 @@ async def load_data( use_story_concatenation: bool = True, debug_plots: bool = False, exclusion_percentage: Optional[int] = None, - ) -> List[DialogueStateTracker]: + ) -> List["TrackerWithCachedStates"]: """Load training data from a resource.""" - max_history = self._max_history() if unique_last_num_states is None: @@ -769,7 +779,6 @@ def _clear_model_directory(model_path: Text) -> None: Only removes files if the directory seems to contain a previously persisted model. Otherwise does nothing to avoid deleting `/` by accident.""" - if not os.path.exists(model_path): return diff --git a/rasa/core/channels/botframework.py b/rasa/core/channels/botframework.py index 43378645883c..fed9e28c4cba 100644 --- a/rasa/core/channels/botframework.py +++ b/rasa/core/channels/botframework.py @@ -75,6 +75,7 @@ async def _get_headers(self) -> Optional[Dict[Text, Any]]: return BotFramework.headers else: logger.error("Could not get BotFramework token") + return None else: return BotFramework.headers diff --git a/rasa/core/channels/channel.py b/rasa/core/channels/channel.py index fd8cc65ca054..d002fc69e5b1 100644 --- a/rasa/core/channels/channel.py +++ b/rasa/core/channels/channel.py @@ -200,6 +200,8 @@ def decode_bearer_token( except Exception: logger.exception("Failed to decode bearer token.") + return None + class OutputChannel: """Output channel base class. diff --git a/rasa/core/channels/console.py b/rasa/core/channels/console.py index 42eec83dfe2d..86a89630dbf8 100644 --- a/rasa/core/channels/console.py +++ b/rasa/core/channels/console.py @@ -47,9 +47,10 @@ def print_buttons( rasa.shared.utils.cli.print_color( cli_utils.button_to_string(button, idx), color=color ) + return None -def print_bot_output( +def _print_bot_output( message: Dict[Text, Any], is_latest_message: bool = False, color: Text = rasa.shared.utils.io.bcolors.OKBLUE, @@ -90,17 +91,19 @@ def print_bot_output( json.dumps(message.get("custom"), indent=2), color=color ) + return None -def get_user_input(previous_response: Optional[Dict[str, Any]]) -> Optional[Text]: + +def _get_user_input(previous_response: Optional[Dict[str, Any]]) -> Optional[Text]: button_response = None if previous_response is not None: - button_response = print_bot_output(previous_response, is_latest_message=True) + button_response = _print_bot_output(previous_response, is_latest_message=True) if button_response is not None: response = cli_utils.payload_from_button_question(button_response) if response == cli_utils.FREE_TEXT_INPUT_PROMPT: # Re-prompt user with a free text input - response = get_user_input({}) + response = _get_user_input({}) else: response = questionary.text( "", @@ -169,7 +172,7 @@ async def record_messages( previous_response = None await asyncio.sleep(0.5) # Wait for server to start while not utils.is_limit_reached(num_messages, max_message_limit): - text = get_user_input(previous_response) + text = _get_user_input(previous_response) if text == exit_text or text is None: break @@ -181,7 +184,7 @@ async def record_messages( previous_response = None async for response in bot_responses: if previous_response is not None: - print_bot_output(previous_response) + 
_print_bot_output(previous_response) previous_response = response else: bot_responses = await send_message_receive_block( @@ -190,7 +193,7 @@ async def record_messages( previous_response = None for response in bot_responses: if previous_response is not None: - print_bot_output(previous_response) + _print_bot_output(previous_response) previous_response = response num_messages += 1 diff --git a/rasa/core/channels/hangouts.py b/rasa/core/channels/hangouts.py index c65273c856b8..339f87a8eb49 100644 --- a/rasa/core/channels/hangouts.py +++ b/rasa/core/channels/hangouts.py @@ -63,7 +63,7 @@ def _text_button_card(text: Text, buttons: List) -> Union[Dict, None]: logger.error( "Buttons must be a list of dicts with 'title' and 'payload' as keys" ) - return + return None hangouts_buttons.append( { @@ -246,10 +246,11 @@ def _extract_message(self, req: Request) -> Text: @staticmethod def _extract_room(req: Request) -> Union[Text, None]: - if req.json["space"]["type"] == "ROOM": return req.json["space"]["displayName"] + return None + def _extract_input_channel(self) -> Text: return self.name() diff --git a/rasa/core/channels/slack.py b/rasa/core/channels/slack.py index 0b01c49050da..0f2c99bf656f 100644 --- a/rasa/core/channels/slack.py +++ b/rasa/core/channels/slack.py @@ -308,7 +308,6 @@ def _is_interactive_message(payload: Dict) -> bool: @staticmethod def _get_interactive_response(action: Dict) -> Optional[Text]: """Parse the payload for the response value.""" - if action["type"] == "button": return action.get("value") elif action["type"] == "select": @@ -328,6 +327,8 @@ def _get_interactive_response(action: Dict) -> Optional[Text]: elif action["type"] == "datepicker": return action.get("selected_date") + return None + async def process_message( self, request: Request, diff --git a/rasa/core/channels/socketio.py b/rasa/core/channels/socketio.py index 0a65d2acebbc..52fdf8c0289f 100644 --- a/rasa/core/channels/socketio.py +++ b/rasa/core/channels/socketio.py @@ -170,12 +170,13 @@ def get_output_channel(self) -> Optional["OutputChannel"]: "Please use a different channel for external events in these " "scenarios." ) - return + return None return SocketIOOutput(self.sio, self.bot_message_evt) def blueprint( self, on_new_message: Callable[[UserMessage], Awaitable[Any]] ) -> Blueprint: + """Defines a Sanic blueprint.""" # Workaround so that socketio works with requests from other origins. 
# https://github.com/miguelgrinberg/python-socketio/issues/205#issuecomment-493769183 sio = AsyncServer(async_mode="sanic", cors_allowed_origins=[]) @@ -191,9 +192,7 @@ async def health(_: Request) -> HTTPResponse: return response.json({"status": "ok"}) @sio.on("connect", namespace=self.namespace) - async def connect( - sid: Text, environ: Dict, auth: Optional[Dict] - ) -> Optional[bool]: + async def connect(sid: Text, environ: Dict, auth: Optional[Dict]) -> bool: if self.jwt_key: jwt_payload = None if auth and auth.get("token"): @@ -203,10 +202,12 @@ async def connect( if jwt_payload: logger.debug(f"User {sid} connected to socketIO endpoint.") + return True else: return False else: logger.debug(f"User {sid} connected to socketIO endpoint.") + return True @sio.on("disconnect", namespace=self.namespace) async def disconnect(sid: Text) -> None: diff --git a/rasa/core/featurizers/tracker_featurizers.py b/rasa/core/featurizers/tracker_featurizers.py index 985ae879684f..49108c84ca03 100644 --- a/rasa/core/featurizers/tracker_featurizers.py +++ b/rasa/core/featurizers/tracker_featurizers.py @@ -560,7 +560,9 @@ def training_states_labels_and_entities( domain: Domain, omit_unset_slots: bool = False, ignore_action_unlikely_intent: bool = False, - ) -> Tuple[List[List[State]], List[List[Text]], List[List[Dict[Text, Any]]]]: + ) -> Tuple[ + List[List[State]], List[List[Optional[Text]]], List[List[Dict[Text, Any]]] + ]: """Transforms trackers to states, action labels, and entity data. Args: diff --git a/rasa/core/lock_store.py b/rasa/core/lock_store.py index 30a2453f8a4c..ac78da3df30f 100644 --- a/rasa/core/lock_store.py +++ b/rasa/core/lock_store.py @@ -259,7 +259,10 @@ def get_lock(self, conversation_id: Text) -> Optional[TicketLock]: if serialised_lock: return TicketLock.from_dict(json.loads(serialised_lock)) + return None + def delete_lock(self, conversation_id: Text) -> None: + """Deletes lock for conversation ID.""" deletion_successful = self.red.delete(self.key_prefix + conversation_id) self._log_deletion(conversation_id, deletion_successful) diff --git a/rasa/core/nlg/callback.py b/rasa/core/nlg/callback.py index 721e8d5bc30f..895c56043fef 100644 --- a/rasa/core/nlg/callback.py +++ b/rasa/core/nlg/callback.py @@ -77,7 +77,7 @@ async def generate( method="post", json=body, timeout=DEFAULT_REQUEST_TIMEOUT ) - if self.validate_response(response): + if isinstance(response, dict) and self.validate_response(response): return response else: raise RasaException("NLG web endpoint returned an invalid response.") diff --git a/rasa/core/policies/form_policy.py b/rasa/core/policies/form_policy.py index 57c9812657e5..817f4500f197 100644 --- a/rasa/core/policies/form_policy.py +++ b/rasa/core/policies/form_policy.py @@ -1,5 +1,5 @@ import logging -from typing import List, Dict, Text, Optional, Any, Union, Tuple +from typing import List, Dict, Text, Optional, Any import rasa.shared.utils.common import rasa.shared.utils.io @@ -12,7 +12,7 @@ ACTIVE_LOOP, LOOP_REJECTED, ) -from rasa.shared.core.domain import State, Domain +from rasa.shared.core.domain import State, Domain, SubStateValue from rasa.shared.core.events import LoopInterrupted from rasa.core.featurizers.tracker_featurizers import TrackerFeaturizer from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter @@ -55,9 +55,7 @@ def __init__( ) @staticmethod - def _get_active_form_name( - state: State, - ) -> Optional[Union[Text, Tuple[Union[float, Text]]]]: + def _get_active_form_name(state: State,) -> Optional[SubStateValue]: return 
state.get(ACTIVE_LOOP, {}).get(LOOP_NAME) @staticmethod @@ -87,8 +85,8 @@ def _create_lookup_from_states( self, trackers_as_states: List[List[State]], trackers_as_actions: List[List[Text]], - ) -> Dict[Text, Text]: - """Add states to lookup dict""" + ) -> Dict[Text, SubStateValue]: + """Add states to lookup dict.""" lookup = {} for states in trackers_as_states: active_form = self._get_active_form_name(states[-1]) diff --git a/rasa/core/policies/memoization.py b/rasa/core/policies/memoization.py index 197765784321..0e4b1703ce59 100644 --- a/rasa/core/policies/memoization.py +++ b/rasa/core/policies/memoization.py @@ -308,12 +308,12 @@ def _back_to_the_future( # use first action, if we went first time and second action, if we went again idx_to_use = idx_of_second_action if again else idx_of_first_action if idx_to_use is None: - return + return None # make second ActionExecuted the first one events = tracker.applied_events()[idx_to_use:] if not events: - return + return None mcfly_tracker = tracker.init_copy() for e in events: diff --git a/rasa/core/policies/rule_policy.py b/rasa/core/policies/rule_policy.py index 815bcd2f58bc..b0f63a4df8a3 100644 --- a/rasa/core/policies/rule_policy.py +++ b/rasa/core/policies/rule_policy.py @@ -193,7 +193,7 @@ def _create_feature_key(self, states: List[State]) -> Optional[Text]: new_states.insert(0, state) if not new_states: - return + return None # we sort keys to make sure that the same states # represented as dictionaries have the same json strings @@ -421,7 +421,7 @@ def _get_slots_loops_from_states( for states in trackers_as_states: for state in states: slots.update(set(state.get(SLOTS, {}).keys())) - active_loop = state.get(ACTIVE_LOOP, {}).get(LOOP_NAME) + active_loop: Optional[Text] = state.get(ACTIVE_LOOP, {}).get(LOOP_NAME) if active_loop: loops.add(active_loop) return slots, loops @@ -592,7 +592,7 @@ def _run_prediction_on_trackers( trackers: List[TrackerWithCachedStates], domain: Domain, collect_sources: bool, - ) -> Tuple[List[Text], Set[Text]]: + ) -> Tuple[List[Text], Set[Optional[Text]]]: if collect_sources: self._rules_sources = defaultdict(list) @@ -665,7 +665,7 @@ def _collect_rule_sources( def _find_contradicting_and_used_in_stories_rules( self, trackers: List[TrackerWithCachedStates], domain: Domain - ) -> Tuple[List[Text], Set[Text]]: + ) -> Tuple[List[Text], Set[Optional[Text]]]: return self._run_prediction_on_trackers(trackers, domain, collect_sources=False) def _analyze_rules( @@ -1080,7 +1080,7 @@ def predict_action_probabilities( def _predict( self, tracker: DialogueStateTracker, domain: Domain - ) -> Tuple[PolicyPrediction, Text]: + ) -> Tuple[PolicyPrediction, Optional[Text]]: ( rules_action_name_from_text, prediction_source_from_text, diff --git a/rasa/core/policies/sklearn_policy.py b/rasa/core/policies/sklearn_policy.py index 01736258de20..cf110177e441 100644 --- a/rasa/core/policies/sklearn_policy.py +++ b/rasa/core/policies/sklearn_policy.py @@ -347,7 +347,7 @@ def load( f"Failed to load dialogue model. Path {filename.absolute()} " f"doesn't exist." 
) - return + return cls() featurizer = TrackerFeaturizer.load(path) assert isinstance(featurizer, MaxHistoryTrackerFeaturizer), ( diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 5c824db14e49..e596e02c11d6 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -462,7 +462,7 @@ def _create_data_for_entities( self, entity_tags: Optional[List[List[Dict[Text, List["Features"]]]]] ) -> Optional[Data]: if not self.config[ENTITY_RECOGNITION]: - return + return None # check that there are real entity tags if entity_tags and self._should_extract_entities(entity_tags): @@ -476,6 +476,8 @@ def _create_data_for_entities( ) self.config[ENTITY_RECOGNITION] = False + return None + def _create_model_data( self, tracker_state_features: List[List[Dict[Text, List["Features"]]]], @@ -821,11 +823,11 @@ def _create_optional_event_for_entities( # entities belong only to the last user message # and only if user text was used for prediction, # a user message always comes after action listen - return + return None if not self.config[ENTITY_RECOGNITION]: # entity recognition is not turned on, no entities can be predicted - return + return None # The batch dimension of entity prediction is not the same as batch size, # rather it is the number of last (if max history featurizer else all) @@ -841,7 +843,7 @@ def _create_optional_event_for_entities( if ENTITY_ATTRIBUTE_TYPE not in predicted_tags: # no entities detected - return + return None # entities belong to the last message of the tracker # convert the predicted tags to actual entities diff --git a/rasa/core/test.py b/rasa/core/test.py index b7b292a2128c..0981c9fd27a3 100644 --- a/rasa/core/test.py +++ b/rasa/core/test.py @@ -280,14 +280,14 @@ def serialise(self) -> Tuple[PredictionList, PredictionList]: filter( lambda x: x.get(ENTITY_ATTRIBUTE_TEXT) == text, self.entity_targets ), - key=lambda x: x.get(ENTITY_ATTRIBUTE_START), + key=lambda x: x[ENTITY_ATTRIBUTE_START], ) entity_predictions = sorted( filter( lambda x: x.get(ENTITY_ATTRIBUTE_TEXT) == text, self.entity_predictions, ), - key=lambda x: x.get(ENTITY_ATTRIBUTE_START), + key=lambda x: x[ENTITY_ATTRIBUTE_START], ) i_pred, i_target = 0, 0 @@ -447,7 +447,7 @@ def _clean_entity_results( cleaned_entities = [] for r in tuple(entity_results): - cleaned_entity = {ENTITY_ATTRIBUTE_TEXT: text} + cleaned_entity: EntityPrediction = {ENTITY_ATTRIBUTE_TEXT: text} for k in ( ENTITY_ATTRIBUTE_START, ENTITY_ATTRIBUTE_END, @@ -596,13 +596,14 @@ def _get_e2e_entity_evaluation_result( return EntityEvaluationResult( entity_targets, entities_predicted_by_policies, tokens, text ) + return None def _run_action_prediction( processor: "MessageProcessor", partial_tracker: DialogueStateTracker, expected_action: Text, -) -> Tuple[Text, PolicyPrediction, EntityEvaluationResult]: +) -> Tuple[Text, PolicyPrediction, Optional[EntityEvaluationResult]]: action, prediction = processor.predict_next_action(partial_tracker) predicted_action = action.name() diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py index bfe0480f1a7d..a55a3fecf20e 100644 --- a/rasa/core/training/interactive.py +++ b/rasa/core/training/interactive.py @@ -15,6 +15,7 @@ Tuple, Union, Set, + cast, ) from sanic import Sanic, response @@ -90,6 +91,8 @@ # still works. 
import rasa.utils.io as io_utils +from rasa.shared.core.generator import TrackerWithCachedStates + logger = logging.getLogger(__name__) PATHS = { @@ -148,9 +151,8 @@ async def send_message( conversation_id: Text, message: Text, parse_data: Optional[Dict[Text, Any]] = None, -) -> Dict[Text, Any]: +) -> Optional[Any]: """Send a user message to a conversation.""" - payload = { "sender": UserUttered.type_name, "text": message, @@ -166,25 +168,22 @@ async def send_message( async def request_prediction( endpoint: EndpointConfig, conversation_id: Text -) -> Dict[Text, Any]: +) -> Optional[Any]: """Request the next action prediction from core.""" - return await endpoint.request( method="post", subpath=f"/conversations/{conversation_id}/predict" ) -async def retrieve_domain(endpoint: EndpointConfig) -> Dict[Text, Any]: +async def retrieve_domain(endpoint: EndpointConfig) -> Optional[Any]: """Retrieve the domain from core.""" - return await endpoint.request( method="get", subpath="/domain", headers={"Accept": "application/json"} ) -async def retrieve_status(endpoint: EndpointConfig) -> Dict[Text, Any]: +async def retrieve_status(endpoint: EndpointConfig) -> Optional[Any]: """Retrieve the status from core.""" - return await endpoint.request(method="get", subpath="/status") @@ -194,12 +193,15 @@ async def retrieve_tracker( verbosity: EventVerbosity = EventVerbosity.ALL, ) -> Dict[Text, Any]: """Retrieve a tracker from core.""" - path = f"/conversations/{conversation_id}/tracker?include_events={verbosity.name}" - return await endpoint.request( + result = await endpoint.request( method="get", subpath=path, headers={"Accept": "application/json"} ) + # If the request wasn't successful the previous call had already raised. Hence, + # we can be sure we have the tracker in the right format. 
+ return cast(Dict[Text, Any], result) + async def send_action( endpoint: EndpointConfig, @@ -208,9 +210,8 @@ async def send_action( policy: Optional[Text] = None, confidence: Optional[float] = None, is_new_action: bool = False, -) -> Dict[Text, Any]: +) -> Optional[Any]: """Log an action to a conversation.""" - payload = ActionExecuted(action_name, policy, confidence).as_dict() subpath = f"/conversations/{conversation_id}/execute" @@ -252,9 +253,8 @@ async def send_event( endpoint: EndpointConfig, conversation_id: Text, evt: Union[List[Dict[Text, Any]], Dict[Text, Any]], -) -> Dict[Text, Any]: +) -> Optional[Any]: """Log an event to a conversation.""" - subpath = f"/conversations/{conversation_id}/tracker/events" return await endpoint.request(json=evt, method="post", subpath=subpath) @@ -262,7 +262,6 @@ async def send_event( def format_bot_output(message: BotUttered) -> Text: """Format a bot response to be displayed in the history table.""" - # First, add text to output output = message.text or "" @@ -301,7 +300,6 @@ def format_bot_output(message: BotUttered) -> Text: def latest_user_message(events: List[Dict[Text, Any]]) -> Optional[Dict[Text, Any]]: """Return most recent user message.""" - for i, e in enumerate(reversed(events)): if e.get("event") == UserUttered.type_name: return e @@ -374,7 +372,6 @@ async def _request_free_text_action( async def _request_free_text_utterance( conversation_id: Text, endpoint: EndpointConfig, action: Text ) -> Text: - question = questionary.text( message=(f"Please type the message for your new bot response '{action}':"), validate=io_utils.not_empty_validator("Please enter a response"), @@ -650,16 +647,16 @@ async def _ask_if_quit(conversation_id: Text, endpoint: EndpointConfig) -> bool: # this is also the default answer if the user presses Ctrl-C await _write_data_to_file(conversation_id, endpoint) raise Abort() - elif answer == "continue": - # in this case we will just return, and the original - # question will get asked again - return True elif answer == "undo": raise UndoLastStep() elif answer == "fork": raise ForkTracker() elif answer == "restart": raise RestartConversation() + else: # `continue` or no answer + # in this case we will just return, and the original + # question will get asked again + return True async def _request_action_from_user( @@ -1584,7 +1581,7 @@ async def _get_tracker_events_to_plot( async def _get_training_trackers( file_importer: TrainingDataImporter, domain: Dict[str, Any] -) -> List[DialogueStateTracker]: +) -> List[TrackerWithCachedStates]: from rasa.core import training return await training.load_data( diff --git a/rasa/core/utils.py b/rasa/core/utils.py index 2156b5ec53c1..3b94f849a20c 100644 --- a/rasa/core/utils.py +++ b/rasa/core/utils.py @@ -88,10 +88,8 @@ def dump_obj_as_yaml_to_file( ) -def list_routes(app: Sanic) -> Text: - """List all the routes of a sanic application. - - Mainly used for debugging.""" +def list_routes(app: Sanic) -> Dict[Text, Text]: + """List all the routes of a sanic application. 
Mainly used for debugging.""" from urllib.parse import unquote output = {} diff --git a/rasa/model.py b/rasa/model.py index 78be4034060c..456dca7da184 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -8,7 +8,16 @@ import tempfile import typing from pathlib import Path -from typing import Any, Text, Tuple, Union, Optional, List, Dict, NamedTuple +from typing import ( + Any, + Text, + Tuple, + Union, + Optional, + List, + Dict, + NamedTuple, +) from packaging import version @@ -621,7 +630,7 @@ async def update_model_with_new_domain( def get_model_for_finetuning( previous_model_file: Optional[Union[Path, Text]] -) -> Optional[Text]: +) -> Optional[Union[Path, Text]]: """Gets validated path for model to finetune. Args: diff --git a/rasa/model_training.py b/rasa/model_training.py index 39b7538e779b..3acabed1089f 100644 --- a/rasa/model_training.py +++ b/rasa/model_training.py @@ -482,7 +482,7 @@ async def train_core_async( "No stories given. Please provide stories in order to " "train a Rasa Core model using the '--stories' argument." ) - return + return None return await _train_core_with_validated_data( file_importer, @@ -570,7 +570,7 @@ async def _train_core_with_validated_data( model_prefix="core-", ) - return _train_path + return _train_path async def _core_model_for_finetuning( @@ -677,7 +677,7 @@ async def train_nlu_async( "No NLU data given. Please provide NLU data in order to train " "a Rasa NLU model using the '--nlu' argument." ) - return + return None # training NLU only hence the training files still have to be selected file_importer = TrainingDataImporter.load_nlu_importer_from_config( @@ -691,7 +691,7 @@ async def train_nlu_async( f"Please verify the data format. " f"The NLU model training will be skipped now." ) - return + return None return await _train_nlu_with_validated_data( file_importer, @@ -780,7 +780,7 @@ async def _train_nlu_with_validated_data( model_prefix="nlu-", ) - return _train_path + return _train_path async def _nlu_model_for_finetuning( @@ -812,11 +812,11 @@ async def _nlu_model_for_finetuning( ) config = await file_importer.get_config() - model_to_finetune = Interpreter.load( + loaded_model_to_finetune = Interpreter.load( old_nlu, new_config=config, finetuning_epoch_fraction=finetuning_epoch_fraction, ) - if not model_to_finetune: + if not loaded_model_to_finetune: return None - return model_to_finetune + return loaded_model_to_finetune diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 0ce0a86552ca..ffbf38bffbcc 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -8,7 +8,7 @@ import scipy.sparse import tensorflow as tf -from typing import Any, Dict, List, Optional, Text, Tuple, Union, Type +from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar, Union, Type import rasa.shared.utils.io import rasa.utils.io as io_utils @@ -1046,16 +1046,18 @@ def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]: return {"file": file_name} + T = TypeVar("T") + @classmethod def load( - cls, + cls: T, meta: Dict[Text, Any], model_dir: Text, model_metadata: Metadata = None, cached_component: Optional["DIETClassifier"] = None, should_finetune: bool = False, **kwargs: Any, - ) -> "DIETClassifier": + ) -> T: """Loads the trained model from the provided directory.""" if not meta.get("file"): logger.debug( diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py index 89e37394119e..79efb2136f3f 100644 --- a/rasa/nlu/components.py +++ 
b/rasa/nlu/components.py @@ -477,7 +477,8 @@ def name(self) -> Text: will be a proper pipeline definition where `ComponentA` is the name of the first component of the pipeline. """ - return type(self).name + # cast due to https://github.com/python/mypy/issues/7945 + return typing.cast(str, type(self).name) @property def unique_name(self) -> Text: @@ -822,9 +823,6 @@ def can_handle_language(cls, language: Hashable) -> bool: return language not in not_supported_language_list -C = typing.TypeVar("C", bound=Component) - - class ComponentBuilder: """Creates trainers and interpreters based on configurations. @@ -875,7 +873,7 @@ def load_component( model_dir: Text, model_metadata: "Metadata", **context: Any, - ) -> Component: + ) -> Optional[Component]: """Loads a component. Tries to retrieve a component from the cache, else calls @@ -892,7 +890,6 @@ def load_component( Returns: The loaded component. """ - from rasa.nlu import registry try: @@ -928,7 +925,6 @@ def create_component( Returns: The created component. """ - from rasa.nlu import registry from rasa.nlu.model import Metadata @@ -945,12 +941,3 @@ def create_component( f"Failed to create component '{component_config['name']}'. " f"Error: {e}" ) - - def create_component_from_class(self, component_class: Type[C], **cfg: Any) -> C: - """Create a component based on a class and a configuration. - - Mainly used to make use of caching when instantiating component classes.""" - - component_config = {"name": component_class.name} - - return self.create_component(component_config, RasaNLUModelConfig(cfg)) diff --git a/rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py b/rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py index b100e926fb87..d09d040903fd 100644 --- a/rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +++ b/rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py @@ -3,7 +3,16 @@ from pathlib import Path import numpy as np -from typing import Any, Dict, Optional, Text, List, Type, Union, Callable +from typing import ( + Any, + Dict, + Optional, + Text, + List, + Type, + Union, + Callable, +) from rasa.nlu.tokenizers.spacy_tokenizer import POS_TAG_KEY from rasa.shared.constants import DOCS_URL_COMPONENTS @@ -145,9 +154,7 @@ def _map_features_to_indices( return feature_to_idx_dict @staticmethod - def _build_feature_vocabulary( - features: List[List[Dict[Text, Any]]] - ) -> Dict[Text, List[Text]]: + def _build_feature_vocabulary(features: List[List[Dict[Text, Any]]]) -> OrderedDict: feature_vocabulary = defaultdict(set) for sentence_features in features: @@ -156,13 +163,12 @@ def _build_feature_vocabulary( feature_vocabulary[feature_name].add(feature_value) # sort items to ensure same order every time (for tests) - feature_vocabulary = OrderedDict(sorted(feature_vocabulary.items())) + ordered_feature_vocabulary = OrderedDict(sorted(feature_vocabulary.items())) - return feature_vocabulary + return ordered_feature_vocabulary def _create_sparse_features(self, message: Message) -> None: - """Convert incoming messages into sparse features using the configured - features.""" + """Convert incoming messages into sparse features.""" import scipy.sparse tokens = message.get(TOKENS_NAMES[TEXT]) @@ -254,7 +260,7 @@ def _get_feature_value( token_idx: int, pointer_position: int, token_length: int, - ) -> Union[bool, int, Text]: + ) -> Union[bool, int, Text, None]: if feature == END_OF_SENTENCE: return token_idx + pointer_position == token_length - 1 @@ -273,6 +279,7 @@ def 
_get_feature_value( f"Invalid value '{value}' for feature '{feature}'." f" Feature is ignored." ) + return value @classmethod diff --git a/rasa/nlu/registry.py b/rasa/nlu/registry.py index 4f0891ddda9f..472b0c61f8f2 100644 --- a/rasa/nlu/registry.py +++ b/rasa/nlu/registry.py @@ -7,8 +7,9 @@ import logging import traceback import typing -from typing import Any, Dict, Optional, Text, Type +from typing import Any, Dict, Optional, Text, Type, List +from rasa.nlu.components import Component from rasa.nlu.classifiers.diet_classifier import DIETClassifier from rasa.nlu.classifiers.fallback_classifier import FallbackClassifier from rasa.nlu.classifiers.keyword_intent_classifier import KeywordIntentClassifier @@ -49,7 +50,6 @@ from rasa.shared.constants import DOCS_URL_COMPONENTS if typing.TYPE_CHECKING: - from rasa.nlu.components import Component from rasa.nlu.config import RasaNLUModelConfig logger = logging.getLogger(__name__) @@ -57,7 +57,7 @@ # Classes of all known components. If a new component should be added, # its class name should be listed here. -component_classes = [ +component_classes: List[Type[Component]] = [ # utils SpacyNLP, MitieNLP, @@ -95,7 +95,9 @@ ] # Mapping from a components name to its class to allow name based lookup. -registered_components = {c.name: c for c in component_classes} +registered_components: Dict[Text, Type[Component]] = { + c.name: c for c in component_classes +} class ComponentNotFoundException(ModuleNotFoundError, RasaException): @@ -181,12 +183,11 @@ def load_component_by_meta( def create_component_by_config( component_config: Dict[Text, Any], config: "RasaNLUModelConfig" -) -> Optional["Component"]: +) -> "Component": """Resolves a component and calls it's create method. Inits it based on a previously persisted model. 
""" - # try to get class name first, else create by name component_name = component_config.get("class", component_config["name"]) component_class = get_component_class(component_name) diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index a01b2163465b..bfa465929375 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -630,8 +630,7 @@ def load( **kwargs: Any, ) -> "ResponseSelector": """Loads the trained model from the provided directory.""" - - model = super().load( + model: ResponseSelector = super().load( meta, model_dir, model_metadata, cached_component, **kwargs ) if not meta.get("file"): diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py index 56dd3dacf3e2..6931d47ea55a 100644 --- a/rasa/nlu/test.py +++ b/rasa/nlu/test.py @@ -49,7 +49,6 @@ NO_ENTITY_TAG, INTENT_NAME_KEY, PREDICTED_CONFIDENCE_KEY, - ENTITY_ATTRIBUTE_TEXT, ) from rasa.model import get_model from rasa.nlu.components import ComponentBuilder @@ -67,7 +66,7 @@ EntityPrediction = TypedDict( "EntityPrediction", { - ENTITY_ATTRIBUTE_TEXT: Text, + "text": Text, "entities": List[Dict[Text, Any]], "predicted_entities": List[Dict[Text, Any]], }, @@ -759,13 +758,12 @@ def collect_incorrect_entity_predictions( for entity_result in entity_results: for i in range(offset, offset + len(entity_result.tokens)): if merged_targets[i] != merged_predictions[i]: - errors.append( - { - "text": entity_result.message, - "entities": entity_result.entity_targets, - "predicted_entities": entity_result.entity_predictions, - } - ) + prediction: EntityPrediction = { + "text": entity_result.message, + "entities": entity_result.entity_targets, + "predicted_entities": entity_result.entity_predictions, + } + errors.append(prediction) break offset += len(entity_result.tokens) return errors @@ -821,13 +819,12 @@ def collect_successful_entity_predictions( merged_targets[i] == merged_predictions[i] and merged_targets[i] != NO_ENTITY ): - successes.append( - { - "text": entity_result.message, - "entities": entity_result.entity_targets, - "predicted_entities": entity_result.entity_predictions, - } - ) + prediction: EntityPrediction = { + "text": entity_result.message, + "entities": entity_result.entity_targets, + "predicted_entities": entity_result.entity_predictions, + } + successes.append(prediction) break offset += len(entity_result.tokens) return successes @@ -1381,12 +1378,13 @@ def is_response_selector_present(interpreter: Interpreter) -> bool: return response_selectors != [] -def get_available_response_selector_types(interpreter: Interpreter) -> List[Text]: +def get_available_response_selector_types( + interpreter: Interpreter, +) -> List[Optional[Text]]: """Gets all available response selector types.""" - from rasa.nlu.selectors.response_selector import ResponseSelector - response_selector_types = [ + response_selector_types: List[Optional[Text]] = [ c.retrieval_intent for c in interpreter.pipeline if isinstance(c, ResponseSelector) diff --git a/rasa/nlu/train.py b/rasa/nlu/train.py index e9273cda4c32..82d0bd72545d 100644 --- a/rasa/nlu/train.py +++ b/rasa/nlu/train.py @@ -59,7 +59,11 @@ async def load_data_from_endpoint( return training_data except Exception as e: - logger.warning(f"Could not retrieve training data from URL:\n{e}") + logger.warning( + f"Could not retrieve training data from URL. Using empty " + f"training data instead. 
Error details:\n{e}" + ) + return TrainingData() def create_persistor(persistor: Optional[Text]) -> Optional["Persistor"]: diff --git a/rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py b/rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py index 62ab42bffd76..a716ebb78658 100644 --- a/rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py +++ b/rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py @@ -22,6 +22,8 @@ def cleanup_tokens( # remove empty strings token_ids_string = [(id, string) for id, string in token_ids_string if string] + token_ids: List[int] + token_strings: List[Text] # return as individual token ids and token strings token_ids, token_strings = zip(*token_ids_string) return token_ids, token_strings diff --git a/rasa/server.py b/rasa/server.py index e41eea908f05..c3c6d4331090 100644 --- a/rasa/server.py +++ b/rasa/server.py @@ -33,6 +33,7 @@ import rasa import rasa.core.utils +from rasa.nlu.emulators.emulator import Emulator import rasa.utils.common import rasa.shared.utils.common import rasa.shared.utils.io @@ -78,9 +79,12 @@ if TYPE_CHECKING: from ssl import SSLContext # noqa: F401 from rasa.core.processor import MessageProcessor - from mypy_extensions import VarArg, KwArg + from mypy_extensions import Arg, VarArg, KwArg - SanicView = Callable[[Request, VarArg(), KwArg()], response.BaseHTTPResponse] + SanicView = Callable[ + [Arg(Request, "request"), VarArg(), KwArg()], # noqa: F821 + response.BaseHTTPResponse, + ] logger = logging.getLogger(__name__) @@ -165,10 +169,10 @@ def decorated(*args: Any, **kwargs: Any) -> Any: return decorator -def ensure_conversation_exists() -> "SanicView": +def ensure_conversation_exists() -> Callable[["SanicView"], "SanicView"]: """Wraps a request handler ensuring the conversation exists.""" - def decorator(f: "SanicView") -> HTTPResponse: + def decorator(f: "SanicView") -> "SanicView": @wraps(f) def decorated(request: Request, *args: Any, **kwargs: Any) -> HTTPResponse: conversation_id = kwargs["conversation_id"] @@ -440,10 +444,11 @@ def create_ssl_context( return None -def _create_emulator(mode: Optional[Text]) -> NoEmulator: +def _create_emulator(mode: Optional[Text]) -> Emulator: """Create emulator for specified mode. - If no emulator is specified, we will use the Rasa NLU format.""" + If no emulator is specified, we will use the Rasa NLU format. + """ if mode is None: return NoEmulator() elif mode.lower() == "wit": diff --git a/rasa/shared/core/domain.py b/rasa/shared/core/domain.py index 33c58183736d..3ebb96d463d5 100644 --- a/rasa/shared/core/domain.py +++ b/rasa/shared/core/domain.py @@ -80,7 +80,8 @@ # State is a dictionary with keys (USER, PREVIOUS_ACTION, SLOTS, ACTIVE_LOOP) # representing the origin of a SubState; # the values are SubStates, that contain the information needed for featurization -SubState = Dict[Text, Union[Text, Tuple[Union[float, Text]]]] +SubStateValue = Union[Text, Tuple[Union[float, Text]]] +SubState = Dict[Text, SubStateValue] State = Dict[Text, SubState] logger = logging.getLogger(__name__) @@ -966,7 +967,7 @@ def _get_featurized_entities(self, latest_message: UserUttered) -> Set[Text]: def _get_user_sub_state( self, tracker: "DialogueStateTracker" - ) -> Dict[Text, Union[Text, Tuple[Text]]]: + ) -> Dict[Text, Union[None, Text, List[Optional[Text]], Tuple[str, ...]]]: """Turns latest UserUttered event into a substate. The substate will contain intent, text, and entities (if any are present). 
@@ -982,7 +983,9 @@ def _get_user_sub_state( if not latest_message or latest_message.is_empty(): return {} - sub_state = latest_message.as_sub_state() + sub_state: Dict[ + Text, Union[None, Text, List[Optional[Text]], Tuple[str, ...]] + ] = latest_message.as_sub_state() # Filter entities based on intent config. We need to convert the set into a # tuple because sub_state will be later transformed into a frozenset (so it can @@ -1017,7 +1020,7 @@ def _get_slots_sub_state( Returns: a dictionary mapping slot names to their featurization """ - slots = {} + slots: Dict[Text, Union[Text, Tuple[float]]] = {} for slot_name, slot in tracker.slots.items(): if slot is not None and slot.as_feature(): if omit_unset_slots and not slot.has_been_set: @@ -1033,8 +1036,9 @@ def _get_slots_sub_state( @staticmethod def _get_prev_action_sub_state( tracker: "DialogueStateTracker", - ) -> Dict[Text, Text]: + ) -> Optional[Dict[Text, Text]]: """Turn the previous taken action into a state name. + Args: tracker: dialog state tracker containing the dialog so far Returns: @@ -1315,7 +1319,9 @@ def get_responses_with_multilines( return final_responses - def _transform_intents_for_file(self) -> List[Union[Text, Dict[Text, Any]]]: + def _transform_intents_for_file( + self, + ) -> List[Dict[Text, Dict[Text, Union[bool, List[Text]]]]]: """Transform intent properties for displaying or writing into a domain file. Internally, there is a property `used_entities` that lists all entities to be @@ -1361,7 +1367,7 @@ def _transform_entities_for_file(self) -> List[Union[Text, Dict[Text, Any]]]: Returns: The entity properties as they are used in domain files. """ - entities_for_file = [] + entities_for_file: List[Union[Text, Dict[Text, Any]]] = [] for entity in self.entities: if entity in self.roles and entity in self.groups: diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py index a4b5bffe5097..7d5213e3ff9b 100644 --- a/rasa/shared/core/events.py +++ b/rasa/shared/core/events.py @@ -544,7 +544,7 @@ def as_dict(self) -> Dict[Text, Any]: ) return _dict - def as_sub_state(self) -> Dict[Text, Union[None, Text, List[Optional[Text]]]]: + def as_sub_state(self,) -> Dict[Text, Union[None, Text, List[Optional[Text]]]]: """Turns a UserUttered event into features. The substate contains information about entities, intent and text of the @@ -571,7 +571,7 @@ def as_sub_state(self) -> Dict[Text, Union[None, Text, List[Optional[Text]]]]: if ENTITY_ATTRIBUTE_GROUP in entity ) - out = {} + out: Dict[Text, Union[None, Text, List[Optional[Text]]]] = {} # During training we expect either intent_name or text to be set. # During prediction both will be set. 
if self.text and ( @@ -959,7 +959,9 @@ def as_story_string(self) -> Text: return f"{self.type_name}{props}" @classmethod - def _from_story_string(cls, parameters: Dict[Text, Any]) -> Optional[List[Event]]: + def _from_story_string( + cls, parameters: Dict[Text, Any] + ) -> Optional[List["SlotSet"]]: slots = [] for slot_key, slot_val in parameters.items(): @@ -1526,7 +1528,7 @@ def __repr__(self) -> Text: self.action_name, self.policy, self.confidence ) - def __str__(self) -> Text: + def __str__(self) -> Optional[Text]: """Returns event as human readable string.""" return self.action_name or self.action_text @@ -1541,7 +1543,7 @@ def __eq__(self, other: Any) -> bool: return self.__members__() == other.__members__() - def as_story_string(self) -> Text: + def as_story_string(self) -> Optional[Text]: """Returns event in Markdown format.""" if self.action_text: raise UnsupportedFeatureException( diff --git a/rasa/shared/core/generator.py b/rasa/shared/core/generator.py index 1d6f8e2e8a35..66c99e928778 100644 --- a/rasa/shared/core/generator.py +++ b/rasa/shared/core/generator.py @@ -103,13 +103,14 @@ def past_states_for_hashing( # if don't have it cached, we use the domain to calculate the states # from the events - if self._states_for_hashing is None: + states_for_hashing = self._states_for_hashing + if states_for_hashing is None: states = super().past_states(domain, omit_unset_slots=omit_unset_slots) - self._states_for_hashing = deque( - self.freeze_current_state(s) for s in states - ) + states_for_hashing = deque(self.freeze_current_state(s) for s in states) + + self._states_for_hashing = states_for_hashing - return self._states_for_hashing + return states_for_hashing @staticmethod def _unfreeze_states(frozen_states: Deque[FrozenState]) -> List[State]: @@ -218,7 +219,7 @@ def update(self, event: Event, skip_states: bool = False) -> None: # define types -TrackerLookupDict = Dict[Optional[Text], List[TrackerWithCachedStates]] +TrackerLookupDict = Dict[Text, List[TrackerWithCachedStates]] TrackersTuple = Tuple[List[TrackerWithCachedStates], List[TrackerWithCachedStates]] diff --git a/rasa/shared/core/trackers.py b/rasa/shared/core/trackers.py index 4f9ab8c975a0..999ea0e314ce 100644 --- a/rasa/shared/core/trackers.py +++ b/rasa/shared/core/trackers.py @@ -878,7 +878,9 @@ def latest_action_name(self) -> Optional[Text]: ) -def get_active_loop_name(state: State) -> Optional[Text]: +def get_active_loop_name( + state: State, +) -> Optional[Union[Text, Tuple[Union[float, Text]]]]: """Get the name of current active loop. 
Args: @@ -891,7 +893,7 @@ def get_active_loop_name(state: State) -> Optional[Text]: not state.get(ACTIVE_LOOP) or state[ACTIVE_LOOP].get(LOOP_NAME) == SHOULD_NOT_BE_SET ): - return + return None return state[ACTIVE_LOOP].get(LOOP_NAME) diff --git a/rasa/shared/core/training_data/story_reader/yaml_story_reader.py b/rasa/shared/core/training_data/story_reader/yaml_story_reader.py index 609273203a5e..51cbf5532681 100644 --- a/rasa/shared/core/training_data/story_reader/yaml_story_reader.py +++ b/rasa/shared/core/training_data/story_reader/yaml_story_reader.py @@ -437,7 +437,7 @@ def _parse_raw_user_utterance(self, step: Dict[Text, Any]) -> Optional[UserUtter @staticmethod def _parse_raw_entities( raw_entities: Union[List[Dict[Text, Text]], List[Text]] - ) -> List[Dict[Text, Text]]: + ) -> List[Dict[Text, Optional[Text]]]: final_entities = [] for entity in raw_entities: if isinstance(entity, dict): diff --git a/rasa/shared/core/training_data/story_writer/yaml_story_writer.py b/rasa/shared/core/training_data/story_writer/yaml_story_writer.py index 542d1e5336b3..96b0269141c3 100644 --- a/rasa/shared/core/training_data/story_writer/yaml_story_writer.py +++ b/rasa/shared/core/training_data/story_writer/yaml_story_writer.py @@ -1,6 +1,13 @@ from collections import OrderedDict from pathlib import Path -from typing import Any, Dict, List, Text, Union, Optional +from typing import ( + Any, + Dict, + List, + Text, + Union, + Optional, +) from ruamel import yaml from ruamel.yaml.comments import CommentedMap @@ -287,14 +294,14 @@ def process_action(action: ActionExecuted) -> Optional[OrderedDict]: return result @staticmethod - def process_slot(event: SlotSet) -> Dict[Text, List[Dict]]: + def process_slot(event: SlotSet) -> OrderedDict: """Converts a single `SlotSet` event into an ordered dict. Args: event: Original `SlotSet` event. Returns: - Dict with an `SlotSet` event. + OrderedDict with an `SlotSet` event. """ return OrderedDict([(KEY_SLOT_NAME, [{event.key: event.value}])]) diff --git a/rasa/shared/core/training_data/structures.py b/rasa/shared/core/training_data/structures.py index 3ea111e666cf..8216029d8f9f 100644 --- a/rasa/shared/core/training_data/structures.py +++ b/rasa/shared/core/training_data/structures.py @@ -254,7 +254,7 @@ def _add_action_listen(self, events: List[Event]) -> None: def explicit_events( self, domain: Domain, should_append_final_listen: bool = True - ) -> List[Union[Event, List[Event]]]: + ) -> List[Event]: """Returns events contained in the story step including implicit events. Not all events are always listed in the story dsl. This @@ -262,8 +262,7 @@ def explicit_events( set slots. This functions makes these events explicit and returns them with the rest of the steps events. 
""" - - events = [] + events: List[Event] = [] for e in self.events: if isinstance(e, UserUttered): @@ -473,13 +472,11 @@ def merge(self, other: Optional["StoryGraph"]) -> "StoryGraph": def overlapping_checkpoint_names( cps: List[Checkpoint], other_cps: List[Checkpoint] ) -> Set[Text]: - """Find overlapping checkpoints names""" - + """Find overlapping checkpoints names.""" return {cp.name for cp in cps} & {cp.name for cp in other_cps} def with_cycles_removed(self) -> "StoryGraph": """Create a graph with the cyclic edges removed from this graph.""" - story_end_checkpoints = self.story_end_checkpoints.copy() cyclic_edge_ids = self.cyclic_edge_ids # we need to remove the start steps and replace them with steps ending @@ -638,9 +635,8 @@ def _is_checkpoint_in_list( @staticmethod def _find_unused_checkpoints( story_steps: ValuesView[StoryStep], story_end_checkpoints: Dict[Text, Text] - ) -> Set[Text]: + ) -> Set[Optional[Text]]: """Finds all unused checkpoints.""" - collected_start = {STORY_END, STORY_START} collected_end = {STORY_END, STORY_START} diff --git a/rasa/shared/importers/importer.py b/rasa/shared/importers/importer.py index 513fee53541b..1f45c2ba84a5 100644 --- a/rasa/shared/importers/importer.py +++ b/rasa/shared/importers/importer.py @@ -522,10 +522,10 @@ async def _additional_training_data_from_stories(self) -> TrainingData: # model has to be retrained due to changes in the event order within # the stories. sorted_utterances = sorted( - utterances, key=lambda user: user.intent_name or user.text + utterances, key=lambda user: user.intent_name or user.text or "" ) sorted_actions = sorted( - actions, key=lambda action: action.action_name or action.action_text + actions, key=lambda action: action.action_name or action.action_text or "" ) additional_messages_from_stories = [ diff --git a/rasa/shared/nlu/training_data/formats/rasa_yaml.py b/rasa/shared/nlu/training_data/formats/rasa_yaml.py index bb6e1dc9a70e..adaf46d43c2f 100644 --- a/rasa/shared/nlu/training_data/formats/rasa_yaml.py +++ b/rasa/shared/nlu/training_data/formats/rasa_yaml.py @@ -436,7 +436,10 @@ def process_synonyms(cls, training_data: "TrainingData") -> List[OrderedDict]: inverted_synonyms[synonym].append(example) return cls.process_training_examples_by_key( - inverted_synonyms, KEY_SYNONYM, KEY_SYNONYM_EXAMPLES + inverted_synonyms, + KEY_SYNONYM, + KEY_SYNONYM_EXAMPLES, + example_extraction_predicate=lambda x: str(x), ) @classmethod @@ -448,12 +451,23 @@ def process_regexes(cls, training_data: "TrainingData") -> List[OrderedDict]: inverted_regexes[regex["name"]].append(regex["pattern"]) return cls.process_training_examples_by_key( - inverted_regexes, KEY_REGEX, KEY_REGEX_EXAMPLES + inverted_regexes, + KEY_REGEX, + KEY_REGEX_EXAMPLES, + example_extraction_predicate=lambda x: str(x), ) @classmethod def process_lookup_tables(cls, training_data: "TrainingData") -> List[OrderedDict]: - prepared_lookup_tables = OrderedDict() + """Serializes the look up tables. + + Args: + training_data: The training data object with potential look up tables. + + Returns: + The serialized lookup tables. 
+ """ + prepared_lookup_tables: Dict[Text, List[Text]] = OrderedDict() for lookup_table in training_data.lookup_tables: # this is a lookup table filename if isinstance(lookup_table["elements"], str): @@ -461,16 +475,36 @@ def process_lookup_tables(cls, training_data: "TrainingData") -> List[OrderedDic prepared_lookup_tables[lookup_table["name"]] = lookup_table["elements"] return cls.process_training_examples_by_key( - prepared_lookup_tables, KEY_LOOKUP, KEY_LOOKUP_EXAMPLES + prepared_lookup_tables, + KEY_LOOKUP, + KEY_LOOKUP_EXAMPLES, + example_extraction_predicate=lambda x: str(x), ) @staticmethod def process_training_examples_by_key( - training_examples: Dict, + training_examples: Dict[Text, List[Union[Dict, Text]]], key_name: Text, key_examples: Text, - example_extraction_predicate: Callable[[Dict[Text, Any]], Text] = lambda x: x, + example_extraction_predicate: Callable[[Dict[Text, Any]], Text], ) -> List[OrderedDict]: + """Prepares training examples to be written to YAML. + + This can be any NLU training data (intent examples, lookup tables, etc.) + + Args: + training_examples: Multiple training examples. Mappings in case additional + values were specified for an example (e.g. metadata) or just the plain + value. + key_name: The top level key which the examples belong to (e.g. `intents`) + key_examples: The sub key which the examples should be added to + (e.g. `examples`). + example_extraction_predicate: Function to extract example value (e.g. the + the text for an intent example) + + Returns: + NLU training data examples prepared for writing to YAML. + """ intents = [] for intent_name, examples in training_examples.items(): @@ -505,8 +539,8 @@ def process_training_examples_by_key( @staticmethod def _convert_training_examples( - training_examples: List[Dict], - example_extraction_predicate: Callable[[Dict[Text, Any]], Text] = lambda x: x, + training_examples: List[Union[Dict, List[Text]]], + example_extraction_predicate: Callable[[Dict[Text, Any]], Text], ) -> Tuple[List[Dict], Optional[Dict]]: """Returns converted training examples and potential intent metadata.""" converted_examples = [] @@ -555,7 +589,7 @@ def render(example: Dict) -> Dict: return [render(ex) for ex in examples] @staticmethod - def _render_training_examples_as_text(examples: List[Dict]) -> List[Text]: + def _render_training_examples_as_text(examples: List[Dict]) -> LiteralScalarString: def render(example: Dict) -> Text: return TrainingDataWriter.generate_list_item(example[KEY_INTENT_TEXT]) diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py index cdaaa0b800fa..1085c7cb2daa 100644 --- a/rasa/shared/nlu/training_data/message.py +++ b/rasa/shared/nlu/training_data/message.py @@ -226,10 +226,14 @@ def get_sparse_features( attribute, featurizers ) - sequence_features = self._combine_features(sequence_features, featurizers) - sentence_features = self._combine_features(sentence_features, featurizers) + combined_sequence_features = self._combine_features( + sequence_features, featurizers + ) + combined_sentence_features = self._combine_features( + sentence_features, featurizers + ) - return sequence_features, sentence_features + return combined_sequence_features, combined_sentence_features def get_sparse_feature_sizes( self, attribute: Text, featurizers: Optional[List[Text]] = None @@ -280,10 +284,14 @@ def get_dense_features( attribute, featurizers ) - sequence_features = self._combine_features(sequence_features, featurizers) - sentence_features = 
@@ -280,10 +284,14 @@ def get_dense_features(
             attribute, featurizers
         )
 
-        sequence_features = self._combine_features(sequence_features, featurizers)
-        sentence_features = self._combine_features(sentence_features, featurizers)
+        combined_sequence_features = self._combine_features(
+            sequence_features, featurizers
+        )
+        combined_sentence_features = self._combine_features(
+            sentence_features, featurizers
+        )
 
-        return sequence_features, sentence_features
+        return combined_sequence_features, combined_sentence_features
 
     def get_all_features(
         self, attribute: Text, featurizers: Optional[List[Text]] = None
diff --git a/rasa/telemetry.py b/rasa/telemetry.py
index e51406362d85..0cbdd97e6a13 100644
--- a/rasa/telemetry.py
+++ b/rasa/telemetry.py
@@ -11,7 +11,7 @@
 import sys
 import textwrap
 import typing
-from typing import Any, Callable, Dict, List, Optional, Text
+from typing import Any, Callable, Dict, List, Optional, Text, Union
 import uuid
 
 import async_generator
@@ -849,7 +849,7 @@ def track_server_start(
 
     def project_fingerprint_from_model(
         _model_directory: Optional[Text],
-    ) -> Optional[Text]:
+    ) -> Optional[Union[Text, List[Text], int, float]]:
         """Get project fingerprint from an app's loaded model."""
         if _model_directory:
             try:
diff --git a/rasa/utils/common.py b/rasa/utils/common.py
index 7edca7194a5c..7cac8dc6d824 100644
--- a/rasa/utils/common.py
+++ b/rasa/utils/common.py
@@ -14,6 +14,7 @@
     Type,
     TypeVar,
     Union,
+    ContextManager,
 )
 
 import rasa.utils.io
@@ -26,10 +27,10 @@ T = TypeVar("T")
 
 
-class TempDirectoryPath(str):
-    """Represents a path to an temporary directory. When used as a context
-    manager, it erases the contents of the directory on exit.
+class TempDirectoryPath(str, ContextManager):
+    """Represents a path to a temporary directory.
 
+    When used as a context manager, it erases the contents of the directory on exit.
     """
 
     def __enter__(self) -> "TempDirectoryPath":
@@ -40,7 +41,7 @@ def __exit__(
         _exc: Optional[Type[BaseException]],
         _value: Optional[Exception],
         _tb: Optional[TracebackType],
-    ) -> bool:
+    ) -> None:
         if os.path.exists(self):
             shutil.rmtree(self)
 
diff --git a/rasa/utils/tensorflow/data_generator.py b/rasa/utils/tensorflow/data_generator.py
index 49c9a54a07e9..cc50f527d0d1 100644
--- a/rasa/utils/tensorflow/data_generator.py
+++ b/rasa/utils/tensorflow/data_generator.py
@@ -76,7 +76,7 @@ def prepare_batch(
         start: Optional[int] = None,
         end: Optional[int] = None,
         tuple_sizes: Optional[Dict[Text, int]] = None,
-    ) -> Tuple[Optional[np.ndarray]]:
+    ) -> Tuple[Optional[np.ndarray], ...]:
         """Slices model data into batch using given start and end value.
 
         Args:
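On the `prepare_batch` hunk above: `Tuple[Optional[np.ndarray]]` denotes a tuple of exactly one element, so the old annotation only ever described a 1-tuple; the trailing ellipsis declares a homogeneous tuple of arbitrary length, which is what a sliced batch actually is. A small sketch (illustrative function, not the Rasa API):

```python
from typing import Optional, Tuple

import numpy as np

def make_batch(size: int) -> Tuple[Optional[np.ndarray], ...]:
    # Without the trailing ellipsis the annotation would admit only a 1-tuple.
    return tuple(np.zeros(3) if i % 2 == 0 else None for i in range(size))

batch = make_batch(4)  # a 4-tuple, valid under Tuple[Optional[np.ndarray], ...]
print(len(batch))  # 4
```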
diff --git a/rasa/utils/tensorflow/environment.py b/rasa/utils/tensorflow/environment.py
index 010f4eaf6d40..04846c3fc518 100644
--- a/rasa/utils/tensorflow/environment.py
+++ b/rasa/utils/tensorflow/environment.py
@@ -85,15 +85,15 @@ def _parse_gpu_config(gpu_memory_config: Text) -> Dict[int, int]:
 
     # gpu_config is of format "gpu_id_1:gpu_id_1_memory, gpu_id_2: gpu_id_2_memory"
     # Parse it and store in a dictionary
-    parsed_gpu_config = {}
+    parsed_gpu_config: Dict[int, int] = {}
 
     try:
         for instance in gpu_memory_config.split(","):
             instance_gpu_id, instance_gpu_mem = instance.split(":")
-            instance_gpu_id = int(instance_gpu_id)
-            instance_gpu_mem = int(instance_gpu_mem)
+            parsed_instance_gpu_id = int(instance_gpu_id)
+            parsed_instance_gpu_mem = int(instance_gpu_mem)
 
-            parsed_gpu_config[instance_gpu_id] = instance_gpu_mem
+            parsed_gpu_config[parsed_instance_gpu_id] = parsed_instance_gpu_mem
     except ValueError:
         # Helper explanation of where the error comes from
         raise ValueError(
diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py
index 103e3eca89a2..29b48a6894e2 100644
--- a/rasa/utils/tensorflow/model_data.py
+++ b/rasa/utils/tensorflow/model_data.py
@@ -264,7 +264,7 @@ def __init__(
         self.label_sub_key = label_sub_key
         # should be updated when features are added
         self.num_examples = self.number_of_examples()
-        self.sparse_feature_sizes = None
+        self.sparse_feature_sizes = {}
 
     def get(
         self, key: Text, sub_key: Optional[Text] = None
@@ -741,7 +741,7 @@ def balanced_data(self, data: Data, batch_size: int, shuffle: bool) -> Data:
             if min(num_data_cycles) > 0:
                 break
 
-        final_data = defaultdict(lambda: defaultdict(list))
+        final_data: Data = defaultdict(lambda: defaultdict(list))
         for key, attribute_data in new_data.items():
             for sub_key, features in attribute_data.items():
                 for f in features:
@@ -789,7 +789,7 @@ def _data_for_ids(data: Optional[Data], ids: np.ndarray) -> Data:
     Returns:
         The filtered data
     """
-    new_data = defaultdict(lambda: defaultdict(list))
+    new_data: Data = defaultdict(lambda: defaultdict(list))
 
     if data is None:
         return new_data
diff --git a/rasa/utils/tensorflow/model_data_utils.py b/rasa/utils/tensorflow/model_data_utils.py
index e75e598c5001..f2c4a96d451e 100644
--- a/rasa/utils/tensorflow/model_data_utils.py
+++ b/rasa/utils/tensorflow/model_data_utils.py
@@ -3,7 +3,7 @@
 import numpy as np
 import scipy.sparse
 from collections import defaultdict, OrderedDict
-from typing import List, Optional, Text, Dict, Tuple, Union, Any
+from typing import List, Optional, Text, Dict, Tuple, Union, Any, DefaultDict
 
 from rasa.nlu.constants import TOKENS_NAMES
 from rasa.utils.tensorflow.model_data import Data, FeatureArray
@@ -141,7 +141,7 @@ def get_tag_ids(
 def _surface_attributes(
     features: List[List[Dict[Text, List["Features"]]]],
     featurizers: Optional[List[Text]] = None,
-) -> Dict[Text, List[List[List["Features"]]]]:
+) -> DefaultDict[Text, List[List[Optional[List["Features"]]]]]:
     """Restructure the input.
 
     "features" can, for example, be a dictionary of attributes (INTENT,
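The widened signature of `_surface_attributes` (whose body follows below) records two facts mypy can now verify: the function returns the `defaultdict` it builds, and attributes absent from an example are appended as `None` placeholders. A condensed sketch of that shape, with simplified types in place of the real `Features` objects:

```python
from collections import defaultdict
from typing import DefaultDict, Dict, List, Optional

def surface_attributes(
    examples: List[Dict[str, str]]
) -> DefaultDict[str, List[Optional[str]]]:
    output: DefaultDict[str, List[Optional[str]]] = defaultdict(list)
    for example in examples:
        for attribute in ("INTENT", "TEXT"):
            # dict.get returns None for absent attributes, hence Optional[...]
            output[attribute].append(example.get(attribute))
    return output

print(surface_attributes([{"INTENT": "greet"}, {"TEXT": "hi"}]))
# defaultdict(<class 'list'>, {'INTENT': ['greet', None], 'TEXT': [None, 'hi']})
```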
"features" can, for example, be a dictionary of attributes (INTENT, @@ -171,20 +171,21 @@ def _surface_attributes( ) output = defaultdict(list) - for list_of_attribute_to_features in features: intermediate_features = defaultdict(list) for attribute_to_features in list_of_attribute_to_features: for attribute in attributes: - features = attribute_to_features.get(attribute) + attribute_features = attribute_to_features.get(attribute) if featurizers: - features = _filter_features(features, featurizers) + attribute_features = _filter_features( + attribute_features, featurizers + ) # if attribute is not present in the example, populate it with None - intermediate_features[attribute].append(features) + intermediate_features[attribute].append(attribute_features) - for key, value in intermediate_features.items(): - output[key].append(value) + for key, collection_of_feature_collections in intermediate_features.items(): + output[key].append(collection_of_feature_collections) return output diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index bfabd565c24c..53790394a42e 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -361,14 +361,16 @@ def _merge_batch_outputs( def _empty_lists_to_none_in_dict(input_dict: Dict[Text, Any]) -> Dict[Text, Any]: """Recursively replaces empty list or np array with None in a dictionary.""" - def _recurse(x: Union[Dict[Text, Any], List[Any], np.ndarray]) -> Optional[Any]: + def _recurse( + x: Union[Dict[Text, Any], List[Any], np.ndarray] + ) -> Optional[Union[Dict[Text, Any], List[np.ndarray]]]: if isinstance(x, dict): return {k: _recurse(v) for k, v in x.items()} elif (isinstance(x, list) or isinstance(x, np.ndarray)) and np.size(x) == 0: return None return x - return _recurse(input_dict) + return {k: _recurse(v) for k, v in input_dict.items()} def _get_metric_results(self, prefix: Optional[Text] = "") -> Dict[Text, float]: return { @@ -451,7 +453,9 @@ def batch_to_model_data_format( if isinstance(batch[0], Tuple): batch = batch[0] - batch_data = defaultdict(lambda: defaultdict(list)) + batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] = defaultdict( + lambda: defaultdict(list) + ) idx = 0 for key, values in data_signature.items(): @@ -855,7 +859,8 @@ def _get_batch_dim(attribute_data: Dict[Text, List[tf.Tensor]]) -> int: for key, data in attribute_data.items(): if data: return tf.shape(data[0])[0] - return None + + return 0 def _calculate_entity_loss( self, diff --git a/rasa/utils/tensorflow/rasa_layers.py b/rasa/utils/tensorflow/rasa_layers.py index de6261d79378..2daec3567000 100644 --- a/rasa/utils/tensorflow/rasa_layers.py +++ b/rasa/utils/tensorflow/rasa_layers.py @@ -901,6 +901,8 @@ def _features_as_token_ids( self._tf_layers[self.SPARSE_TO_DENSE_FOR_TOKEN_IDS](f) ) + return None + def _create_mlm_tensors( self, sequence_features: List[Union[tf.Tensor, tf.SparseTensor]], diff --git a/setup.cfg b/setup.cfg index b636edea5411..6b3e66735e56 100644 --- a/setup.cfg +++ b/setup.cfg @@ -47,5 +47,5 @@ disallow_untyped_decorators = True # FIXME: working our way towards removing these # see https://github.com/RasaHQ/rasa/pull/6470 # the list below is sorted by the number of errors for each error code, in decreasing order -disable_error_code = arg-type, assignment, var-annotated, return-value, union-attr, - override, operator, attr-defined, misc, return +disable_error_code = arg-type, assignment, var-annotated, union-attr, + override, operator, attr-defined, misc diff --git a/tests/core/utilities.py 
diff --git a/tests/core/utilities.py b/tests/core/utilities.py
index 5897a6ed707f..c3a81579cae5 100644
--- a/tests/core/utilities.py
+++ b/tests/core/utilities.py
@@ -49,18 +49,18 @@ def mocked_cmd_input(package, text: Text):
         text = [text]
 
     text_generator = itertools.cycle(text)
-    i = package.get_user_input
+    i = package._get_user_input
 
     def mocked_input(*args, **kwargs):
         value = next(text_generator)
         print(f"wrote '{value}' to input")
         return value
 
-    package.get_user_input = mocked_input
+    package._get_user_input = mocked_input
    try:
         yield
     finally:
-        package.get_user_input = i
+        package._get_user_input = i
 
 
 def user_uttered(