diff --git a/.github/scripts/mr_generate_summary.py b/.github/scripts/mr_generate_summary.py index 20d7a7b46e46..e1e039afaf76 100644 --- a/.github/scripts/mr_generate_summary.py +++ b/.github/scripts/mr_generate_summary.py @@ -58,7 +58,13 @@ def read_results(file): with open(file) as json_file: data = json.load(json_file) - keys = ["accuracy", "weighted avg", "macro avg", "micro avg", "conversation_accuracy"] + keys = [ + "accuracy", + "weighted avg", + "macro avg", + "micro avg", + "conversation_accuracy", + ] result = {key: data[key] for key in keys if key in data} return result diff --git a/.github/scripts/mr_publish_results.py b/.github/scripts/mr_publish_results.py index 7f0af454081c..cb301ff3e784 100644 --- a/.github/scripts/mr_publish_results.py +++ b/.github/scripts/mr_publish_results.py @@ -69,7 +69,13 @@ def read_results(file): with open(file) as json_file: data = json.load(json_file) - keys = ["accuracy", "weighted avg", "macro avg", "micro avg", "conversation_accuracy"] + keys = [ + "accuracy", + "weighted avg", + "macro avg", + "micro avg", + "conversation_accuracy", + ] result = {key: data[key] for key in keys if key in data} return result diff --git a/poetry.lock b/poetry.lock index ca1fef01c4a4..247fbf2f32fc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3157,7 +3157,7 @@ transformers = ["transformers"] [metadata] lock-version = "1.1" python-versions = ">=3.6,<3.9" -content-hash = "8319a632c0f0f25e3574887786ba9fb51277f7cc7010a02f8866933065506563" +content-hash = "80436920af7907d20d9d32586e178c2ea66bb31ebb3c85c42a7219f77e6e8301" [metadata.files] absl-py = [ diff --git a/pyproject.toml b/pyproject.toml index 103346159d6d..6fa8e1d22731 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -156,6 +156,7 @@ types-requests = "^2.25.0" types-setuptools = "^57.0.0" memory-profiler = "^0.58.0" psutil = "^5.8.0" +mypy-extensions = "^0.4.3" [tool.poetry.extras] spacy = [ "spacy",] diff --git a/rasa/cli/train.py b/rasa/cli/train.py index a85dbed7b4d0..51b8ad9d4f07 100644 --- a/rasa/cli/train.py +++ b/rasa/cli/train.py @@ -165,6 +165,7 @@ def run_core_training( rasa.utils.common.run_in_loop( do_compare_training(args, story_file, additional_arguments) ) + return None def run_nlu_training( diff --git a/rasa/cli/x.py b/rasa/cli/x.py index 4dad62163880..60fdb139ecf2 100644 --- a/rasa/cli/x.py +++ b/rasa/cli/x.py @@ -2,7 +2,8 @@ import asyncio import importlib.util import logging -from multiprocessing import get_context, Process +from multiprocessing.process import BaseProcess +from multiprocessing import get_context import os import signal import sys @@ -198,7 +199,7 @@ def _is_correct_event_broker(event_broker: EndpointConfig) -> bool: def start_rasa_for_local_rasa_x( args: argparse.Namespace, rasa_x_token: Text -) -> Process: +) -> BaseProcess: """Starts the Rasa X API with Rasa as a background process.""" credentials_path, endpoints_path = _get_credentials_and_endpoints_paths(args) endpoints = AvailableEndpoints.read_endpoints(endpoints_path) diff --git a/rasa/core/actions/action.py b/rasa/core/actions/action.py index 03c8344f0b7c..28eb8568eb6a 100644 --- a/rasa/core/actions/action.py +++ b/rasa/core/actions/action.py @@ -510,7 +510,7 @@ async def run( domain: "Domain", ) -> List[Event]: """Runs action. 
Please see parent class for the full docstring.""" - _events = [SessionStarted(metadata=self.metadata)] + _events: List[Event] = [SessionStarted(metadata=self.metadata)] if domain.session_config.carry_over_slots: _events.extend(self._slot_set_events_from_tracker(tracker)) @@ -690,7 +690,7 @@ async def run( events_json = response.get("events", []) responses = response.get("responses", []) - bot_messages = await self._utter_responses( + bot_messages: List[Event] = await self._utter_responses( responses, output_channel, nlg, tracker ) diff --git a/rasa/core/actions/forms.py b/rasa/core/actions/forms.py index 4adcca6a2520..b1091fb507ba 100644 --- a/rasa/core/actions/forms.py +++ b/rasa/core/actions/forms.py @@ -425,7 +425,7 @@ async def validate_slots( domain: Domain, output_channel: OutputChannel, nlg: NaturalLanguageGenerator, - ) -> List[Event]: + ) -> List[Union[SlotSet, Event]]: """Validate the extracted slots. If a custom action is available for validating the slots, we call it to validate @@ -445,7 +445,7 @@ async def validate_slots( for the validated slots. """ logger.debug(f"Validating extracted slots: {slot_candidates}") - events = [ + events: List[Union[SlotSet, Event]] = [ SlotSet(slot_name, value) for slot_name, value in slot_candidates.items() ] @@ -506,7 +506,7 @@ async def validate( domain: Domain, output_channel: OutputChannel, nlg: NaturalLanguageGenerator, - ) -> List[Event]: + ) -> List[Union[SlotSet, Event]]: """Extract and validate value of requested slot. If nothing was extracted reject execution of the form action. @@ -560,9 +560,9 @@ async def request_next_slot( output_channel: OutputChannel, nlg: NaturalLanguageGenerator, events_so_far: List[Event], - ) -> List[Event]: + ) -> List[Union[SlotSet, Event]]: """Request the next slot and response if needed, else return `None`.""" - request_slot_events = [] + request_slot_events: List[Union[SlotSet, Event]] = [] if await self.is_done(output_channel, nlg, tracker, domain, events_so_far): # The custom action for slot validation decided to stop the form early diff --git a/rasa/core/actions/two_stage_fallback.py b/rasa/core/actions/two_stage_fallback.py index feb845395cec..0d4251c696bf 100644 --- a/rasa/core/actions/two_stage_fallback.py +++ b/rasa/core/actions/two_stage_fallback.py @@ -104,7 +104,8 @@ async def deactivate( return await self._give_up(output_channel, nlg, tracker, domain) # revert fallback events - return [UserUtteranceReverted()] + _message_clarification(tracker) + reverted_event: List[Event] = [UserUtteranceReverted()] + return reverted_event + _message_clarification(tracker) async def _give_up( self, @@ -137,7 +138,7 @@ def _two_fallbacks_in_a_row(tracker: DialogueStateTracker) -> bool: def _last_n_intent_names( tracker: DialogueStateTracker, number_of_last_intent_names: int -) -> List[Text]: +) -> List[Optional[Text]]: intent_names = [] for i in range(number_of_last_intent_names): message = tracker.get_last_event_for( diff --git a/rasa/core/agent.py b/rasa/core/agent.py index 70d9fedf58a8..ab5f0b81adb7 100644 --- a/rasa/core/agent.py +++ b/rasa/core/agent.py @@ -4,7 +4,16 @@ import shutil import tempfile from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Text, Tuple, Union +from typing import ( + Any, + Callable, + Dict, + List, + Optional, + Text, + Tuple, + Union, +) import uuid import aiohttp @@ -52,6 +61,8 @@ from rasa.utils.endpoints import EndpointConfig import rasa.utils.io +from rasa.shared.core.generator import TrackerWithCachedStates + logger = 
logging.getLogger(__name__) @@ -679,7 +690,7 @@ def _are_all_featurizers_using_a_max_history(self) -> bool: """Check if all featurizers are MaxHistoryTrackerFeaturizer.""" def has_max_history_featurizer(policy: Policy) -> bool: - return ( + return bool( policy.featurizer and hasattr(policy.featurizer, "max_history") and policy.featurizer.max_history is not None @@ -700,9 +711,8 @@ async def load_data( use_story_concatenation: bool = True, debug_plots: bool = False, exclusion_percentage: Optional[int] = None, - ) -> List[DialogueStateTracker]: + ) -> List["TrackerWithCachedStates"]: """Load training data from a resource.""" - max_history = self._max_history() if unique_last_num_states is None: @@ -769,7 +779,6 @@ def _clear_model_directory(model_path: Text) -> None: Only removes files if the directory seems to contain a previously persisted model. Otherwise does nothing to avoid deleting `/` by accident.""" - if not os.path.exists(model_path): return diff --git a/rasa/core/channels/botframework.py b/rasa/core/channels/botframework.py index 43378645883c..fed9e28c4cba 100644 --- a/rasa/core/channels/botframework.py +++ b/rasa/core/channels/botframework.py @@ -75,6 +75,7 @@ async def _get_headers(self) -> Optional[Dict[Text, Any]]: return BotFramework.headers else: logger.error("Could not get BotFramework token") + return None else: return BotFramework.headers diff --git a/rasa/core/channels/channel.py b/rasa/core/channels/channel.py index fd8cc65ca054..d002fc69e5b1 100644 --- a/rasa/core/channels/channel.py +++ b/rasa/core/channels/channel.py @@ -200,6 +200,8 @@ def decode_bearer_token( except Exception: logger.exception("Failed to decode bearer token.") + return None + class OutputChannel: """Output channel base class. diff --git a/rasa/core/channels/console.py b/rasa/core/channels/console.py index 42eec83dfe2d..86a89630dbf8 100644 --- a/rasa/core/channels/console.py +++ b/rasa/core/channels/console.py @@ -47,9 +47,10 @@ def print_buttons( rasa.shared.utils.cli.print_color( cli_utils.button_to_string(button, idx), color=color ) + return None -def print_bot_output( +def _print_bot_output( message: Dict[Text, Any], is_latest_message: bool = False, color: Text = rasa.shared.utils.io.bcolors.OKBLUE, @@ -90,17 +91,19 @@ def print_bot_output( json.dumps(message.get("custom"), indent=2), color=color ) + return None -def get_user_input(previous_response: Optional[Dict[str, Any]]) -> Optional[Text]: + +def _get_user_input(previous_response: Optional[Dict[str, Any]]) -> Optional[Text]: button_response = None if previous_response is not None: - button_response = print_bot_output(previous_response, is_latest_message=True) + button_response = _print_bot_output(previous_response, is_latest_message=True) if button_response is not None: response = cli_utils.payload_from_button_question(button_response) if response == cli_utils.FREE_TEXT_INPUT_PROMPT: # Re-prompt user with a free text input - response = get_user_input({}) + response = _get_user_input({}) else: response = questionary.text( "", @@ -169,7 +172,7 @@ async def record_messages( previous_response = None await asyncio.sleep(0.5) # Wait for server to start while not utils.is_limit_reached(num_messages, max_message_limit): - text = get_user_input(previous_response) + text = _get_user_input(previous_response) if text == exit_text or text is None: break @@ -181,7 +184,7 @@ async def record_messages( previous_response = None async for response in bot_responses: if previous_response is not None: - print_bot_output(previous_response) + 
_print_bot_output(previous_response) previous_response = response else: bot_responses = await send_message_receive_block( @@ -190,7 +193,7 @@ async def record_messages( previous_response = None for response in bot_responses: if previous_response is not None: - print_bot_output(previous_response) + _print_bot_output(previous_response) previous_response = response num_messages += 1 diff --git a/rasa/core/channels/hangouts.py b/rasa/core/channels/hangouts.py index c65273c856b8..339f87a8eb49 100644 --- a/rasa/core/channels/hangouts.py +++ b/rasa/core/channels/hangouts.py @@ -63,7 +63,7 @@ def _text_button_card(text: Text, buttons: List) -> Union[Dict, None]: logger.error( "Buttons must be a list of dicts with 'title' and 'payload' as keys" ) - return + return None hangouts_buttons.append( { @@ -246,10 +246,11 @@ def _extract_message(self, req: Request) -> Text: @staticmethod def _extract_room(req: Request) -> Union[Text, None]: - if req.json["space"]["type"] == "ROOM": return req.json["space"]["displayName"] + return None + def _extract_input_channel(self) -> Text: return self.name() diff --git a/rasa/core/channels/slack.py b/rasa/core/channels/slack.py index 0b01c49050da..0f2c99bf656f 100644 --- a/rasa/core/channels/slack.py +++ b/rasa/core/channels/slack.py @@ -308,7 +308,6 @@ def _is_interactive_message(payload: Dict) -> bool: @staticmethod def _get_interactive_response(action: Dict) -> Optional[Text]: """Parse the payload for the response value.""" - if action["type"] == "button": return action.get("value") elif action["type"] == "select": @@ -328,6 +327,8 @@ def _get_interactive_response(action: Dict) -> Optional[Text]: elif action["type"] == "datepicker": return action.get("selected_date") + return None + async def process_message( self, request: Request, diff --git a/rasa/core/channels/socketio.py b/rasa/core/channels/socketio.py index 0a65d2acebbc..52fdf8c0289f 100644 --- a/rasa/core/channels/socketio.py +++ b/rasa/core/channels/socketio.py @@ -170,12 +170,13 @@ def get_output_channel(self) -> Optional["OutputChannel"]: "Please use a different channel for external events in these " "scenarios." ) - return + return None return SocketIOOutput(self.sio, self.bot_message_evt) def blueprint( self, on_new_message: Callable[[UserMessage], Awaitable[Any]] ) -> Blueprint: + """Defines a Sanic blueprint.""" # Workaround so that socketio works with requests from other origins. 
# https://github.com/miguelgrinberg/python-socketio/issues/205#issuecomment-493769183 sio = AsyncServer(async_mode="sanic", cors_allowed_origins=[]) @@ -191,9 +192,7 @@ async def health(_: Request) -> HTTPResponse: return response.json({"status": "ok"}) @sio.on("connect", namespace=self.namespace) - async def connect( - sid: Text, environ: Dict, auth: Optional[Dict] - ) -> Optional[bool]: + async def connect(sid: Text, environ: Dict, auth: Optional[Dict]) -> bool: if self.jwt_key: jwt_payload = None if auth and auth.get("token"): @@ -203,10 +202,12 @@ async def connect( if jwt_payload: logger.debug(f"User {sid} connected to socketIO endpoint.") + return True else: return False else: logger.debug(f"User {sid} connected to socketIO endpoint.") + return True @sio.on("disconnect", namespace=self.namespace) async def disconnect(sid: Text) -> None: diff --git a/rasa/core/featurizers/tracker_featurizers.py b/rasa/core/featurizers/tracker_featurizers.py index 985ae879684f..49108c84ca03 100644 --- a/rasa/core/featurizers/tracker_featurizers.py +++ b/rasa/core/featurizers/tracker_featurizers.py @@ -560,7 +560,9 @@ def training_states_labels_and_entities( domain: Domain, omit_unset_slots: bool = False, ignore_action_unlikely_intent: bool = False, - ) -> Tuple[List[List[State]], List[List[Text]], List[List[Dict[Text, Any]]]]: + ) -> Tuple[ + List[List[State]], List[List[Optional[Text]]], List[List[Dict[Text, Any]]] + ]: """Transforms trackers to states, action labels, and entity data. Args: diff --git a/rasa/core/lock_store.py b/rasa/core/lock_store.py index 30a2453f8a4c..ac78da3df30f 100644 --- a/rasa/core/lock_store.py +++ b/rasa/core/lock_store.py @@ -259,7 +259,10 @@ def get_lock(self, conversation_id: Text) -> Optional[TicketLock]: if serialised_lock: return TicketLock.from_dict(json.loads(serialised_lock)) + return None + def delete_lock(self, conversation_id: Text) -> None: + """Deletes lock for conversation ID.""" deletion_successful = self.red.delete(self.key_prefix + conversation_id) self._log_deletion(conversation_id, deletion_successful) diff --git a/rasa/core/nlg/callback.py b/rasa/core/nlg/callback.py index 721e8d5bc30f..895c56043fef 100644 --- a/rasa/core/nlg/callback.py +++ b/rasa/core/nlg/callback.py @@ -77,7 +77,7 @@ async def generate( method="post", json=body, timeout=DEFAULT_REQUEST_TIMEOUT ) - if self.validate_response(response): + if isinstance(response, dict) and self.validate_response(response): return response else: raise RasaException("NLG web endpoint returned an invalid response.") diff --git a/rasa/core/policies/form_policy.py b/rasa/core/policies/form_policy.py index 57c9812657e5..817f4500f197 100644 --- a/rasa/core/policies/form_policy.py +++ b/rasa/core/policies/form_policy.py @@ -1,5 +1,5 @@ import logging -from typing import List, Dict, Text, Optional, Any, Union, Tuple +from typing import List, Dict, Text, Optional, Any import rasa.shared.utils.common import rasa.shared.utils.io @@ -12,7 +12,7 @@ ACTIVE_LOOP, LOOP_REJECTED, ) -from rasa.shared.core.domain import State, Domain +from rasa.shared.core.domain import State, Domain, SubStateValue from rasa.shared.core.events import LoopInterrupted from rasa.core.featurizers.tracker_featurizers import TrackerFeaturizer from rasa.shared.nlu.interpreter import NaturalLanguageInterpreter @@ -55,9 +55,7 @@ def __init__( ) @staticmethod - def _get_active_form_name( - state: State, - ) -> Optional[Union[Text, Tuple[Union[float, Text]]]]: + def _get_active_form_name(state: State,) -> Optional[SubStateValue]: return 
state.get(ACTIVE_LOOP, {}).get(LOOP_NAME) @staticmethod @@ -87,8 +85,8 @@ def _create_lookup_from_states( self, trackers_as_states: List[List[State]], trackers_as_actions: List[List[Text]], - ) -> Dict[Text, Text]: - """Add states to lookup dict""" + ) -> Dict[Text, SubStateValue]: + """Add states to lookup dict.""" lookup = {} for states in trackers_as_states: active_form = self._get_active_form_name(states[-1]) diff --git a/rasa/core/policies/memoization.py b/rasa/core/policies/memoization.py index 197765784321..0e4b1703ce59 100644 --- a/rasa/core/policies/memoization.py +++ b/rasa/core/policies/memoization.py @@ -308,12 +308,12 @@ def _back_to_the_future( # use first action, if we went first time and second action, if we went again idx_to_use = idx_of_second_action if again else idx_of_first_action if idx_to_use is None: - return + return None # make second ActionExecuted the first one events = tracker.applied_events()[idx_to_use:] if not events: - return + return None mcfly_tracker = tracker.init_copy() for e in events: diff --git a/rasa/core/policies/rule_policy.py b/rasa/core/policies/rule_policy.py index 815bcd2f58bc..b0f63a4df8a3 100644 --- a/rasa/core/policies/rule_policy.py +++ b/rasa/core/policies/rule_policy.py @@ -193,7 +193,7 @@ def _create_feature_key(self, states: List[State]) -> Optional[Text]: new_states.insert(0, state) if not new_states: - return + return None # we sort keys to make sure that the same states # represented as dictionaries have the same json strings @@ -421,7 +421,7 @@ def _get_slots_loops_from_states( for states in trackers_as_states: for state in states: slots.update(set(state.get(SLOTS, {}).keys())) - active_loop = state.get(ACTIVE_LOOP, {}).get(LOOP_NAME) + active_loop: Optional[Text] = state.get(ACTIVE_LOOP, {}).get(LOOP_NAME) if active_loop: loops.add(active_loop) return slots, loops @@ -592,7 +592,7 @@ def _run_prediction_on_trackers( trackers: List[TrackerWithCachedStates], domain: Domain, collect_sources: bool, - ) -> Tuple[List[Text], Set[Text]]: + ) -> Tuple[List[Text], Set[Optional[Text]]]: if collect_sources: self._rules_sources = defaultdict(list) @@ -665,7 +665,7 @@ def _collect_rule_sources( def _find_contradicting_and_used_in_stories_rules( self, trackers: List[TrackerWithCachedStates], domain: Domain - ) -> Tuple[List[Text], Set[Text]]: + ) -> Tuple[List[Text], Set[Optional[Text]]]: return self._run_prediction_on_trackers(trackers, domain, collect_sources=False) def _analyze_rules( @@ -1080,7 +1080,7 @@ def predict_action_probabilities( def _predict( self, tracker: DialogueStateTracker, domain: Domain - ) -> Tuple[PolicyPrediction, Text]: + ) -> Tuple[PolicyPrediction, Optional[Text]]: ( rules_action_name_from_text, prediction_source_from_text, diff --git a/rasa/core/policies/sklearn_policy.py b/rasa/core/policies/sklearn_policy.py index 01736258de20..cf110177e441 100644 --- a/rasa/core/policies/sklearn_policy.py +++ b/rasa/core/policies/sklearn_policy.py @@ -347,7 +347,7 @@ def load( f"Failed to load dialogue model. Path {filename.absolute()} " f"doesn't exist." 
) - return + return cls() featurizer = TrackerFeaturizer.load(path) assert isinstance(featurizer, MaxHistoryTrackerFeaturizer), ( diff --git a/rasa/core/policies/ted_policy.py b/rasa/core/policies/ted_policy.py index 5c824db14e49..e596e02c11d6 100644 --- a/rasa/core/policies/ted_policy.py +++ b/rasa/core/policies/ted_policy.py @@ -462,7 +462,7 @@ def _create_data_for_entities( self, entity_tags: Optional[List[List[Dict[Text, List["Features"]]]]] ) -> Optional[Data]: if not self.config[ENTITY_RECOGNITION]: - return + return None # check that there are real entity tags if entity_tags and self._should_extract_entities(entity_tags): @@ -476,6 +476,8 @@ def _create_data_for_entities( ) self.config[ENTITY_RECOGNITION] = False + return None + def _create_model_data( self, tracker_state_features: List[List[Dict[Text, List["Features"]]]], @@ -821,11 +823,11 @@ def _create_optional_event_for_entities( # entities belong only to the last user message # and only if user text was used for prediction, # a user message always comes after action listen - return + return None if not self.config[ENTITY_RECOGNITION]: # entity recognition is not turned on, no entities can be predicted - return + return None # The batch dimension of entity prediction is not the same as batch size, # rather it is the number of last (if max history featurizer else all) @@ -841,7 +843,7 @@ def _create_optional_event_for_entities( if ENTITY_ATTRIBUTE_TYPE not in predicted_tags: # no entities detected - return + return None # entities belong to the last message of the tracker # convert the predicted tags to actual entities diff --git a/rasa/core/test.py b/rasa/core/test.py index b7b292a2128c..0981c9fd27a3 100644 --- a/rasa/core/test.py +++ b/rasa/core/test.py @@ -280,14 +280,14 @@ def serialise(self) -> Tuple[PredictionList, PredictionList]: filter( lambda x: x.get(ENTITY_ATTRIBUTE_TEXT) == text, self.entity_targets ), - key=lambda x: x.get(ENTITY_ATTRIBUTE_START), + key=lambda x: x[ENTITY_ATTRIBUTE_START], ) entity_predictions = sorted( filter( lambda x: x.get(ENTITY_ATTRIBUTE_TEXT) == text, self.entity_predictions, ), - key=lambda x: x.get(ENTITY_ATTRIBUTE_START), + key=lambda x: x[ENTITY_ATTRIBUTE_START], ) i_pred, i_target = 0, 0 @@ -447,7 +447,7 @@ def _clean_entity_results( cleaned_entities = [] for r in tuple(entity_results): - cleaned_entity = {ENTITY_ATTRIBUTE_TEXT: text} + cleaned_entity: EntityPrediction = {ENTITY_ATTRIBUTE_TEXT: text} for k in ( ENTITY_ATTRIBUTE_START, ENTITY_ATTRIBUTE_END, @@ -596,13 +596,14 @@ def _get_e2e_entity_evaluation_result( return EntityEvaluationResult( entity_targets, entities_predicted_by_policies, tokens, text ) + return None def _run_action_prediction( processor: "MessageProcessor", partial_tracker: DialogueStateTracker, expected_action: Text, -) -> Tuple[Text, PolicyPrediction, EntityEvaluationResult]: +) -> Tuple[Text, PolicyPrediction, Optional[EntityEvaluationResult]]: action, prediction = processor.predict_next_action(partial_tracker) predicted_action = action.name() diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py index bfe0480f1a7d..a55a3fecf20e 100644 --- a/rasa/core/training/interactive.py +++ b/rasa/core/training/interactive.py @@ -15,6 +15,7 @@ Tuple, Union, Set, + cast, ) from sanic import Sanic, response @@ -90,6 +91,8 @@ # still works. 
import rasa.utils.io as io_utils +from rasa.shared.core.generator import TrackerWithCachedStates + logger = logging.getLogger(__name__) PATHS = { @@ -148,9 +151,8 @@ async def send_message( conversation_id: Text, message: Text, parse_data: Optional[Dict[Text, Any]] = None, -) -> Dict[Text, Any]: +) -> Optional[Any]: """Send a user message to a conversation.""" - payload = { "sender": UserUttered.type_name, "text": message, @@ -166,25 +168,22 @@ async def send_message( async def request_prediction( endpoint: EndpointConfig, conversation_id: Text -) -> Dict[Text, Any]: +) -> Optional[Any]: """Request the next action prediction from core.""" - return await endpoint.request( method="post", subpath=f"/conversations/{conversation_id}/predict" ) -async def retrieve_domain(endpoint: EndpointConfig) -> Dict[Text, Any]: +async def retrieve_domain(endpoint: EndpointConfig) -> Optional[Any]: """Retrieve the domain from core.""" - return await endpoint.request( method="get", subpath="/domain", headers={"Accept": "application/json"} ) -async def retrieve_status(endpoint: EndpointConfig) -> Dict[Text, Any]: +async def retrieve_status(endpoint: EndpointConfig) -> Optional[Any]: """Retrieve the status from core.""" - return await endpoint.request(method="get", subpath="/status") @@ -194,12 +193,15 @@ async def retrieve_tracker( verbosity: EventVerbosity = EventVerbosity.ALL, ) -> Dict[Text, Any]: """Retrieve a tracker from core.""" - path = f"/conversations/{conversation_id}/tracker?include_events={verbosity.name}" - return await endpoint.request( + result = await endpoint.request( method="get", subpath=path, headers={"Accept": "application/json"} ) + # If the request wasn't successful the previous call had already raised. Hence, + # we can be sure we have the tracker in the right format. 
+ return cast(Dict[Text, Any], result) + async def send_action( endpoint: EndpointConfig, @@ -208,9 +210,8 @@ async def send_action( policy: Optional[Text] = None, confidence: Optional[float] = None, is_new_action: bool = False, -) -> Dict[Text, Any]: +) -> Optional[Any]: """Log an action to a conversation.""" - payload = ActionExecuted(action_name, policy, confidence).as_dict() subpath = f"/conversations/{conversation_id}/execute" @@ -252,9 +253,8 @@ async def send_event( endpoint: EndpointConfig, conversation_id: Text, evt: Union[List[Dict[Text, Any]], Dict[Text, Any]], -) -> Dict[Text, Any]: +) -> Optional[Any]: """Log an event to a conversation.""" - subpath = f"/conversations/{conversation_id}/tracker/events" return await endpoint.request(json=evt, method="post", subpath=subpath) @@ -262,7 +262,6 @@ async def send_event( def format_bot_output(message: BotUttered) -> Text: """Format a bot response to be displayed in the history table.""" - # First, add text to output output = message.text or "" @@ -301,7 +300,6 @@ def format_bot_output(message: BotUttered) -> Text: def latest_user_message(events: List[Dict[Text, Any]]) -> Optional[Dict[Text, Any]]: """Return most recent user message.""" - for i, e in enumerate(reversed(events)): if e.get("event") == UserUttered.type_name: return e @@ -374,7 +372,6 @@ async def _request_free_text_action( async def _request_free_text_utterance( conversation_id: Text, endpoint: EndpointConfig, action: Text ) -> Text: - question = questionary.text( message=(f"Please type the message for your new bot response '{action}':"), validate=io_utils.not_empty_validator("Please enter a response"), @@ -650,16 +647,16 @@ async def _ask_if_quit(conversation_id: Text, endpoint: EndpointConfig) -> bool: # this is also the default answer if the user presses Ctrl-C await _write_data_to_file(conversation_id, endpoint) raise Abort() - elif answer == "continue": - # in this case we will just return, and the original - # question will get asked again - return True elif answer == "undo": raise UndoLastStep() elif answer == "fork": raise ForkTracker() elif answer == "restart": raise RestartConversation() + else: # `continue` or no answer + # in this case we will just return, and the original + # question will get asked again + return True async def _request_action_from_user( @@ -1584,7 +1581,7 @@ async def _get_tracker_events_to_plot( async def _get_training_trackers( file_importer: TrainingDataImporter, domain: Dict[str, Any] -) -> List[DialogueStateTracker]: +) -> List[TrackerWithCachedStates]: from rasa.core import training return await training.load_data( diff --git a/rasa/core/utils.py b/rasa/core/utils.py index 2156b5ec53c1..3b94f849a20c 100644 --- a/rasa/core/utils.py +++ b/rasa/core/utils.py @@ -88,10 +88,8 @@ def dump_obj_as_yaml_to_file( ) -def list_routes(app: Sanic) -> Text: - """List all the routes of a sanic application. - - Mainly used for debugging.""" +def list_routes(app: Sanic) -> Dict[Text, Text]: + """List all the routes of a sanic application. 
Mainly used for debugging.""" from urllib.parse import unquote output = {} diff --git a/rasa/model.py b/rasa/model.py index 78be4034060c..456dca7da184 100644 --- a/rasa/model.py +++ b/rasa/model.py @@ -8,7 +8,16 @@ import tempfile import typing from pathlib import Path -from typing import Any, Text, Tuple, Union, Optional, List, Dict, NamedTuple +from typing import ( + Any, + Text, + Tuple, + Union, + Optional, + List, + Dict, + NamedTuple, +) from packaging import version @@ -621,7 +630,7 @@ async def update_model_with_new_domain( def get_model_for_finetuning( previous_model_file: Optional[Union[Path, Text]] -) -> Optional[Text]: +) -> Optional[Union[Path, Text]]: """Gets validated path for model to finetune. Args: diff --git a/rasa/model_training.py b/rasa/model_training.py index 39b7538e779b..3acabed1089f 100644 --- a/rasa/model_training.py +++ b/rasa/model_training.py @@ -482,7 +482,7 @@ async def train_core_async( "No stories given. Please provide stories in order to " "train a Rasa Core model using the '--stories' argument." ) - return + return None return await _train_core_with_validated_data( file_importer, @@ -570,7 +570,7 @@ async def _train_core_with_validated_data( model_prefix="core-", ) - return _train_path + return _train_path async def _core_model_for_finetuning( @@ -677,7 +677,7 @@ async def train_nlu_async( "No NLU data given. Please provide NLU data in order to train " "a Rasa NLU model using the '--nlu' argument." ) - return + return None # training NLU only hence the training files still have to be selected file_importer = TrainingDataImporter.load_nlu_importer_from_config( @@ -691,7 +691,7 @@ async def train_nlu_async( f"Please verify the data format. " f"The NLU model training will be skipped now." ) - return + return None return await _train_nlu_with_validated_data( file_importer, @@ -780,7 +780,7 @@ async def _train_nlu_with_validated_data( model_prefix="nlu-", ) - return _train_path + return _train_path async def _nlu_model_for_finetuning( @@ -812,11 +812,11 @@ async def _nlu_model_for_finetuning( ) config = await file_importer.get_config() - model_to_finetune = Interpreter.load( + loaded_model_to_finetune = Interpreter.load( old_nlu, new_config=config, finetuning_epoch_fraction=finetuning_epoch_fraction, ) - if not model_to_finetune: + if not loaded_model_to_finetune: return None - return model_to_finetune + return loaded_model_to_finetune diff --git a/rasa/nlu/classifiers/diet_classifier.py b/rasa/nlu/classifiers/diet_classifier.py index 0ce0a86552ca..ffbf38bffbcc 100644 --- a/rasa/nlu/classifiers/diet_classifier.py +++ b/rasa/nlu/classifiers/diet_classifier.py @@ -8,7 +8,7 @@ import scipy.sparse import tensorflow as tf -from typing import Any, Dict, List, Optional, Text, Tuple, Union, Type +from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar, Union, Type import rasa.shared.utils.io import rasa.utils.io as io_utils @@ -1046,16 +1046,18 @@ def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]: return {"file": file_name} + T = TypeVar("T") + @classmethod def load( - cls, + cls: T, meta: Dict[Text, Any], model_dir: Text, model_metadata: Metadata = None, cached_component: Optional["DIETClassifier"] = None, should_finetune: bool = False, **kwargs: Any, - ) -> "DIETClassifier": + ) -> T: """Loads the trained model from the provided directory.""" if not meta.get("file"): logger.debug( diff --git a/rasa/nlu/components.py b/rasa/nlu/components.py index 89e37394119e..79efb2136f3f 100644 --- a/rasa/nlu/components.py +++ 
b/rasa/nlu/components.py @@ -477,7 +477,8 @@ def name(self) -> Text: will be a proper pipeline definition where `ComponentA` is the name of the first component of the pipeline. """ - return type(self).name + # cast due to https://github.com/python/mypy/issues/7945 + return typing.cast(str, type(self).name) @property def unique_name(self) -> Text: @@ -822,9 +823,6 @@ def can_handle_language(cls, language: Hashable) -> bool: return language not in not_supported_language_list -C = typing.TypeVar("C", bound=Component) - - class ComponentBuilder: """Creates trainers and interpreters based on configurations. @@ -875,7 +873,7 @@ def load_component( model_dir: Text, model_metadata: "Metadata", **context: Any, - ) -> Component: + ) -> Optional[Component]: """Loads a component. Tries to retrieve a component from the cache, else calls @@ -892,7 +890,6 @@ def load_component( Returns: The loaded component. """ - from rasa.nlu import registry try: @@ -928,7 +925,6 @@ def create_component( Returns: The created component. """ - from rasa.nlu import registry from rasa.nlu.model import Metadata @@ -945,12 +941,3 @@ def create_component( f"Failed to create component '{component_config['name']}'. " f"Error: {e}" ) - - def create_component_from_class(self, component_class: Type[C], **cfg: Any) -> C: - """Create a component based on a class and a configuration. - - Mainly used to make use of caching when instantiating component classes.""" - - component_config = {"name": component_class.name} - - return self.create_component(component_config, RasaNLUModelConfig(cfg)) diff --git a/rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py b/rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py index b100e926fb87..d09d040903fd 100644 --- a/rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py +++ b/rasa/nlu/featurizers/sparse_featurizer/lexical_syntactic_featurizer.py @@ -3,7 +3,16 @@ from pathlib import Path import numpy as np -from typing import Any, Dict, Optional, Text, List, Type, Union, Callable +from typing import ( + Any, + Dict, + Optional, + Text, + List, + Type, + Union, + Callable, +) from rasa.nlu.tokenizers.spacy_tokenizer import POS_TAG_KEY from rasa.shared.constants import DOCS_URL_COMPONENTS @@ -145,9 +154,7 @@ def _map_features_to_indices( return feature_to_idx_dict @staticmethod - def _build_feature_vocabulary( - features: List[List[Dict[Text, Any]]] - ) -> Dict[Text, List[Text]]: + def _build_feature_vocabulary(features: List[List[Dict[Text, Any]]]) -> OrderedDict: feature_vocabulary = defaultdict(set) for sentence_features in features: @@ -156,13 +163,12 @@ def _build_feature_vocabulary( feature_vocabulary[feature_name].add(feature_value) # sort items to ensure same order every time (for tests) - feature_vocabulary = OrderedDict(sorted(feature_vocabulary.items())) + ordered_feature_vocabulary = OrderedDict(sorted(feature_vocabulary.items())) - return feature_vocabulary + return ordered_feature_vocabulary def _create_sparse_features(self, message: Message) -> None: - """Convert incoming messages into sparse features using the configured - features.""" + """Convert incoming messages into sparse features.""" import scipy.sparse tokens = message.get(TOKENS_NAMES[TEXT]) @@ -254,7 +260,7 @@ def _get_feature_value( token_idx: int, pointer_position: int, token_length: int, - ) -> Union[bool, int, Text]: + ) -> Union[bool, int, Text, None]: if feature == END_OF_SENTENCE: return token_idx + pointer_position == token_length - 1 @@ -273,6 +279,7 @@ def 
_get_feature_value( f"Invalid value '{value}' for feature '{feature}'." f" Feature is ignored." ) + return value @classmethod diff --git a/rasa/nlu/registry.py b/rasa/nlu/registry.py index 4f0891ddda9f..472b0c61f8f2 100644 --- a/rasa/nlu/registry.py +++ b/rasa/nlu/registry.py @@ -7,8 +7,9 @@ import logging import traceback import typing -from typing import Any, Dict, Optional, Text, Type +from typing import Any, Dict, Optional, Text, Type, List +from rasa.nlu.components import Component from rasa.nlu.classifiers.diet_classifier import DIETClassifier from rasa.nlu.classifiers.fallback_classifier import FallbackClassifier from rasa.nlu.classifiers.keyword_intent_classifier import KeywordIntentClassifier @@ -49,7 +50,6 @@ from rasa.shared.constants import DOCS_URL_COMPONENTS if typing.TYPE_CHECKING: - from rasa.nlu.components import Component from rasa.nlu.config import RasaNLUModelConfig logger = logging.getLogger(__name__) @@ -57,7 +57,7 @@ # Classes of all known components. If a new component should be added, # its class name should be listed here. -component_classes = [ +component_classes: List[Type[Component]] = [ # utils SpacyNLP, MitieNLP, @@ -95,7 +95,9 @@ ] # Mapping from a components name to its class to allow name based lookup. -registered_components = {c.name: c for c in component_classes} +registered_components: Dict[Text, Type[Component]] = { + c.name: c for c in component_classes +} class ComponentNotFoundException(ModuleNotFoundError, RasaException): @@ -181,12 +183,11 @@ def load_component_by_meta( def create_component_by_config( component_config: Dict[Text, Any], config: "RasaNLUModelConfig" -) -> Optional["Component"]: +) -> "Component": """Resolves a component and calls it's create method. Inits it based on a previously persisted model. 
""" - # try to get class name first, else create by name component_name = component_config.get("class", component_config["name"]) component_class = get_component_class(component_name) diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index a01b2163465b..bfa465929375 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -630,8 +630,7 @@ def load( **kwargs: Any, ) -> "ResponseSelector": """Loads the trained model from the provided directory.""" - - model = super().load( + model: ResponseSelector = super().load( meta, model_dir, model_metadata, cached_component, **kwargs ) if not meta.get("file"): diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py index 56dd3dacf3e2..6931d47ea55a 100644 --- a/rasa/nlu/test.py +++ b/rasa/nlu/test.py @@ -49,7 +49,6 @@ NO_ENTITY_TAG, INTENT_NAME_KEY, PREDICTED_CONFIDENCE_KEY, - ENTITY_ATTRIBUTE_TEXT, ) from rasa.model import get_model from rasa.nlu.components import ComponentBuilder @@ -67,7 +66,7 @@ EntityPrediction = TypedDict( "EntityPrediction", { - ENTITY_ATTRIBUTE_TEXT: Text, + "text": Text, "entities": List[Dict[Text, Any]], "predicted_entities": List[Dict[Text, Any]], }, @@ -759,13 +758,12 @@ def collect_incorrect_entity_predictions( for entity_result in entity_results: for i in range(offset, offset + len(entity_result.tokens)): if merged_targets[i] != merged_predictions[i]: - errors.append( - { - "text": entity_result.message, - "entities": entity_result.entity_targets, - "predicted_entities": entity_result.entity_predictions, - } - ) + prediction: EntityPrediction = { + "text": entity_result.message, + "entities": entity_result.entity_targets, + "predicted_entities": entity_result.entity_predictions, + } + errors.append(prediction) break offset += len(entity_result.tokens) return errors @@ -821,13 +819,12 @@ def collect_successful_entity_predictions( merged_targets[i] == merged_predictions[i] and merged_targets[i] != NO_ENTITY ): - successes.append( - { - "text": entity_result.message, - "entities": entity_result.entity_targets, - "predicted_entities": entity_result.entity_predictions, - } - ) + prediction: EntityPrediction = { + "text": entity_result.message, + "entities": entity_result.entity_targets, + "predicted_entities": entity_result.entity_predictions, + } + successes.append(prediction) break offset += len(entity_result.tokens) return successes @@ -1381,12 +1378,13 @@ def is_response_selector_present(interpreter: Interpreter) -> bool: return response_selectors != [] -def get_available_response_selector_types(interpreter: Interpreter) -> List[Text]: +def get_available_response_selector_types( + interpreter: Interpreter, +) -> List[Optional[Text]]: """Gets all available response selector types.""" - from rasa.nlu.selectors.response_selector import ResponseSelector - response_selector_types = [ + response_selector_types: List[Optional[Text]] = [ c.retrieval_intent for c in interpreter.pipeline if isinstance(c, ResponseSelector) diff --git a/rasa/nlu/train.py b/rasa/nlu/train.py index e9273cda4c32..82d0bd72545d 100644 --- a/rasa/nlu/train.py +++ b/rasa/nlu/train.py @@ -59,7 +59,11 @@ async def load_data_from_endpoint( return training_data except Exception as e: - logger.warning(f"Could not retrieve training data from URL:\n{e}") + logger.warning( + f"Could not retrieve training data from URL. Using empty " + f"training data instead. 
Error details:\n{e}" + ) + return TrainingData() def create_persistor(persistor: Optional[Text]) -> Optional["Persistor"]: diff --git a/rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py b/rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py index 62ab42bffd76..a716ebb78658 100644 --- a/rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py +++ b/rasa/nlu/utils/hugging_face/transformers_pre_post_processors.py @@ -22,6 +22,8 @@ def cleanup_tokens( # remove empty strings token_ids_string = [(id, string) for id, string in token_ids_string if string] + token_ids: List[int] + token_strings: List[Text] # return as individual token ids and token strings token_ids, token_strings = zip(*token_ids_string) return token_ids, token_strings diff --git a/rasa/server.py b/rasa/server.py index e41eea908f05..c3c6d4331090 100644 --- a/rasa/server.py +++ b/rasa/server.py @@ -33,6 +33,7 @@ import rasa import rasa.core.utils +from rasa.nlu.emulators.emulator import Emulator import rasa.utils.common import rasa.shared.utils.common import rasa.shared.utils.io @@ -78,9 +79,12 @@ if TYPE_CHECKING: from ssl import SSLContext # noqa: F401 from rasa.core.processor import MessageProcessor - from mypy_extensions import VarArg, KwArg + from mypy_extensions import Arg, VarArg, KwArg - SanicView = Callable[[Request, VarArg(), KwArg()], response.BaseHTTPResponse] + SanicView = Callable[ + [Arg(Request, "request"), VarArg(), KwArg()], # noqa: F821 + response.BaseHTTPResponse, + ] logger = logging.getLogger(__name__) @@ -165,10 +169,10 @@ def decorated(*args: Any, **kwargs: Any) -> Any: return decorator -def ensure_conversation_exists() -> "SanicView": +def ensure_conversation_exists() -> Callable[["SanicView"], "SanicView"]: """Wraps a request handler ensuring the conversation exists.""" - def decorator(f: "SanicView") -> HTTPResponse: + def decorator(f: "SanicView") -> "SanicView": @wraps(f) def decorated(request: Request, *args: Any, **kwargs: Any) -> HTTPResponse: conversation_id = kwargs["conversation_id"] @@ -440,10 +444,11 @@ def create_ssl_context( return None -def _create_emulator(mode: Optional[Text]) -> NoEmulator: +def _create_emulator(mode: Optional[Text]) -> Emulator: """Create emulator for specified mode. - If no emulator is specified, we will use the Rasa NLU format.""" + If no emulator is specified, we will use the Rasa NLU format. + """ if mode is None: return NoEmulator() elif mode.lower() == "wit": diff --git a/rasa/shared/core/domain.py b/rasa/shared/core/domain.py index 33c58183736d..3ebb96d463d5 100644 --- a/rasa/shared/core/domain.py +++ b/rasa/shared/core/domain.py @@ -80,7 +80,8 @@ # State is a dictionary with keys (USER, PREVIOUS_ACTION, SLOTS, ACTIVE_LOOP) # representing the origin of a SubState; # the values are SubStates, that contain the information needed for featurization -SubState = Dict[Text, Union[Text, Tuple[Union[float, Text]]]] +SubStateValue = Union[Text, Tuple[Union[float, Text]]] +SubState = Dict[Text, SubStateValue] State = Dict[Text, SubState] logger = logging.getLogger(__name__) @@ -966,7 +967,7 @@ def _get_featurized_entities(self, latest_message: UserUttered) -> Set[Text]: def _get_user_sub_state( self, tracker: "DialogueStateTracker" - ) -> Dict[Text, Union[Text, Tuple[Text]]]: + ) -> Dict[Text, Union[None, Text, List[Optional[Text]], Tuple[str, ...]]]: """Turns latest UserUttered event into a substate. The substate will contain intent, text, and entities (if any are present). 
@@ -982,7 +983,9 @@ def _get_user_sub_state( if not latest_message or latest_message.is_empty(): return {} - sub_state = latest_message.as_sub_state() + sub_state: Dict[ + Text, Union[None, Text, List[Optional[Text]], Tuple[str, ...]] + ] = latest_message.as_sub_state() # Filter entities based on intent config. We need to convert the set into a # tuple because sub_state will be later transformed into a frozenset (so it can @@ -1017,7 +1020,7 @@ def _get_slots_sub_state( Returns: a dictionary mapping slot names to their featurization """ - slots = {} + slots: Dict[Text, Union[Text, Tuple[float]]] = {} for slot_name, slot in tracker.slots.items(): if slot is not None and slot.as_feature(): if omit_unset_slots and not slot.has_been_set: @@ -1033,8 +1036,9 @@ def _get_slots_sub_state( @staticmethod def _get_prev_action_sub_state( tracker: "DialogueStateTracker", - ) -> Dict[Text, Text]: + ) -> Optional[Dict[Text, Text]]: """Turn the previous taken action into a state name. + Args: tracker: dialog state tracker containing the dialog so far Returns: @@ -1315,7 +1319,9 @@ def get_responses_with_multilines( return final_responses - def _transform_intents_for_file(self) -> List[Union[Text, Dict[Text, Any]]]: + def _transform_intents_for_file( + self, + ) -> List[Dict[Text, Dict[Text, Union[bool, List[Text]]]]]: """Transform intent properties for displaying or writing into a domain file. Internally, there is a property `used_entities` that lists all entities to be @@ -1361,7 +1367,7 @@ def _transform_entities_for_file(self) -> List[Union[Text, Dict[Text, Any]]]: Returns: The entity properties as they are used in domain files. """ - entities_for_file = [] + entities_for_file: List[Union[Text, Dict[Text, Any]]] = [] for entity in self.entities: if entity in self.roles and entity in self.groups: diff --git a/rasa/shared/core/events.py b/rasa/shared/core/events.py index a4b5bffe5097..7d5213e3ff9b 100644 --- a/rasa/shared/core/events.py +++ b/rasa/shared/core/events.py @@ -544,7 +544,7 @@ def as_dict(self) -> Dict[Text, Any]: ) return _dict - def as_sub_state(self) -> Dict[Text, Union[None, Text, List[Optional[Text]]]]: + def as_sub_state(self,) -> Dict[Text, Union[None, Text, List[Optional[Text]]]]: """Turns a UserUttered event into features. The substate contains information about entities, intent and text of the @@ -571,7 +571,7 @@ def as_sub_state(self) -> Dict[Text, Union[None, Text, List[Optional[Text]]]]: if ENTITY_ATTRIBUTE_GROUP in entity ) - out = {} + out: Dict[Text, Union[None, Text, List[Optional[Text]]]] = {} # During training we expect either intent_name or text to be set. # During prediction both will be set. 
if self.text and ( @@ -959,7 +959,9 @@ def as_story_string(self) -> Text: return f"{self.type_name}{props}" @classmethod - def _from_story_string(cls, parameters: Dict[Text, Any]) -> Optional[List[Event]]: + def _from_story_string( + cls, parameters: Dict[Text, Any] + ) -> Optional[List["SlotSet"]]: slots = [] for slot_key, slot_val in parameters.items(): @@ -1526,7 +1528,7 @@ def __repr__(self) -> Text: self.action_name, self.policy, self.confidence ) - def __str__(self) -> Text: + def __str__(self) -> Optional[Text]: """Returns event as human readable string.""" return self.action_name or self.action_text @@ -1541,7 +1543,7 @@ def __eq__(self, other: Any) -> bool: return self.__members__() == other.__members__() - def as_story_string(self) -> Text: + def as_story_string(self) -> Optional[Text]: """Returns event in Markdown format.""" if self.action_text: raise UnsupportedFeatureException( diff --git a/rasa/shared/core/generator.py b/rasa/shared/core/generator.py index 1d6f8e2e8a35..66c99e928778 100644 --- a/rasa/shared/core/generator.py +++ b/rasa/shared/core/generator.py @@ -103,13 +103,14 @@ def past_states_for_hashing( # if don't have it cached, we use the domain to calculate the states # from the events - if self._states_for_hashing is None: + states_for_hashing = self._states_for_hashing + if states_for_hashing is None: states = super().past_states(domain, omit_unset_slots=omit_unset_slots) - self._states_for_hashing = deque( - self.freeze_current_state(s) for s in states - ) + states_for_hashing = deque(self.freeze_current_state(s) for s in states) + + self._states_for_hashing = states_for_hashing - return self._states_for_hashing + return states_for_hashing @staticmethod def _unfreeze_states(frozen_states: Deque[FrozenState]) -> List[State]: @@ -218,7 +219,7 @@ def update(self, event: Event, skip_states: bool = False) -> None: # define types -TrackerLookupDict = Dict[Optional[Text], List[TrackerWithCachedStates]] +TrackerLookupDict = Dict[Text, List[TrackerWithCachedStates]] TrackersTuple = Tuple[List[TrackerWithCachedStates], List[TrackerWithCachedStates]] diff --git a/rasa/shared/core/trackers.py b/rasa/shared/core/trackers.py index 4f9ab8c975a0..999ea0e314ce 100644 --- a/rasa/shared/core/trackers.py +++ b/rasa/shared/core/trackers.py @@ -878,7 +878,9 @@ def latest_action_name(self) -> Optional[Text]: ) -def get_active_loop_name(state: State) -> Optional[Text]: +def get_active_loop_name( + state: State, +) -> Optional[Union[Text, Tuple[Union[float, Text]]]]: """Get the name of current active loop. 
Args: @@ -891,7 +893,7 @@ def get_active_loop_name(state: State) -> Optional[Text]: not state.get(ACTIVE_LOOP) or state[ACTIVE_LOOP].get(LOOP_NAME) == SHOULD_NOT_BE_SET ): - return + return None return state[ACTIVE_LOOP].get(LOOP_NAME) diff --git a/rasa/shared/core/training_data/story_reader/yaml_story_reader.py b/rasa/shared/core/training_data/story_reader/yaml_story_reader.py index 609273203a5e..51cbf5532681 100644 --- a/rasa/shared/core/training_data/story_reader/yaml_story_reader.py +++ b/rasa/shared/core/training_data/story_reader/yaml_story_reader.py @@ -437,7 +437,7 @@ def _parse_raw_user_utterance(self, step: Dict[Text, Any]) -> Optional[UserUtter @staticmethod def _parse_raw_entities( raw_entities: Union[List[Dict[Text, Text]], List[Text]] - ) -> List[Dict[Text, Text]]: + ) -> List[Dict[Text, Optional[Text]]]: final_entities = [] for entity in raw_entities: if isinstance(entity, dict): diff --git a/rasa/shared/core/training_data/story_writer/yaml_story_writer.py b/rasa/shared/core/training_data/story_writer/yaml_story_writer.py index 542d1e5336b3..96b0269141c3 100644 --- a/rasa/shared/core/training_data/story_writer/yaml_story_writer.py +++ b/rasa/shared/core/training_data/story_writer/yaml_story_writer.py @@ -1,6 +1,13 @@ from collections import OrderedDict from pathlib import Path -from typing import Any, Dict, List, Text, Union, Optional +from typing import ( + Any, + Dict, + List, + Text, + Union, + Optional, +) from ruamel import yaml from ruamel.yaml.comments import CommentedMap @@ -287,14 +294,14 @@ def process_action(action: ActionExecuted) -> Optional[OrderedDict]: return result @staticmethod - def process_slot(event: SlotSet) -> Dict[Text, List[Dict]]: + def process_slot(event: SlotSet) -> OrderedDict: """Converts a single `SlotSet` event into an ordered dict. Args: event: Original `SlotSet` event. Returns: - Dict with an `SlotSet` event. + OrderedDict with an `SlotSet` event. """ return OrderedDict([(KEY_SLOT_NAME, [{event.key: event.value}])]) diff --git a/rasa/shared/core/training_data/structures.py b/rasa/shared/core/training_data/structures.py index 3ea111e666cf..8216029d8f9f 100644 --- a/rasa/shared/core/training_data/structures.py +++ b/rasa/shared/core/training_data/structures.py @@ -254,7 +254,7 @@ def _add_action_listen(self, events: List[Event]) -> None: def explicit_events( self, domain: Domain, should_append_final_listen: bool = True - ) -> List[Union[Event, List[Event]]]: + ) -> List[Event]: """Returns events contained in the story step including implicit events. Not all events are always listed in the story dsl. This @@ -262,8 +262,7 @@ def explicit_events( set slots. This functions makes these events explicit and returns them with the rest of the steps events. 
""" - - events = [] + events: List[Event] = [] for e in self.events: if isinstance(e, UserUttered): @@ -473,13 +472,11 @@ def merge(self, other: Optional["StoryGraph"]) -> "StoryGraph": def overlapping_checkpoint_names( cps: List[Checkpoint], other_cps: List[Checkpoint] ) -> Set[Text]: - """Find overlapping checkpoints names""" - + """Find overlapping checkpoints names.""" return {cp.name for cp in cps} & {cp.name for cp in other_cps} def with_cycles_removed(self) -> "StoryGraph": """Create a graph with the cyclic edges removed from this graph.""" - story_end_checkpoints = self.story_end_checkpoints.copy() cyclic_edge_ids = self.cyclic_edge_ids # we need to remove the start steps and replace them with steps ending @@ -638,9 +635,8 @@ def _is_checkpoint_in_list( @staticmethod def _find_unused_checkpoints( story_steps: ValuesView[StoryStep], story_end_checkpoints: Dict[Text, Text] - ) -> Set[Text]: + ) -> Set[Optional[Text]]: """Finds all unused checkpoints.""" - collected_start = {STORY_END, STORY_START} collected_end = {STORY_END, STORY_START} diff --git a/rasa/shared/importers/importer.py b/rasa/shared/importers/importer.py index 513fee53541b..1f45c2ba84a5 100644 --- a/rasa/shared/importers/importer.py +++ b/rasa/shared/importers/importer.py @@ -522,10 +522,10 @@ async def _additional_training_data_from_stories(self) -> TrainingData: # model has to be retrained due to changes in the event order within # the stories. sorted_utterances = sorted( - utterances, key=lambda user: user.intent_name or user.text + utterances, key=lambda user: user.intent_name or user.text or "" ) sorted_actions = sorted( - actions, key=lambda action: action.action_name or action.action_text + actions, key=lambda action: action.action_name or action.action_text or "" ) additional_messages_from_stories = [ diff --git a/rasa/shared/nlu/training_data/formats/rasa_yaml.py b/rasa/shared/nlu/training_data/formats/rasa_yaml.py index bb6e1dc9a70e..adaf46d43c2f 100644 --- a/rasa/shared/nlu/training_data/formats/rasa_yaml.py +++ b/rasa/shared/nlu/training_data/formats/rasa_yaml.py @@ -436,7 +436,10 @@ def process_synonyms(cls, training_data: "TrainingData") -> List[OrderedDict]: inverted_synonyms[synonym].append(example) return cls.process_training_examples_by_key( - inverted_synonyms, KEY_SYNONYM, KEY_SYNONYM_EXAMPLES + inverted_synonyms, + KEY_SYNONYM, + KEY_SYNONYM_EXAMPLES, + example_extraction_predicate=lambda x: str(x), ) @classmethod @@ -448,12 +451,23 @@ def process_regexes(cls, training_data: "TrainingData") -> List[OrderedDict]: inverted_regexes[regex["name"]].append(regex["pattern"]) return cls.process_training_examples_by_key( - inverted_regexes, KEY_REGEX, KEY_REGEX_EXAMPLES + inverted_regexes, + KEY_REGEX, + KEY_REGEX_EXAMPLES, + example_extraction_predicate=lambda x: str(x), ) @classmethod def process_lookup_tables(cls, training_data: "TrainingData") -> List[OrderedDict]: - prepared_lookup_tables = OrderedDict() + """Serializes the look up tables. + + Args: + training_data: The training data object with potential look up tables. + + Returns: + The serialized lookup tables. 
+ """ + prepared_lookup_tables: Dict[Text, List[Text]] = OrderedDict() for lookup_table in training_data.lookup_tables: # this is a lookup table filename if isinstance(lookup_table["elements"], str): @@ -461,16 +475,36 @@ def process_lookup_tables(cls, training_data: "TrainingData") -> List[OrderedDic prepared_lookup_tables[lookup_table["name"]] = lookup_table["elements"] return cls.process_training_examples_by_key( - prepared_lookup_tables, KEY_LOOKUP, KEY_LOOKUP_EXAMPLES + prepared_lookup_tables, + KEY_LOOKUP, + KEY_LOOKUP_EXAMPLES, + example_extraction_predicate=lambda x: str(x), ) @staticmethod def process_training_examples_by_key( - training_examples: Dict, + training_examples: Dict[Text, List[Union[Dict, Text]]], key_name: Text, key_examples: Text, - example_extraction_predicate: Callable[[Dict[Text, Any]], Text] = lambda x: x, + example_extraction_predicate: Callable[[Dict[Text, Any]], Text], ) -> List[OrderedDict]: + """Prepares training examples to be written to YAML. + + This can be any NLU training data (intent examples, lookup tables, etc.) + + Args: + training_examples: Multiple training examples. Mappings in case additional + values were specified for an example (e.g. metadata) or just the plain + value. + key_name: The top level key which the examples belong to (e.g. `intents`) + key_examples: The sub key which the examples should be added to + (e.g. `examples`). + example_extraction_predicate: Function to extract example value (e.g. the + the text for an intent example) + + Returns: + NLU training data examples prepared for writing to YAML. + """ intents = [] for intent_name, examples in training_examples.items(): @@ -505,8 +539,8 @@ def process_training_examples_by_key( @staticmethod def _convert_training_examples( - training_examples: List[Dict], - example_extraction_predicate: Callable[[Dict[Text, Any]], Text] = lambda x: x, + training_examples: List[Union[Dict, List[Text]]], + example_extraction_predicate: Callable[[Dict[Text, Any]], Text], ) -> Tuple[List[Dict], Optional[Dict]]: """Returns converted training examples and potential intent metadata.""" converted_examples = [] @@ -555,7 +589,7 @@ def render(example: Dict) -> Dict: return [render(ex) for ex in examples] @staticmethod - def _render_training_examples_as_text(examples: List[Dict]) -> List[Text]: + def _render_training_examples_as_text(examples: List[Dict]) -> LiteralScalarString: def render(example: Dict) -> Text: return TrainingDataWriter.generate_list_item(example[KEY_INTENT_TEXT]) diff --git a/rasa/shared/nlu/training_data/message.py b/rasa/shared/nlu/training_data/message.py index cdaaa0b800fa..1085c7cb2daa 100644 --- a/rasa/shared/nlu/training_data/message.py +++ b/rasa/shared/nlu/training_data/message.py @@ -226,10 +226,14 @@ def get_sparse_features( attribute, featurizers ) - sequence_features = self._combine_features(sequence_features, featurizers) - sentence_features = self._combine_features(sentence_features, featurizers) + combined_sequence_features = self._combine_features( + sequence_features, featurizers + ) + combined_sentence_features = self._combine_features( + sentence_features, featurizers + ) - return sequence_features, sentence_features + return combined_sequence_features, combined_sentence_features def get_sparse_feature_sizes( self, attribute: Text, featurizers: Optional[List[Text]] = None @@ -280,10 +284,14 @@ def get_dense_features( attribute, featurizers ) - sequence_features = self._combine_features(sequence_features, featurizers) - sentence_features = 
@@ -280,10 +284,14 @@ def get_dense_features(
             attribute, featurizers
         )
 
-        sequence_features = self._combine_features(sequence_features, featurizers)
-        sentence_features = self._combine_features(sentence_features, featurizers)
+        combined_sequence_features = self._combine_features(
+            sequence_features, featurizers
+        )
+        combined_sentence_features = self._combine_features(
+            sentence_features, featurizers
+        )
 
-        return sequence_features, sentence_features
+        return combined_sequence_features, combined_sentence_features
 
     def get_all_features(
         self, attribute: Text, featurizers: Optional[List[Text]] = None
diff --git a/rasa/telemetry.py b/rasa/telemetry.py
index e51406362d85..0cbdd97e6a13 100644
--- a/rasa/telemetry.py
+++ b/rasa/telemetry.py
@@ -11,7 +11,7 @@
 import sys
 import textwrap
 import typing
-from typing import Any, Callable, Dict, List, Optional, Text
+from typing import Any, Callable, Dict, List, Optional, Text, Union
 import uuid
 
 import async_generator
@@ -849,7 +849,7 @@ def track_server_start(
 
     def project_fingerprint_from_model(
         _model_directory: Optional[Text],
-    ) -> Optional[Text]:
+    ) -> Optional[Union[Text, List[Text], int, float]]:
         """Get project fingerprint from an app's loaded model."""
         if _model_directory:
             try:
diff --git a/rasa/utils/common.py b/rasa/utils/common.py
index 7edca7194a5c..7cac8dc6d824 100644
--- a/rasa/utils/common.py
+++ b/rasa/utils/common.py
@@ -14,6 +14,7 @@
     Type,
     TypeVar,
     Union,
+    ContextManager,
 )
 
 import rasa.utils.io
@@ -26,10 +27,10 @@ T = TypeVar("T")
 
 
-class TempDirectoryPath(str):
-    """Represents a path to an temporary directory. When used as a context
-    manager, it erases the contents of the directory on exit.
+class TempDirectoryPath(str, ContextManager):
+    """Represents a path to a temporary directory.
 
+    When used as a context manager, it erases the contents of the directory on exit.
     """
 
     def __enter__(self) -> "TempDirectoryPath":
@@ -40,7 +41,7 @@ def __exit__(
         _exc: Optional[Type[BaseException]],
         _value: Optional[Exception],
         _tb: Optional[TracebackType],
-    ) -> bool:
+    ) -> None:
         if os.path.exists(self):
             shutil.rmtree(self)
 
diff --git a/rasa/utils/tensorflow/data_generator.py b/rasa/utils/tensorflow/data_generator.py
index 49c9a54a07e9..cc50f527d0d1 100644
--- a/rasa/utils/tensorflow/data_generator.py
+++ b/rasa/utils/tensorflow/data_generator.py
@@ -76,7 +76,7 @@ def prepare_batch(
         start: Optional[int] = None,
         end: Optional[int] = None,
         tuple_sizes: Optional[Dict[Text, int]] = None,
-    ) -> Tuple[Optional[np.ndarray]]:
+    ) -> Tuple[Optional[np.ndarray], ...]:
         """Slices model data into batch using given start and end value.
 
         Args:
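On the `prepare_batch` hunk above: `Tuple[Optional[np.ndarray]]` denotes a tuple of exactly one element, so the old annotation only ever described a 1-tuple; the trailing ellipsis declares a homogeneous tuple of arbitrary length, which is what a sliced batch actually is. A small sketch (illustrative function, not the Rasa API):

```python
from typing import Optional, Tuple

import numpy as np

def make_batch(size: int) -> Tuple[Optional[np.ndarray], ...]:
    # Without the trailing ellipsis the annotation would admit only a 1-tuple.
    return tuple(np.zeros(3) if i % 2 == 0 else None for i in range(size))

batch = make_batch(4)  # a 4-tuple, valid under Tuple[Optional[np.ndarray], ...]
print(len(batch))  # 4
```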
diff --git a/rasa/utils/tensorflow/environment.py b/rasa/utils/tensorflow/environment.py
index 010f4eaf6d40..04846c3fc518 100644
--- a/rasa/utils/tensorflow/environment.py
+++ b/rasa/utils/tensorflow/environment.py
@@ -85,15 +85,15 @@ def _parse_gpu_config(gpu_memory_config: Text) -> Dict[int, int]:
 
     # gpu_config is of format "gpu_id_1:gpu_id_1_memory, gpu_id_2: gpu_id_2_memory"
     # Parse it and store in a dictionary
-    parsed_gpu_config = {}
+    parsed_gpu_config: Dict[int, int] = {}
 
     try:
         for instance in gpu_memory_config.split(","):
             instance_gpu_id, instance_gpu_mem = instance.split(":")
-            instance_gpu_id = int(instance_gpu_id)
-            instance_gpu_mem = int(instance_gpu_mem)
+            parsed_instance_gpu_id = int(instance_gpu_id)
+            parsed_instance_gpu_mem = int(instance_gpu_mem)
 
-            parsed_gpu_config[instance_gpu_id] = instance_gpu_mem
+            parsed_gpu_config[parsed_instance_gpu_id] = parsed_instance_gpu_mem
     except ValueError:
         # Helper explanation of where the error comes from
         raise ValueError(
diff --git a/rasa/utils/tensorflow/model_data.py b/rasa/utils/tensorflow/model_data.py
index 103e3eca89a2..29b48a6894e2 100644
--- a/rasa/utils/tensorflow/model_data.py
+++ b/rasa/utils/tensorflow/model_data.py
@@ -264,7 +264,7 @@ def __init__(
         self.label_sub_key = label_sub_key
         # should be updated when features are added
         self.num_examples = self.number_of_examples()
-        self.sparse_feature_sizes = None
+        self.sparse_feature_sizes = {}
 
     def get(
         self, key: Text, sub_key: Optional[Text] = None
@@ -741,7 +741,7 @@ def balanced_data(self, data: Data, batch_size: int, shuffle: bool) -> Data:
             if min(num_data_cycles) > 0:
                 break
 
-        final_data = defaultdict(lambda: defaultdict(list))
+        final_data: Data = defaultdict(lambda: defaultdict(list))
         for key, attribute_data in new_data.items():
             for sub_key, features in attribute_data.items():
                 for f in features:
@@ -789,7 +789,7 @@ def _data_for_ids(data: Optional[Data], ids: np.ndarray) -> Data:
     Returns:
         The filtered data
     """
-    new_data = defaultdict(lambda: defaultdict(list))
+    new_data: Data = defaultdict(lambda: defaultdict(list))
 
     if data is None:
         return new_data
diff --git a/rasa/utils/tensorflow/model_data_utils.py b/rasa/utils/tensorflow/model_data_utils.py
index e75e598c5001..f2c4a96d451e 100644
--- a/rasa/utils/tensorflow/model_data_utils.py
+++ b/rasa/utils/tensorflow/model_data_utils.py
@@ -3,7 +3,7 @@
 import numpy as np
 import scipy.sparse
 from collections import defaultdict, OrderedDict
-from typing import List, Optional, Text, Dict, Tuple, Union, Any
+from typing import List, Optional, Text, Dict, Tuple, Union, Any, DefaultDict
 
 from rasa.nlu.constants import TOKENS_NAMES
 from rasa.utils.tensorflow.model_data import Data, FeatureArray
@@ -141,7 +141,7 @@ def get_tag_ids(
 def _surface_attributes(
     features: List[List[Dict[Text, List["Features"]]]],
     featurizers: Optional[List[Text]] = None,
-) -> Dict[Text, List[List[List["Features"]]]]:
+) -> DefaultDict[Text, List[List[Optional[List["Features"]]]]]:
     """Restructure the input.
 
     "features" can, for example, be a dictionary of attributes (INTENT,
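The widened signature of `_surface_attributes` (whose body follows below) records two facts mypy can now verify: the function returns the `defaultdict` it builds, and attributes absent from an example are appended as `None` placeholders. A condensed sketch of that shape, with simplified types in place of the real `Features` objects:

```python
from collections import defaultdict
from typing import DefaultDict, Dict, List, Optional

def surface_attributes(
    examples: List[Dict[str, str]]
) -> DefaultDict[str, List[Optional[str]]]:
    output: DefaultDict[str, List[Optional[str]]] = defaultdict(list)
    for example in examples:
        for attribute in ("INTENT", "TEXT"):
            # dict.get returns None for absent attributes, hence Optional[...]
            output[attribute].append(example.get(attribute))
    return output

print(surface_attributes([{"INTENT": "greet"}, {"TEXT": "hi"}]))
# defaultdict(<class 'list'>, {'INTENT': ['greet', None], 'TEXT': [None, 'hi']})
```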
"features" can, for example, be a dictionary of attributes (INTENT, @@ -171,20 +171,21 @@ def _surface_attributes( ) output = defaultdict(list) - for list_of_attribute_to_features in features: intermediate_features = defaultdict(list) for attribute_to_features in list_of_attribute_to_features: for attribute in attributes: - features = attribute_to_features.get(attribute) + attribute_features = attribute_to_features.get(attribute) if featurizers: - features = _filter_features(features, featurizers) + attribute_features = _filter_features( + attribute_features, featurizers + ) # if attribute is not present in the example, populate it with None - intermediate_features[attribute].append(features) + intermediate_features[attribute].append(attribute_features) - for key, value in intermediate_features.items(): - output[key].append(value) + for key, collection_of_feature_collections in intermediate_features.items(): + output[key].append(collection_of_feature_collections) return output diff --git a/rasa/utils/tensorflow/models.py b/rasa/utils/tensorflow/models.py index bfabd565c24c..53790394a42e 100644 --- a/rasa/utils/tensorflow/models.py +++ b/rasa/utils/tensorflow/models.py @@ -361,14 +361,16 @@ def _merge_batch_outputs( def _empty_lists_to_none_in_dict(input_dict: Dict[Text, Any]) -> Dict[Text, Any]: """Recursively replaces empty list or np array with None in a dictionary.""" - def _recurse(x: Union[Dict[Text, Any], List[Any], np.ndarray]) -> Optional[Any]: + def _recurse( + x: Union[Dict[Text, Any], List[Any], np.ndarray] + ) -> Optional[Union[Dict[Text, Any], List[np.ndarray]]]: if isinstance(x, dict): return {k: _recurse(v) for k, v in x.items()} elif (isinstance(x, list) or isinstance(x, np.ndarray)) and np.size(x) == 0: return None return x - return _recurse(input_dict) + return {k: _recurse(v) for k, v in input_dict.items()} def _get_metric_results(self, prefix: Optional[Text] = "") -> Dict[Text, float]: return { @@ -451,7 +453,9 @@ def batch_to_model_data_format( if isinstance(batch[0], Tuple): batch = batch[0] - batch_data = defaultdict(lambda: defaultdict(list)) + batch_data: Dict[Text, Dict[Text, List[tf.Tensor]]] = defaultdict( + lambda: defaultdict(list) + ) idx = 0 for key, values in data_signature.items(): @@ -855,7 +859,8 @@ def _get_batch_dim(attribute_data: Dict[Text, List[tf.Tensor]]) -> int: for key, data in attribute_data.items(): if data: return tf.shape(data[0])[0] - return None + + return 0 def _calculate_entity_loss( self, diff --git a/rasa/utils/tensorflow/rasa_layers.py b/rasa/utils/tensorflow/rasa_layers.py index de6261d79378..2daec3567000 100644 --- a/rasa/utils/tensorflow/rasa_layers.py +++ b/rasa/utils/tensorflow/rasa_layers.py @@ -901,6 +901,8 @@ def _features_as_token_ids( self._tf_layers[self.SPARSE_TO_DENSE_FOR_TOKEN_IDS](f) ) + return None + def _create_mlm_tensors( self, sequence_features: List[Union[tf.Tensor, tf.SparseTensor]], diff --git a/setup.cfg b/setup.cfg index b636edea5411..6b3e66735e56 100644 --- a/setup.cfg +++ b/setup.cfg @@ -47,5 +47,5 @@ disallow_untyped_decorators = True # FIXME: working our way towards removing these # see https://github.com/RasaHQ/rasa/pull/6470 # the list below is sorted by the number of errors for each error code, in decreasing order -disable_error_code = arg-type, assignment, var-annotated, return-value, union-attr, - override, operator, attr-defined, misc, return +disable_error_code = arg-type, assignment, var-annotated, union-attr, + override, operator, attr-defined, misc diff --git a/tests/core/utilities.py 
diff --git a/tests/core/utilities.py b/tests/core/utilities.py
index 5897a6ed707f..c3a81579cae5 100644
--- a/tests/core/utilities.py
+++ b/tests/core/utilities.py
@@ -49,18 +49,18 @@ def mocked_cmd_input(package, text: Text):
         text = [text]
 
     text_generator = itertools.cycle(text)
-    i = package.get_user_input
+    i = package._get_user_input
 
     def mocked_input(*args, **kwargs):
         value = next(text_generator)
         print(f"wrote '{value}' to input")
         return value
 
-    package.get_user_input = mocked_input
+    package._get_user_input = mocked_input
    try:
         yield
     finally:
-        package.get_user_input = i
+        package._get_user_input = i
 
 
 def user_uttered(