Skip to content
This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

woi: filter knowledge #4114

Merged
merged 3 commits into from
Oct 28, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 48 additions & 2 deletions parlai/tasks/wizard_of_internet/agents.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,14 @@
from parlai.utils.data import DatatypeHelper
import parlai.utils.logging as logging
import parlai.tasks.wizard_of_internet.constants as CONST
from parlai.core.mutators import register_mutator
from parlai.core.mutators import register_mutator, ManyEpisodeMutator
from parlai.tasks.wizard_of_wikipedia.agents import (
AddLabel as AddLabelWizWiki,
AddLabelLM as AddLabelLMWizWiki,
CheckedSentenceAsLabel as CheckedSentenceAsLabelWizWiki,
AddCheckedSentence as AddCheckedSentenceWizWiki,
)

import random
from .build import build


Expand Down Expand Up @@ -638,3 +637,50 @@ class AddLabelLM(AddLabelLMWizWiki):
"""

pass


@register_mutator("woi_filter_no_passage_used")
class WoiFilterNoPassageUsed(ManyEpisodeMutator):
    """
    Drop every example where no passage was selected to base the wizard reply on.

    An example is dropped when its ``CONST.SELECTED_SENTENCES`` field, joined
    with single spaces, equals ``CONST.NO_SELECTED_SENTENCES_TOKEN``, or when
    the field is absent. Every remaining example is emitted as its own
    single-example episode.

    This works best in flattened mode. E.g. run with: parlai display_data -t
    wizard_of_internet -n 100 -dt valid --mutators
    flatten+woi_filter_no_passage_used
    """

    def many_episode_mutation(self, episode):
        """
        Split ``episode`` into one-example episodes, skipping no-passage examples.

        :param episode:
            iterable of examples; each must support ``.get(key)``.
        :return:
            list of one-element lists, one per example that is kept.
        """
        out_episodes = []
        for e in episode:
            checked_sentences = e.get(CONST.SELECTED_SENTENCES)
            if checked_sentences is None:
                # No selection recorded at all: there is no passage the reply
                # could be based on, and joining None would raise TypeError.
                continue
            if ' '.join(checked_sentences) == CONST.NO_SELECTED_SENTENCES_TOKEN:
                continue
            out_episodes.append([e])
        return out_episodes


@register_mutator("woi_filter_selected_knowledge_in_retrieved_docs")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see any reason for having this teacher, given how the task teacher adds these fields here: the overlap must exist between the selected sentences and the retrieved sentences.

Copy link
Contributor Author

@jaseweston jaseweston Oct 26, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(It's a mutator, not a teacher.) It was added as a debugging check that this is actually true, because of the problems Leo reported. It might also be useful in the future for other tasks that use knowledge.

class WoiFilterSelectedKnowledgeInRetrievedDocs(ManyEpisodeMutator):
    """
    Keep only examples whose '__selected-sentences__' all occur in the
    '__retrieved-docs__' field.

    An example is dropped when no sentence was selected (the selection equals
    ``CONST.NO_SELECTED_SENTENCES_TOKEN`` or is absent), or when at least one
    selected sentence, stripped of surrounding spaces, is not a substring of the
    space-joined retrieved docs. Every remaining example is emitted as its own
    single-example episode.
    """

    def many_episode_mutation(self, episode):
        """
        Split ``episode`` into one-example episodes, keeping verified examples.

        :param episode:
            iterable of examples; each must support ``.get(key)``.
        :return:
            list of one-element lists, one per example that is kept.
        """
        out_episodes = []
        for e in episode:
            checked_sentences = e.get(CONST.SELECTED_SENTENCES)
            if checked_sentences is None:
                # Nothing was selected, so there is nothing to verify.
                continue
            if ' '.join(checked_sentences) == CONST.NO_SELECTED_SENTENCES_TOKEN:
                continue
            # A missing docs field is treated as empty text, so any selected
            # sentence fails the containment check instead of raising.
            docs = ' '.join(e.get('__retrieved-docs__') or [])
            # all() stops at the first sentence that is not found.
            if all(sent.strip(' ') in docs for sent in checked_sentences):
                out_episodes.append([e])
        return out_episodes