This repository has been archived by the owner on Nov 3, 2023. It is now read-only.

[GPU] Fix Tests #4749

Merged 1 commit on Aug 18, 2022
17 changes: 13 additions & 4 deletions parlai/core/metrics.py
@@ -535,7 +535,9 @@ def _prec_recall_f1_score(pred_items, gold_items):
         return precision, recall, f1
 
     @staticmethod
-    def compute(guess: str, answers: List[str]) -> F1Metric:
+    def compute(
+        guess: str, answers: List[str], expose_p_and_r: bool = False
+    ) -> Union[F1Metric, Tuple[F1Metric, F1Metric, F1Metric]]:
         if guess is None or answers is None:
             return AverageMetric(0, 0)
         g_tokens = normalize_answer(guess).split()
@@ -546,7 +548,10 @@ def compute(guess: str, answers: List[str]) -> F1Metric:
         max_p, max_r, max_f1 = 0, 0, 0
         for p, r, f1 in scores:
             max_p, max_r, max_f1 = max(max_p, p), max(max_r, r), max(f1, max_f1)
-        return (F1Metric(max_p, 1), F1Metric(max_r, 1), F1Metric(max_f1, 1))
+        if expose_p_and_r:
+            return (F1Metric(max_p, 1), F1Metric(max_r, 1), F1Metric(max_f1, 1))
+        else:
+            return F1Metric(max_f1, 1)
 
 
 class ExactMatchMetric(AverageMetric):
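
The new expose_p_and_r flag makes the precision/recall breakdown opt-in while preserving the old single-F1Metric return for existing callers. A minimal usage sketch, assuming ParlAI is installed (the guess/answer strings are invented):

```python
from parlai.core.metrics import F1Metric  # assumes ParlAI is installed

guess = 'the cat sat on the mat'
answers = ['a cat sat on a mat', 'the cat slept']

# Default path: one F1Metric, the same shape callers saw before this PR.
f1_only = F1Metric.compute(guess, answers)

# Opt-in path: the full (precision, recall, f1) breakdown.
precision, recall, f1 = F1Metric.compute(guess, answers, expose_p_and_r=True)
print(precision, recall, f1)
```

Defaulting the flag to False keeps existing call sites that expect a single metric working unchanged; only the evaluate_response hunk below opts in.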
@@ -730,7 +735,9 @@ def compute_many(
         :return: (rouge-1, rouge-2, rouge-L)
         """
         measure = measure.lower()
-        assert measure in ROUGE_METRICS_MEASURES, "Use one of recall 'r' (default), f1 'f', or precision 'p'."
+        assert (
+            measure in ROUGE_METRICS_MEASURES
+        ), "Use one of recall 'r' (default), f1 'f', or precision 'p'."
 
         # possible global initialization
         try:
@@ -1031,7 +1038,9 @@ def evaluate_response(self, observation: Message, labels: List[str]) -> None:
 
         if prediction is not None:
             self.add('accuracy', ExactMatchMetric.compute(prediction, labels))
-            precision, recall, f1 = F1Metric.compute(prediction, labels)
+            precision, recall, f1 = F1Metric.compute(
+                prediction, labels, expose_p_and_r=True
+            )
             self.add('precision', precision)
             self.add('recall', recall)
             self.add('f1', f1)
10 changes: 7 additions & 3 deletions tests/nightly/gpu/test_bb3.py
@@ -19,6 +19,9 @@
 from projects.bb3.tests.opt_presets import INIT_OPT
 
 
+LOCAL = False
+
+
 def _self_memory(text):
     return f'{PROMPT.SELF_MEMORY_PREFIX}: {text}'
 
@@ -115,7 +118,7 @@ class TestOptFtBase(unittest.TestCase):
     def setUp(self):
         self.opt = INIT_OPT
         for k, v in self.opt.items():
-            if 'BB3OPTAgent' in v:
+            if 'BB3OPTAgent' in str(v):
                 self.opt[k] = 'projects.bb3.agents.opt_api_agent:MockOptAgent'
 
         self.opt['search_server'] = 'test'
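
Wrapping v in str() matters because the opt dict can hold non-string values (ints, bools, nested structures), and Python's `in` operator raises TypeError when the right-hand side is, say, an int. A standalone illustration with invented opt values:

```python
# Invented opt values illustrating why `'BB3OPTAgent' in v` can blow up.
opt = {
    'model': 'projects.bb3.agents.opt_api_agent:BB3OPTAgent',
    'batchsize': 4,   # 'BB3OPTAgent' in 4 -> TypeError
    'fp16': True,     # 'BB3OPTAgent' in True -> TypeError
}

for k, v in opt.items():
    if 'BB3OPTAgent' in str(v):  # str() makes the containment test safe for any type
        opt[k] = 'projects.bb3.agents.opt_api_agent:MockOptAgent'

assert opt['model'].endswith('MockOptAgent')
```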
@@ -340,8 +343,8 @@ def setUp(self):
         opt = copy.deepcopy(self.opt)
         overrides = {
             'knowledge_conditioning': 'combined',
-            'opt_server': 'http://18.117.126.138:3000',
-            'search_server': 'bing_cc',
+            'opt_server': 'http://localhost:6000',
+            'search_server': 'test',
         }
         for k, v in overrides.items():
             opt[k] = v
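
The overrides swap a hard-coded public IP (http://18.117.126.138:3000) and the live bing_cc search backend for localhost and the 'test' stub, so the nightly job no longer depends on an external machine. The same dict-override pattern in isolation (base values invented):

```python
import copy

# Invented base config; the PR applies overrides to a deep copy of self.opt.
base_opt = {'knowledge_conditioning': 'separate', 'search_server': 'bing_cc'}

opt = copy.deepcopy(base_opt)
overrides = {
    'knowledge_conditioning': 'combined',
    'opt_server': 'http://localhost:6000',  # only reachable on a dev box
    'search_server': 'test',                # stub backend, no network calls
}
for k, v in overrides.items():  # opt.update(overrides) would do the same
    opt[k] = v

assert opt['search_server'] == 'test'
```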
@@ -351,6 +354,7 @@ def setUp(self):
         self.batch_agent = create_agent(opt)
 
 
+@unittest.skipUnless(LOCAL, "must be local to specify opt server")
 class TestOptMainServerBatching(TestOptMainServerBase):
     def test_batching(self):
         self.batch_agent = create_agent(self.opt)
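
With LOCAL = False committed, unittest.skipUnless turns the batching test into a skip on CI; a developer with an opt server running at localhost:6000 flips the flag to run it. The gating pattern on its own (test body invented):

```python
import unittest

LOCAL = False  # flip to True on a dev box with a local opt server running


@unittest.skipUnless(LOCAL, 'must be local to specify opt server')
class TestNeedsLocalServer(unittest.TestCase):
    def test_batching(self):
        # The real test would create agents that talk to the local server.
        self.assertTrue(LOCAL)


if __name__ == '__main__':
    unittest.main()
```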