From 2b5052a211539570bbf7e71f618affecbad78d1f Mon Sep 17 00:00:00 2001
From: Ondrej Dusek
Date: Tue, 12 Jan 2021 11:36:26 +0100
Subject: [PATCH] Allow variable number of references for BLEU via API

This allows BLEU to use a variable number of references (different
number of references for each sentence). If a sentence has fewer than
the maximum number of references, None is used to fill remaining
reference streams.
---
 sacrebleu/metrics/bleu.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/sacrebleu/metrics/bleu.py b/sacrebleu/metrics/bleu.py
index bacb8c5c..26da7965 100644
--- a/sacrebleu/metrics/bleu.py
+++ b/sacrebleu/metrics/bleu.py
@@ -3,7 +3,6 @@
 import math
 import logging
 from collections import Counter
-from itertools import zip_longest
 from typing import List, Iterable, Union
 
 from ..tokenizers import TOKENIZERS
@@ -256,10 +255,18 @@ def corpus_score(self, sys_stream: Union[str, Iterable[str]],
         # look for already-tokenized sentences
         tokenized_count = 0
 
+        # sanity checks
+        if any(len(ref_stream) != len(sys_stream) for ref_stream in ref_streams):
+            raise EOFError("System and reference streams have different lengths!")
+        if any(line is None for line in sys_stream):
+            raise EOFError("Undefined line in system stream!")
+
         fhs = [sys_stream] + ref_streams
-        for lines in zip_longest(*fhs):
-            if None in lines:
-                raise EOFError("Source and reference streams have different lengths!")
+        for lines in zip(*fhs):
+            # remove undefined references (i.e. we have fewer references for this particular sentence)
+            lines = [x for x in lines if x is not None]
+            if len(lines) < 2:  # we need at least system + 1 defined reference
+                raise EOFError("No valid references for a sentence!")
             if self.lc:
                 lines = [x.lower() for x in lines]
 