Skip to content

Commit

Permalink
Add ECHR Judgment Classification scenario (#3311)
Browse files Browse the repository at this point in the history
Co-authored-by: Ryo Kawahara <[email protected]>
Co-authored-by: Mikio Takeuchi <[email protected]>
  • Loading branch information
3 people authored Feb 5, 2025
1 parent 5fb6ee8 commit 78ec8cf
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 0 deletions.
26 changes: 26 additions & 0 deletions src/helm/benchmark/run_specs/enterprise_run_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,32 @@ def get_casehold_spec() -> RunSpec:
)


@run_spec_function("echr_judgment_classification")
def get_echr_judgment_classification_spec() -> RunSpec:
    """A different implementation (binary classification) of lex_glue_fixed:subset=ecthr_a"""
    # Imported lazily so the scenario module is only loaded when this run spec is used.
    from helm.benchmark.scenarios.echr_judgment_classification_scenario import EchrJudgeScenario

    return RunSpec(
        name="echr_judgment_classification",
        scenario_spec=ScenarioSpec(
            class_name="helm.benchmark.scenarios.echr_judgment_classification_scenario.EchrJudgeScenario",
            # Drop documents longer than 600 words to keep prompts within context limits.
            args={"filter_max_length": 600},
        ),
        adapter_spec=get_generation_adapter_spec(
            instructions=EchrJudgeScenario.PROMPT_INST_WITH_EX,
            input_noun=EchrJudgeScenario.PROMPT_INPUT,
            output_noun=EchrJudgeScenario.PROMPT_OUTPUT,
            # One token is enough for the "Yes"/"No" answer.
            max_tokens=1,
        ),
        metric_specs=get_exact_match_metric_specs() + _get_weighted_classification_metric_specs(labels=["yes", "no"]),
        groups=["echr_judgment_classification"],
    )


# Climate


Expand Down
113 changes: 113 additions & 0 deletions src/helm/benchmark/scenarios/echr_judgment_classification_scenario.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
from typing import List, Optional
import json
import os
import re

from helm.benchmark.scenarios.scenario import (
Scenario,
Instance,
Reference,
TRAIN_SPLIT,
VALID_SPLIT,
TEST_SPLIT,
CORRECT_TAG,
Input,
Output,
)
from helm.common.general import ensure_file_downloaded, ensure_directory_exists


class EchrJudgeScenario(Scenario):
    """The "Binary Violation" Classification task from the paper Neural Legal Judgment Prediction in English [(Chalkidis et al., 2019)](https://arxiv.org/pdf/1906.02059.pdf).

    The task is to analyze the description of a legal case from the European Court of Human Rights (ECHR),
    and classify it as positive if any human rights article or protocol has been violated and negative otherwise.

    The case text can be very long, which sometimes results in incorrect model output
    when using zero-shot predictions. Therefore, two trivial cases have been added
    to the instructions part.

    Example Prompt:
        Is the following case a violation of human rights? (Instructions)

        Case: Human rights have not been violated. (Trivial No case in instructions)
        Answer: No

        Case: Human rights have been violated. (Trivial Yes case in instructions)
        Answer: Yes

        Case: <TEXT> (In-context examples, if possible)
        Answer: <Label> (Label is correct answer, Yes or No)
        ...
        Case: <TEXT> (Target input text)
        Answer: <Output> (Output ::= Yes | No)
    """  # noqa: E501

    # Names of the tasks we support
    name = "echr_judgment_classification"
    description = 'The "Binary Violation" Classification task from the paper Neural Legal Judgment Prediction in English [(Chalkidis et al., 2019)](https://arxiv.org/pdf/1906.02059.pdf).'  # noqa: E501
    tags = ["classification", "judgement", "legal"]

    # URL of the dataset archive (zip with EN_train/EN_dev/EN_test JSON files)
    _DATASET_URL = "https://archive.org/download/ECHR-ACL2019/ECHR_Dataset.zip"

    # Answer labels
    YES_ANSWER = "Yes"
    NO_ANSWER = "No"

    # Prompt constants (used in adapter)
    PROMPT_INPUT = "Case"
    PROMPT_OUTPUT = "Answer"

    YES_EX = f"\n\n{PROMPT_INPUT}: Human rights have been violated.\n{PROMPT_OUTPUT}: {YES_ANSWER}"
    NO_EX = f"\n\n{PROMPT_INPUT}: Human rights have not been violated.\n{PROMPT_OUTPUT}: {NO_ANSWER}"
    INST_EX = f"{NO_EX}{YES_EX}"

    PROMPT_INST = "Is the following case a violation of human rights?"  # Prompt for instructions
    PROMPT_INST_WITH_EX = f"{PROMPT_INST}{INST_EX}"  # Prompt for instructions with trivial examples

    def __init__(self, filter_max_length: Optional[int] = None):
        """
        Args:
            filter_max_length: Int indicating maximum length for documents. Documents longer than
                               filter_max_length tokens (as counted by count_words)
                               will be filtered out.
        """
        super().__init__()
        self.filter_max_length = filter_max_length

    def count_words(self, text: str) -> int:
        """Returns the approximate number of words in the text.

        Splits on runs of non-word characters; note that leading/trailing
        punctuation produces empty edge tokens, so this can slightly overcount.
        """
        return len(re.split(r"\W+", text))

    def get_instances(self, output_path: str) -> List[Instance]:
        """Download the ECHR dataset and convert each case JSON into an Instance."""
        data_dir = os.path.join(output_path, "data")
        ensure_directory_exists(data_dir)
        # Use only the archive's base name for the local path; joining the full URL
        # into the path (as before) embeds "https://..." in the directory name and
        # is invalid on Windows.
        archive_name = self._DATASET_URL.split("/")[-1]
        download_directory_path = os.path.join(data_dir, archive_name)
        ensure_file_downloaded(
            source_url=self._DATASET_URL,
            target_path=download_directory_path,
            unpack=True,
            unpack_type="unzip",
        )

        source_split_to_helm_split = {"EN_train": TRAIN_SPLIT, "EN_dev": VALID_SPLIT, "EN_test": TEST_SPLIT}

        instances: List[Instance] = []
        for source_split, helm_split in source_split_to_helm_split.items():
            target_data_dir = os.path.join(download_directory_path, source_split)
            for json_file_name in os.listdir(target_data_dir):
                if not json_file_name.endswith(".json"):
                    continue
                file_path = os.path.join(target_data_dir, json_file_name)
                with open(file_path, "r") as f:
                    raw_data = json.load(f)
                # "TEXT" is a list of paragraphs; join them into one document.
                input_text = " ".join(raw_data["TEXT"])
                # Optionally skip documents that exceed the configured word budget.
                if self.filter_max_length is not None and self.count_words(input_text) > self.filter_max_length:
                    continue
                # A case is a violation ("Yes") iff any article was violated.
                answer = self.YES_ANSWER if len(raw_data["VIOLATED_ARTICLES"]) > 0 else self.NO_ANSWER
                correct_reference = Reference(Output(text=answer), tags=[CORRECT_TAG])
                instances.append(Instance(input=Input(input_text), references=[correct_reference], split=helm_split))
        return instances
17 changes: 17 additions & 0 deletions src/helm/benchmark/static/schema_enterprise.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ run_groups:
subgroups:
- legal_contract_summarization
- casehold
- echr_judgment_classification
- legal_opinion_sentiment_classification

- name: climate_scenarios
Expand Down Expand Up @@ -188,6 +189,22 @@ run_groups:
when: before 2021
language: English

- name: echr_judgment_classification
display_name: ECHR Judgment Classification
description: The "Binary Violation" Classification task from the paper Neural Legal Judgment Prediction in English [(Chalkidis et al., 2019)](https://arxiv.org/pdf/1906.02059.pdf). The task is to analyze the description of a legal case from the European Court of Human Rights (ECHR), and classify it as positive if any human rights article or protocol has been violated and negative otherwise.
metric_groups:
- accuracy
- general_information
environment:
main_name: classification_weighted_f1
main_split: test
taxonomy:
task: text classification
        what: cases from the European Court of Human Rights
who: judiciary of the European Court of Human Rights
when: 2014-2018 (train) and 2014-2018 (test)
language: English

- name: legal_opinion_sentiment_classification
display_name: Legal Opinion Sentiment Classification
description: A legal opinion sentiment classification task based on the paper Effective Approach to Develop a Sentiment Annotator For Legal Domain in a Low Resource Setting [(Ratnayaka et al., 2020)](https://arxiv.org/pdf/2011.00318.pdf).
Expand Down

0 comments on commit 78ec8cf

Please sign in to comment.