Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
HongbangYuan committed Aug 2, 2022
2 parents dceb1bf + bcfb653 commit ae02f13
Show file tree
Hide file tree
Showing 52 changed files with 1,065 additions and 840 deletions.
9 changes: 7 additions & 2 deletions cogktr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
__all__ = [
# core
"BaseMetric",
"BaseMaskedLMMetric",
"BaseDisambiguationMetric",
"BaseClassificationMetric",
"BaseRegressionMetric",
Expand All @@ -21,6 +22,7 @@
"CommonsenseqaQagnnProcessor",
"OpenBookQAReader",
"Conll2003Processor",
"LamaProcessor",
"MultisegchnsentibertProcessor",
"QnliProcessor",
"QnliSembertProcessor",
Expand All @@ -39,11 +41,11 @@
"CommonsenseqaReader",
"CommonsenseqaQagnnReader",
"Conll2003Reader",
"LamaReader",
"MultisegchnsentibertReader",
"QnliReader",
"SemcorReader",
"TSemcorProcessor",
"BSemcorProcessor",
"Squad2Reader",
"Sst2Reader",
"Sst5Reader",
Expand All @@ -59,6 +61,8 @@
"WikipediaLinker",
"WordnetLinker",
"BaseSearcher",
"ConcetNetSearcher",
"WikidataSearcher",
"WikipediaSearcher",
"WordnetSearcher",
"BaseTagger",
Expand All @@ -72,12 +76,13 @@

# models
"BaseModel",
"BaseMaskedLM",
"BaseDisambiguationModel",
"BaseQuestionAnsweringModel",
"BaseSentencePairClassificationModel",
"BaseSentencePairRegressionModel",
"BaseSequenceLabelingModel",
"BaseTextClassificationModel",
"EsrModel",
"HLGModel",
"KgembModel",
"KtembModel",
Expand Down
1 change: 1 addition & 0 deletions cogktr/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
# metric
"BaseMetric",
"BaseDisambiguationMetric",
"BaseMaskedLMMetric",
"BaseClassificationMetric",
"BaseRegressionMetric",

Expand Down
7 changes: 3 additions & 4 deletions cogktr/core/evaluator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm
import time
import os
import logging
from cogktr.utils.io_utils import load_model
Expand All @@ -12,9 +11,9 @@ class Evaluator:
def __init__(
self,
model,
checkpoint_path,
dev_data,
metrics,
checkpoint_path="",
sampler=None,
collate_fn=None,
drop_last=False,
Expand All @@ -26,9 +25,9 @@ def __init__(
"""
在指定数据据上验证模型指标
:param model: 待验证模型
:param checkpoint_path: 模型参数文件所在目录
:param dev_data: 验证数据集
:param metrics: 验证指标
:param checkpoint_path: 模型参数文件所在目录
:param sampler: 验证数据集对应的采样器
:param collate_fn: 拼接为batch的函数
:param drop_last: 是否丢掉最后一个数据
Expand Down Expand Up @@ -59,7 +58,7 @@ def __init__(
if os.path.isfile(model_file):
self.model = load_model(self.model,model_file)
else:
raise ValueError("Pretrained model file {} does not exist!".format(model_file))
print("Pretrained model file {} does not exist!".format(model_file))

self.model.to(self.device)

Expand Down
2 changes: 2 additions & 0 deletions cogktr/core/metric/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from .base_metric import *
from .base_classification_metric import *
from .base_disambiguation_metric import *
from .base_masked_lm import *
from .base_regression_metric import *

__all__ = [
"BaseMetric",
"BaseClassificationMetric",
"BaseDisambiguationMetric",
"BaseMaskedLMMetric",
"BaseRegressionMetric",
]
16 changes: 8 additions & 8 deletions cogktr/core/metric/base_classification_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score


class BaseClassificationMetric(BaseMetric):
def __init__(self, mode, default_metric_name=None):
super().__init__()
Expand All @@ -12,12 +13,11 @@ def __init__(self, mode, default_metric_name=None):
self.mode = mode
self.label_list = list()
self.pre_list = list()
self.default_metric_name=default_metric_name
self.default_metric_name = default_metric_name
if default_metric_name is None:
self.default_metric_name = "F1" if mode=="binary" else "macro_F1"
self.default_metric_name = "F1" if mode == "binary" else "macro_F1"
else:
self.default_metric_name=default_metric_name

self.default_metric_name = default_metric_name

def evaluate(self, pred, label):
self.label_list = self.label_list + label.cpu().tolist()
Expand All @@ -29,11 +29,11 @@ def get_metric(self, reset=True):
P = precision_score(self.label_list, self.pre_list, average="binary")
R = recall_score(self.label_list, self.pre_list, average="binary")
F1 = f1_score(self.label_list, self.pre_list, average="binary")
Acc = accuracy_score(self.label_list,self.pre_list)
Acc = accuracy_score(self.label_list, self.pre_list)
evaluate_result = {"P": P,
"R": R,
"F1": F1,
"Acc":Acc,
"Acc": Acc,
}
if self.mode == "multi":
micro_P = precision_score(self.label_list, self.pre_list, average="micro")
Expand All @@ -42,14 +42,14 @@ def get_metric(self, reset=True):
macro_P = precision_score(self.label_list, self.pre_list, average="macro")
macro_R = recall_score(self.label_list, self.pre_list, average="macro")
macro_F1 = f1_score(self.label_list, self.pre_list, average="macro")
Acc = accuracy_score(self.label_list,self.pre_list)
Acc = accuracy_score(self.label_list, self.pre_list)
evaluate_result = {"micro_P": micro_P,
"micro_R": micro_R,
"micro_F1": micro_F1,
"macro_P": macro_P,
"macro_R": macro_R,
"macro_F1": macro_F1,
"Acc":Acc,
"Acc": Acc,
}
if reset:
self.label_list = list()
Expand Down
10 changes: 6 additions & 4 deletions cogktr/core/metric/base_disambiguation_metric.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,28 @@
from cogktr.core.metric.base_metric import BaseMetric
import numpy as np


class BaseDisambiguationMetric(BaseMetric):
def __init__(self, segment_list):
super().__init__()
self.segment_list = segment_list
self.label_list = list()
self.pre_list = list()
self.default_metric_name = "F1"

def evaluate(self, pred, label):
self.label_list = self.label_list + label.cpu().tolist()
self.pre_list = self.pre_list + pred.cpu().tolist()

def get_metric(self, reset=True):
label_list=np.array(self.label_list)
pre_list=np.array(self.pre_list)
label_list = np.array(self.label_list)
pre_list = np.array(self.pre_list)
evaluate_result = {}
ok = 0
for begin, end in zip(self.segment_list[:-1], self.segment_list[1:]):
if pre_list[begin:end].argmax() == int(np.where(label_list[begin:end]==1)[0]):
if pre_list[begin:end].argmax() == int(np.where(label_list[begin:end] == 1)[0]):
ok += 1
F1 =ok / (len(self.segment_list) - 1) * 100
F1 = ok / (len(self.segment_list) - 1) * 100
evaluate_result = {"F1": F1}
if reset:
self.label_list = list()
Expand Down
26 changes: 26 additions & 0 deletions cogktr/core/metric/base_masked_lm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from cogktr.core.metric.base_metric import BaseMetric
from sklearn.metrics import top_k_accuracy_score


class BaseMaskedLMMetric(BaseMetric):
    """Top-k accuracy metric accumulated over evaluation batches.

    Batches handed to :meth:`evaluate` are buffered as plain Python lists;
    :meth:`get_metric` scores everything buffered so far with
    ``sklearn.metrics.top_k_accuracy_score``.
    """

    def __init__(self, topk=1):
        """
        :param topk: value forwarded as ``k`` to ``top_k_accuracy_score``
        """
        super().__init__()
        self.topk = topk
        self.label_list = list()
        self.pre_list = list()
        self.default_metric_name = "Top_K_Acc"

    def evaluate(self, pred, label):
        """Buffer one batch of predictions and labels.

        :param pred: object exposing ``.cpu().tolist()``; each resulting row
            is later treated as one score per class
            (presumably a torch tensor -- confirm against the caller)
        :param label: object exposing ``.cpu().tolist()`` holding the targets
        """
        # Extend in place: ``list + list`` would copy the whole accumulated
        # buffer on every batch, making a full evaluation pass quadratic.
        self.label_list.extend(label.cpu().tolist())
        self.pre_list.extend(pred.cpu().tolist())

    def get_metric(self, reset=True):
        """Compute top-k accuracy over everything buffered so far.

        :param reset: when true, clear both buffers after scoring
        :return: dict with the single key ``"Top_K_Acc"``
        :raises IndexError: if nothing has been buffered, because the class
            count is read from the first buffered prediction row
        """
        # The width of the first prediction row defines the label set, so
        # classes absent from the buffered labels are still accounted for.
        num_classes = len(self.pre_list[0])
        top_k_acc = top_k_accuracy_score(
            self.label_list,
            self.pre_list,
            k=self.topk,
            labels=list(range(num_classes)),
        )
        evaluate_result = {"Top_K_Acc": top_k_acc}
        if reset:
            self.label_list = list()
            self.pre_list = list()
        return evaluate_result
17 changes: 4 additions & 13 deletions cogktr/core/metric/base_question_answering_metric.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,22 @@
from cogktr.core.metric.base_metric import BaseMetric
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import top_k_accuracy_score
import numpy as np


class BaseQuestionAnsweringMetric(BaseMetric):
def __init__(self):
super().__init__()
self.label_list = list()
self.pre_list = list()
self.default_metric_name = "Top_K_Acc"

def evaluate(self, pred, label):
# print(pred.size())
# print(label.size())
self.label_list = self.label_list + label.cpu().tolist()
self.pre_list = self.pre_list + pred.cpu().tolist()

def get_metric(self, reset=True, topk=5):
# print(self.pre_list.size())
# print(self.label_list.size())
# print(max(self.label_list))
# print(len(self.pre_list[0]))
acc = top_k_accuracy_score(self.label_list, self.pre_list, k=topk, labels=range(len(self.pre_list[0])))
evaluate_result = {"Acc": acc}
top_k_acc = top_k_accuracy_score(self.label_list, self.pre_list, k=topk, labels=range(len(self.pre_list[0])))
evaluate_result = {"Top_K_Acc": top_k_acc}
if reset:
self.label_list = list()
self.pre_list = list()
return evaluate_result
return evaluate_result
5 changes: 3 additions & 2 deletions cogktr/core/metric/base_regression_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ def __init__(self):
super().__init__()
self.pre_list = list()
self.label_list = list()
self.default_metric_name = "r2"

def evaluate(self, pred, label):
self.pre_list = self.pre_list + pred.cpu().tolist()
Expand All @@ -21,11 +22,11 @@ def get_metric(self, reset=True):
r2 = r2_score(self.pre_list, self.label_list)
mse = mean_squared_error(self.pre_list, self.label_list)
mae = mean_absolute_error(self.pre_list, self.label_list)
pear=pearsonr(self.pre_list,self.label_list)[0]
pear = pearsonr(self.pre_list, self.label_list)[0]
evaluate_result = {"r2": r2,
"mse": mse,
"mae": mae,
"pear":pear,
"pear": pear,
}
if reset:
self.pre_list = list()
Expand Down
3 changes: 2 additions & 1 deletion cogktr/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@
"CommonsenseqaQagnnProcessor",
"OpenBookQAReader",
"Conll2003Processor",
"LamaProcessor",
"MultisegchnsentibertProcessor",
"QnliProcessor",
"QnliSembertProcessor",
"SemcorProcessor",
"TSemcorProcessor",
"BSemcorProcessor",
"Squad2Processor",
"Squad2SembertProcessor",
"Sst2Processor",
Expand All @@ -32,6 +32,7 @@
"CommonsenseqaReader",
"CommonsenseqaQagnnReader",
"Conll2003Reader",
"LamaReader",
"MultisegchnsentibertReader",
"QnliReader",
"SemcorReader",
Expand Down
5 changes: 4 additions & 1 deletion cogktr/data/processor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from cogktr.data.processor.commonsenseqa_processors import *
from cogktr.data.processor.commonsenseqa_qagnn_processors import *
from cogktr.data.processor.conll2003_processors import *
from cogktr.data.processor.lama_processors import *
from cogktr.data.processor.multisegchnsentibert_processors import *
from cogktr.data.processor.qnli_processors import *
from cogktr.data.processor.semcor_processors import *
Expand All @@ -23,6 +24,9 @@
# conll2003processor
"Conll2003Processor",

# lamaprocessor
"LamaProcessor",

# multisegchnsentibertprocessor
"MultisegchnsentibertProcessor",

Expand All @@ -33,7 +37,6 @@
# semcorprocessor
"SemcorProcessor",
"TSemcorProcessor",
"BSemcorProcessor",

# squad2processor
"Squad2Processor",
Expand Down
3 changes: 3 additions & 0 deletions cogktr/data/processor/base_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ def __init__(self, debug=False):
self.debug = debug
pass

def procss_one(self, sentence):
pass

def _process(self, data):
pass

Expand Down
Loading

0 comments on commit ae02f13

Please sign in to comment.