Skip to content

Commit

Permalink
Committing project
Browse files Browse the repository at this point in the history
  • Loading branch information
dsvilarkovic committed Jul 15, 2022
1 parent 4f25e96 commit e18d9d9
Show file tree
Hide file tree
Showing 238 changed files with 83,020 additions and 1 deletion.
7,230 changes: 7,230 additions & 0 deletions ClickbaitClassifier.ipynb

Large diffs are not rendered by default.

3,345 changes: 3,345 additions & 0 deletions DataProcessing/Classifier_clickbait_evaluation_of_outputs.ipynb

Large diffs are not rendered by default.

Binary file not shown.
128 changes: 128 additions & 0 deletions DataProcessing/clickbait_classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import torch
from transformers import BertModel, BertConfig, PretrainedConfig, PreTrainedModel, AutoModel, AutoConfig
from typing import List, Optional, Tuple, Union
from transformers.modeling_outputs import TokenClassifierOutput,SequenceClassifierOutput
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss, BCELoss
import torch.nn as nn
# from modeling_mpnet import MPNetModel, MPnetConfig

class ClickbaitConfig(PretrainedConfig):
    """Configuration holder for the clickbait classifier.

    Stores the name of the backbone checkpoint together with the
    hyper-parameters of the classification head. ``num_labels`` and any
    extra keyword arguments are forwarded to ``PretrainedConfig``.
    """

    def __init__(
        self,
        model_type: str = "bert",
        pretrained_model: str = "bert-base-uncased",
        num_labels: int = 1,
        dropout: float = 0.1,
        inner_dim1: int = 256,
        inner_dim2: int = 32,
        max_length: int = 512,
        load_pretrained: bool = True,
        freeze_bert: bool = True,
        **kwargs
    ):
        """Record the backbone selection and head hyper-parameters.

        Args:
            model_type: Model family identifier stored on the config.
            pretrained_model: Checkpoint name or path of the backbone.
            num_labels: Number of outputs; handed to the base config.
            dropout: Dropout probability stored for the head.
            inner_dim1: Width stored for the first hidden layer.
            inner_dim2: Width stored for the second hidden layer.
            max_length: Maximum sequence length stored on the config.
            load_pretrained: Flag stored for the model to decide whether
                pretrained backbone weights should be loaded.
            freeze_bert: Flag stored for the model to decide whether the
                backbone parameters are frozen.
            **kwargs: Passed through unchanged to ``PretrainedConfig``.
        """
        # The base class must be initialised first; the assignments below
        # then override whatever defaults it installed.
        super().__init__(num_labels=num_labels, **kwargs)

        # Backbone selection.
        self.model_type = model_type
        self.pretrained_model = pretrained_model

        # Classification-head hyper-parameters.
        self.dropout = dropout
        self.inner_dim1 = inner_dim1
        self.inner_dim2 = inner_dim2

        # Input length and weight-handling flags.
        self.max_length = max_length
        self.load_pretrained = load_pretrained
        self.freeze_bert = freeze_bert


class BertClickbaitClassifier(PreTrainedModel):
    """
    Transformer encoder followed by a small feed-forward head with a sigmoid output.

    Taken and extended from BertforSequenceClassification : https://github.com/huggingface/transformers/blob/v4.19.2/src/transformers/models/bert/modeling_bert.py#L1508

    The head is ``dropout -> linear -> ReLU -> dropout -> linear -> ReLU ->
    dropout -> linear -> sigmoid`` applied to the hidden state of the first
    token of the encoder output.
    """
    config_class = ClickbaitConfig

    def __init__(self, config: ClickbaitConfig):
        """Build the encoder and the classification head.

        Args:
            config: Supplies the backbone checkpoint name, the head sizes,
                the dropout probability, and the ``load_pretrained`` /
                ``freeze_bert`` flags.
        """
        super(BertClickbaitClassifier, self).__init__(config)
        self.num_labels = config.num_labels
        self.config = config
        self.bert_config = AutoConfig.from_pretrained(config.pretrained_model)

        # Build the encoder exactly once. Previously the pretrained weights
        # were loaded unconditionally and then loaded a second time when
        # ``load_pretrained`` was set, so the flag had no effect besides
        # doubling the loading work.
        if config.load_pretrained:
            print("Load pretrained weights from {}".format(config.pretrained_model))
            self.bert = AutoModel.from_pretrained(config.pretrained_model, config=self.bert_config)
        else:
            # Architecture only; weights keep their fresh initialisation.
            self.bert = AutoModel.from_config(self.bert_config)

        if config.freeze_bert:
            print("Freeze weights in the BERT model. Just the classifier will be trained")
            for param in self.bert.parameters():
                param.requires_grad = False

        # Attribute names are kept unchanged so existing state dicts still load.
        self.linear_1 = nn.Linear(self.bert.config.hidden_size, config.inner_dim1)
        self.dropout_1 = nn.Dropout(config.dropout)
        self.relu_1 = nn.ReLU()
        self.dropout_2 = nn.Dropout(config.dropout)
        self.linear_2 = nn.Linear(config.inner_dim1, config.inner_dim2)
        self.relu_2 = nn.ReLU()
        self.dropout_3 = nn.Dropout(config.dropout)
        self.classifier = nn.Linear(config.inner_dim2, config.num_labels)
        self.sigmoid = nn.Sigmoid()

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:
        r"""
        Run the encoder and the head, optionally computing a binary cross-entropy loss.

        labels (`torch.Tensor`, *optional*):
            Targets for the loss. They are flattened and cast to the dtype of the
            model output before being compared with the flattened sigmoid
            probabilities using `BCELoss`, so they are expected to hold values in
            `[0, 1]` and to have as many elements as the flattened output.

        Returns:
            When `return_dict` is falsy, a tuple `(loss?, probabilities, *outputs[2:])`
            where `loss` is present only if `labels` was given. Otherwise a
            `SequenceClassifierOutput` with `loss` and `logits` set. Note that the
            `logits` field holds values *after* the sigmoid, i.e. probabilities.
        """

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # First element of the encoder output, restricted to sequence position 0.
        output = outputs[0][:, 0, :]

        x = self.dropout_1(output)
        x = self.linear_1(x)
        x = self.relu_1(x)
        x = self.dropout_2(x)
        x = self.linear_2(x)
        x = self.relu_2(x)
        x = self.dropout_3(x)

        logits = self.classifier(x)
        # From here on ``logits`` holds sigmoid probabilities; the name is kept
        # because callers read the ``logits`` field of the returned output.
        logits = self.sigmoid(logits)

        loss = None
        if labels is not None:
            # The original code referenced a global ``WEIGHT`` that this module
            # never defines, raising NameError as soon as labels were supplied.
            # Honour such a global if one has been injected into the module,
            # otherwise fall back to the unweighted loss.
            loss_weight = globals().get("WEIGHT")
            loss_fct = BCELoss(weight=loss_weight)
            # BCELoss needs floating-point targets of the same dtype as its input.
            targets = labels.view(-1).to(dtype=logits.dtype)
            loss = loss_fct(logits.view(-1), targets)
        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits
        )
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit e18d9d9

Please sign in to comment.