Skip to content

Commit

Permalink
modify some files
Browse files Browse the repository at this point in the history
  • Loading branch information
北词你好 committed Dec 17, 2024
1 parent 46f9488 commit c733d67
Show file tree
Hide file tree
Showing 11 changed files with 785 additions and 196 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/coveralls.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name: coveralls

on:
push:
branches:
- main
pull_request:
branches:
- main

jobs:
test:
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v3

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.9'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt

- name: Run tests and generate coverage report
run: |
pip install pytest pytest-cov
pytest --cov=my_project tests/

- name: Upload coverage to Coveralls
uses: coverallsapp/github-action@v2
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
272 changes: 177 additions & 95 deletions README.rst

Large diffs are not rendered by default.

Binary file removed SocialED/dataset/data/MAVEN/MAVEN.npy
Binary file not shown.
6 changes: 3 additions & 3 deletions SocialED/dataset/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,8 @@ def load_data(self):

if __name__ == "__main__":
# Test MAVEN dataset
maven = MAVEN()
maven_df = maven.load_data()
#maven = MAVEN()
dataset = MAVEN().load_data()
print("MAVEN dataset head:")
print(maven_df.head())
print(dataset.head())

100 changes: 2 additions & 98 deletions SocialED/detector/clkd.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import en_core_web_lg
import fr_core_news_lg
import sys
import numpy as np


sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dataset.dataloader import DatasetLoader
Expand Down Expand Up @@ -140,103 +142,6 @@ def __init__(self, **kwargs):
})


# class args_define:
# parser = argparse.ArgumentParser()
# # Hyper parameters
# parser.add_argument('--n_epochs', default=n_epochs, type=int,
# help="Number of initial-training/maintenance-training epochs.")
# parser.add_argument('--n_infer_epochs', default=0, type=int,
# help="Number of inference epochs.")
# parser.add_argument('--window_size', default=3, type=int,
# help="Maintain the model after predicting window_size blocks.")
# parser.add_argument('--patience', default=5, type=int,
# help="Early stop if performance did not improve in the last patience epochs.")
# parser.add_argument('--margin', default=3., type=float,
# help="Margin for computing triplet losses")
# parser.add_argument('--lr', default=1e-3, type=float,
# help="Learning rate")
# parser.add_argument('--batch_size', default=2000, type=int,
# help="Batch size (number of nodes sampled to compute triplet loss in each batch)")
# parser.add_argument('--n_neighbors', default=800, type=int,
# help="Number of neighbors sampled for each node.")
# parser.add_argument('--word_embedding_dim', type=int, default=300)
# parser.add_argument('--hidden_dim', default=8, type=int,
# help="Hidden dimension")
# parser.add_argument('--out_dim', default=32, type=int,
# help="Output dimension of tweet representations")
# parser.add_argument('--num_heads', default=4, type=int,
# help="Number of heads in each GAT layer")
# parser.add_argument('--use_residual', dest='use_residual', default=True,
# action='store_false',
# help="If true, add residual(skip) connections")
# parser.add_argument('--validation_percent', default=0.1, type=float,
# help="Percentage of validation nodes(tweets)")
# parser.add_argument('--test_percent', default=0.2, type=float,
# help="Percentage of test nodes(tweets)")
# parser.add_argument('--use_hardest_neg', dest='use_hardest_neg', default=False,
# action='store_true',
# help="If true, use hardest negative messages to form triplets. Otherwise use random ones")
# parser.add_argument('--metrics', type=str, default='ami')
# # Other arguments
# parser.add_argument('--use_cuda', dest='use_cuda', default=False,
# action='store_true',
# help="Use cuda")
# parser.add_argument('--gpuid', type=int, default=0)
# parser.add_argument('--mask_path', default=None,
# type=str, help="File path that contains the training, validation and test masks")
# parser.add_argument('--log_interval', default=10, type=int,
# help="Log interval")
# # offline or online situation
# parser.add_argument('--is_incremental', action='store_true', default=False,
# help="static or incremental")
# # Teacher-Student structure or Mutual-Learning structure
# parser.add_argument('--mutual', action='store_true', default=False)
#
# parser.add_argument('--mode', type=int, default=0)
# # mode==2, add linear cross-lingual knowledge ditillation; mode == 4, add non-linear cross-lingual knowledge transformation
# # mode==0, no knowledge distillation
# # mode==1,directly input student attribute features to teacher model
# parser.add_argument('--add_mapping', action='store_true', default=False)
# parser.add_argument('--data_path', default='../model/model_saved/clkd/English',
# type=str, help="Path of features, labels and edges")
# parser.add_argument('--file_path', default='../model/model_saved/clkd',
# type=str, help="default path to save the file")
# # offline situation Teacher-Student structure
# parser.add_argument('--Tmodel_path',
# default='../model/model_saved/clkd/English/Tmodel/',
# # '803_hash_static-8-English/0mode/embeddings_0227165510-0-English-nomap',
# type=str,
# help="File path that contains the pre-trained teacher model.")
# parser.add_argument('--lang', type=str, default="French")
# parser.add_argument('--Tealang', type=str, default='English')
# parser.add_argument('--t', type=int, default=1)
#
# # Mutual-Learning structure
# parser.add_argument('--data_path1', default='../model/model_saved/clkd/English',
# type=str, help="Path of features, labels and edges")
# parser.add_argument('--data_path2', default='../model/model_saved/clkd/French',
# type=str, help="Path of features, labels and edges")
# parser.add_argument('--lang1', type=str, default="English")
# parser.add_argument('--lang2', type=str, default="French")
# parser.add_argument('--e', type=int, default=0)
# parser.add_argument('--mt', type=float, default=0.5)
# parser.add_argument('--rd', type=float, default=0.1)
#
# # construct_graph
# parser.add_argument('--is_static', type=bool, default=False)
# parser.add_argument('--graph_lang', type=str, default='English')
# parser.add_argument('--tgtlang', type=str, default='French')
# parser.add_argument('--days', type=int, default=7)
#
# # generate_initial_features
# parser.add_argument('--initial_lang', type=str, default='English')
# parser.add_argument('--TransLinear', type=bool, default=True)
# parser.add_argument('--TransNonlinear', type=bool, default=True)
# parser.add_argument('--tgt', type=str, default='French')
# parser.add_argument('--embpath', type=str, default='../model/model_saved/clkd/dictrans/fr-en-for.npy')
# parser.add_argument('--wordpath', type=str, default='../model/model_saved/clkd/dictrans/wordsFrench.txt')
#
# args = parser.parse_args()


# Inference(prediction)
Expand Down Expand Up @@ -921,7 +826,6 @@ def evaluate_model(extract_features, extract_labels, indices, epoch, num_isolate


# metrics
import numpy as np


class Metric:
Expand Down
14 changes: 14 additions & 0 deletions SocialED/metrics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from .metric import eval_nmi
from .metric import eval_ami
from .metric import eval_ari
from .metric import eval_f1
from .metric import eval_acc

__all__ = [
'eval_nmi',
'eval_ami',
'eval_ari',
'eval_f1',
'eval_acc'
]

112 changes: 112 additions & 0 deletions SocialED/metrics/metric.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# -*- coding: utf-8 -*-
"""
Metrics used to evaluate the outlier detection performance
"""
# Author: Yingtong Dou <[email protected]>, Kay Liu <[email protected]>
# License: BSD 2 clause


from sklearn import metrics



def eval_nmi(ground_truths, predictions):
"""
Normalized Mutual Information (NMI) score for clustering evaluation.

Parameters
----------
ground_truths : array-like
Ground truth labels.
predictions : array-like
Predicted cluster labels.

Returns
-------
nmi : float
Normalized Mutual Information score.
"""
nmi = metrics.normalized_mutual_info_score(ground_truths, predictions)
return nmi


def eval_ami(ground_truths, predictions):
"""
Adjusted Mutual Information (AMI) score for clustering evaluation.

Parameters
----------
ground_truths : array-like
Ground truth labels.
predictions : array-like
Predicted cluster labels.

Returns
-------
ami : float
Adjusted Mutual Information score.
"""
ami = metrics.adjusted_mutual_info_score(ground_truths, predictions)
return ami


def eval_ari(ground_truths, predictions):
"""
Adjusted Rand Index (ARI) score for clustering evaluation.

Parameters
----------
ground_truths : array-like
Ground truth labels.
predictions : array-like
Predicted cluster labels.

Returns
-------
ari : float
Adjusted Rand Index score.
"""
ari = metrics.adjusted_rand_score(ground_truths, predictions)
return ari


def eval_f1(ground_truths, predictions):
"""
F1 score for classification evaluation.

Parameters
----------
ground_truths : array-like
Ground truth labels.
predictions : array-like
Predicted labels.

Returns
-------
f1 : float
F1 score.
"""
f1 = metrics.f1_score(ground_truths, predictions, average='macro')
return f1


def eval_acc(ground_truths, predictions):
"""
Accuracy score for classification evaluation.

Parameters
----------
ground_truths : array-like
Ground truth labels.
predictions : array-like
Predicted labels.

Returns
-------
acc : float
Accuracy score.
"""
acc = metrics.accuracy_score(ground_truths, predictions)
return acc


24 changes: 24 additions & 0 deletions SocialED/test/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os
import sys

# Add parent directory to path so we can import modules
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Import test modules
from .testBERT import *
from .testBiLSTM import *
from .testEventX import *
from .testKPGNN import *
from .testRPLMSED import *
from .testword2vec import *
from .testRPLMSED import *

__all__ = [
"testBERT",
"testBiLSTM",
"testEventX",
"testKPGNN",
"testRPLMSED",
"testword2vec"
]

2 changes: 2 additions & 0 deletions SocialED/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .utility import *
from .score_converter import to_edge_score, to_graph_score
43 changes: 43 additions & 0 deletions SocialED/utils/graph_deal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
"""Outlier Score Converters
"""
# Author: Kay Liu <[email protected]>
# License: BSD 2 clause


def to_edge_score(score, edge_index):
"""Convert outlier node score to outlier edge score by averaging the
scores of two nodes connected by an edge.

Parameters
----------
score : torch.Tensor
The node score.
edge_index : torch.Tensor
The edge index.

Returns
-------
score : torch.Tensor
The edge score.
"""
score = (score[edge_index[0]] + score[edge_index[1]]) / 2
return score


def to_graph_score(score):
"""Convert outlier node score to outlier graph score by averaging
the scores of all nodes in a graph.

Parameters
----------
score : torch.Tensor
The node score.

Returns
-------
score : torch.Tensor
The graph score.
"""

return score.mean(dim=-1)
Loading

0 comments on commit c733d67

Please sign in to comment.