modify some files

RingBDStack · Dec 17, 2024 · c733d67 · c733d67
1 parent 46f9488
commit c733d67
Show file tree

Hide file tree

Showing 11 changed files with 785 additions and 196 deletions.
diff --git a/.github/workflows/coveralls.yml b/.github/workflows/coveralls.yml
@@ -0,0 +1,37 @@
+name: coveralls
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    steps:
+    - name: Checkout code
+      uses: actions/checkout@v3
+
+    - name: Set up Python
+      uses: actions/setup-python@v4
+      with:
+        python-version: '3.9'
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install -r requirements.txt
+
+    - name: Run tests and generate coverage report
+      run: |
+        pip install pytest pytest-cov
+        pytest --cov=SocialED tests/
+
+    - name: Upload coverage to Coveralls
+      uses: coverallsapp/github-action@v2
+      with:
+        github-token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/README.rst b/README.rst
diff --git a/SocialED/dataset/data/MAVEN/MAVEN.npy b/SocialED/dataset/data/MAVEN/MAVEN.npy
diff --git a/SocialED/dataset/dataloader.py b/SocialED/dataset/dataloader.py
@@ -337,8 +337,8 @@ def load_data(self):
 
 if __name__ == "__main__":
     # Test MAVEN dataset
-    maven = MAVEN()
-    maven_df = maven.load_data()
+    # Instantiate the MAVEN loader and load its data in a single step
+    dataset = MAVEN().load_data()
     print("MAVEN dataset head:")
-    print(maven_df.head())
+    print(dataset.head())
 
diff --git a/SocialED/detector/clkd.py b/SocialED/detector/clkd.py
@@ -24,6 +24,8 @@
 import en_core_web_lg
 import fr_core_news_lg
 import sys
+import numpy as np
+
 
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from dataset.dataloader import DatasetLoader
@@ -140,103 +142,6 @@ def __init__(self, **kwargs):
         })
 
 
-# class args_define:
-#     parser = argparse.ArgumentParser()
-#     # Hyper parameters
-#     parser.add_argument('--n_epochs', default=n_epochs, type=int,
-#                         help="Number of initial-training/maintenance-training epochs.")
-#     parser.add_argument('--n_infer_epochs', default=0, type=int,
-#                         help="Number of inference epochs.")
-#     parser.add_argument('--window_size', default=3, type=int,
-#                         help="Maintain the model after predicting window_size blocks.")
-#     parser.add_argument('--patience', default=5, type=int,
-#                         help="Early stop if performance did not improve in the last patience epochs.")
-#     parser.add_argument('--margin', default=3., type=float,
-#                         help="Margin for computing triplet losses")
-#     parser.add_argument('--lr', default=1e-3, type=float,
-#                         help="Learning rate")
-#     parser.add_argument('--batch_size', default=2000, type=int,
-#                         help="Batch size (number of nodes sampled to compute triplet loss in each batch)")
-#     parser.add_argument('--n_neighbors', default=800, type=int,
-#                         help="Number of neighbors sampled for each node.")
-#     parser.add_argument('--word_embedding_dim', type=int, default=300)
-#     parser.add_argument('--hidden_dim', default=8, type=int,
-#                         help="Hidden dimension")
-#     parser.add_argument('--out_dim', default=32, type=int,
-#                         help="Output dimension of tweet representations")
-#     parser.add_argument('--num_heads', default=4, type=int,
-#                         help="Number of heads in each GAT layer")
-#     parser.add_argument('--use_residual', dest='use_residual', default=True,
-#                         action='store_false',
-#                         help="If true, add residual(skip) connections")
-#     parser.add_argument('--validation_percent', default=0.1, type=float,
-#                         help="Percentage of validation nodes(tweets)")
-#     parser.add_argument('--test_percent', default=0.2, type=float,
-#                         help="Percentage of test nodes(tweets)")
-#     parser.add_argument('--use_hardest_neg', dest='use_hardest_neg', default=False,
-#                         action='store_true',
-#                         help="If true, use hardest negative messages to form triplets. Otherwise use random ones")
-#     parser.add_argument('--metrics', type=str, default='ami')
-#     # Other arguments
-#     parser.add_argument('--use_cuda', dest='use_cuda', default=False,
-#                         action='store_true',
-#                         help="Use cuda")
-#     parser.add_argument('--gpuid', type=int, default=0)
-#     parser.add_argument('--mask_path', default=None,
-#                         type=str, help="File path that contains the training, validation and test masks")
-#     parser.add_argument('--log_interval', default=10, type=int,
-#                         help="Log interval")
-#     # offline or online situation
-#     parser.add_argument('--is_incremental', action='store_true', default=False,
-#                         help="static or incremental")
-#     # Teacher-Student structure or Mutual-Learning structure
-#     parser.add_argument('--mutual', action='store_true', default=False)
-#
-#     parser.add_argument('--mode', type=int, default=0)
-#     # mode==2, add linear cross-lingual knowledge ditillation; mode == 4, add non-linear cross-lingual knowledge transformation
-#     # mode==0, no knowledge distillation
-#     # mode==1,directly input student attribute features to teacher model
-#     parser.add_argument('--add_mapping', action='store_true', default=False)
-#     parser.add_argument('--data_path', default='../model/model_saved/clkd/English',
-#                         type=str, help="Path of features, labels and edges")
-#     parser.add_argument('--file_path', default='../model/model_saved/clkd',
-#                         type=str, help="default path to save the file")
-#     # offline situation Teacher-Student structure
-#     parser.add_argument('--Tmodel_path',
-#                         default='../model/model_saved/clkd/English/Tmodel/',
-#                         # '803_hash_static-8-English/0mode/embeddings_0227165510-0-English-nomap',
-#                         type=str,
-#                         help="File path that contains the pre-trained teacher model.")
-#     parser.add_argument('--lang', type=str, default="French")
-#     parser.add_argument('--Tealang', type=str, default='English')
-#     parser.add_argument('--t', type=int, default=1)
-#
-#     # Mutual-Learning structure
-#     parser.add_argument('--data_path1', default='../model/model_saved/clkd/English',
-#                         type=str, help="Path of features, labels and edges")
-#     parser.add_argument('--data_path2', default='../model/model_saved/clkd/French',
-#                         type=str, help="Path of features, labels and edges")
-#     parser.add_argument('--lang1', type=str, default="English")
-#     parser.add_argument('--lang2', type=str, default="French")
-#     parser.add_argument('--e', type=int, default=0)
-#     parser.add_argument('--mt', type=float, default=0.5)
-#     parser.add_argument('--rd', type=float, default=0.1)
-#
-#     # construct_graph
-#     parser.add_argument('--is_static', type=bool, default=False)
-#     parser.add_argument('--graph_lang', type=str, default='English')
-#     parser.add_argument('--tgtlang', type=str, default='French')
-#     parser.add_argument('--days', type=int, default=7)
-#
-#     # generate_initial_features
-#     parser.add_argument('--initial_lang', type=str, default='English')
-#     parser.add_argument('--TransLinear', type=bool, default=True)
-#     parser.add_argument('--TransNonlinear', type=bool, default=True)
-#     parser.add_argument('--tgt', type=str, default='French')
-#     parser.add_argument('--embpath', type=str, default='../model/model_saved/clkd/dictrans/fr-en-for.npy')
-#     parser.add_argument('--wordpath', type=str, default='../model/model_saved/clkd/dictrans/wordsFrench.txt')
-#
-#     args = parser.parse_args()
 
 
 # Inference(prediction)
@@ -921,7 +826,6 @@ def evaluate_model(extract_features, extract_labels, indices, epoch, num_isolate
 
 
 # metrics
-import numpy as np
 
 
 class Metric:

diff --git a/SocialED/metrics/__init__.py b/SocialED/metrics/__init__.py
@@ -0,0 +1,14 @@
+from .metric import eval_nmi
+from .metric import eval_ami
+from .metric import eval_ari
+from .metric import eval_f1
+from .metric import eval_acc
+
+__all__ = [
+    'eval_nmi',
+    'eval_ami', 
+    'eval_ari',
+    'eval_f1',
+    'eval_acc'
+]
+
diff --git a/SocialED/metrics/metric.py b/SocialED/metrics/metric.py
@@ -0,0 +1,112 @@
+# -*- coding: utf-8 -*-
+"""
+Metrics used to evaluate clustering and classification performance
+"""
+# Author: Yingtong Dou <[email protected]>, Kay Liu <[email protected]>
+# License: BSD 2 clause
+
+
+from sklearn import metrics
+
+
+
+def eval_nmi(ground_truths, predictions):
+    """
+    Normalized Mutual Information (NMI) score for clustering evaluation.
+
+    Parameters
+    ----------
+    ground_truths : array-like
+        Ground truth labels.
+    predictions : array-like 
+        Predicted cluster labels.
+
+    Returns
+    -------
+    nmi : float
+        Normalized Mutual Information score.
+    """
+    nmi = metrics.normalized_mutual_info_score(ground_truths, predictions)
+    return nmi
+
+
+def eval_ami(ground_truths, predictions):
+    """
+    Adjusted Mutual Information (AMI) score for clustering evaluation.
+
+    Parameters
+    ----------
+    ground_truths : array-like
+        Ground truth labels.
+    predictions : array-like
+        Predicted cluster labels.
+
+    Returns
+    -------
+    ami : float
+        Adjusted Mutual Information score.
+    """
+    ami = metrics.adjusted_mutual_info_score(ground_truths, predictions)
+    return ami
+
+
+def eval_ari(ground_truths, predictions):
+    """
+    Adjusted Rand Index (ARI) score for clustering evaluation.
+
+    Parameters
+    ----------
+    ground_truths : array-like
+        Ground truth labels.
+    predictions : array-like
+        Predicted cluster labels.
+
+    Returns
+    -------
+    ari : float
+        Adjusted Rand Index score.
+    """
+    ari = metrics.adjusted_rand_score(ground_truths, predictions)
+    return ari
+
+
+def eval_f1(ground_truths, predictions):
+    """
+    Macro-averaged F1 score for classification evaluation.
+
+    Parameters
+    ----------
+    ground_truths : array-like
+        Ground truth labels.
+    predictions : array-like
+        Predicted labels.
+
+    Returns
+    -------
+    f1 : float
+        Macro-averaged F1 score.
+    """
+    f1 = metrics.f1_score(ground_truths, predictions, average='macro')
+    return f1
+
+
+def eval_acc(ground_truths, predictions):
+    """
+    Accuracy score for classification evaluation.
+
+    Parameters
+    ----------
+    ground_truths : array-like
+        Ground truth labels.
+    predictions : array-like
+        Predicted labels.
+
+    Returns
+    -------
+    acc : float
+        Accuracy score.
+    """
+    acc = metrics.accuracy_score(ground_truths, predictions)
+    return acc
+
+
diff --git a/SocialED/test/__init__.py b/SocialED/test/__init__.py
@@ -0,0 +1,24 @@
+import os
+import sys
+
+# Add parent directory to path so we can import modules
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+# Import test modules
+from .testBERT import *
+from .testBiLSTM import *
+from .testEventX import *
+from .testKPGNN import *
+from .testRPLMSED import *
+from .testword2vec import *
+
+__all__ = [
+    "testBERT",
+    "testBiLSTM",
+    "testEventX",
+    "testKPGNN",
+    "testRPLMSED",
+    "testword2vec"
+]
+
diff --git a/SocialED/utils/__init__.py b/SocialED/utils/__init__.py
@@ -0,0 +1,2 @@
+from .utility import *
+from .graph_deal import to_edge_score, to_graph_score
diff --git a/SocialED/utils/graph_deal.py b/SocialED/utils/graph_deal.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+"""Outlier Score Converters
+"""
+# Author: Kay Liu <[email protected]>
+# License: BSD 2 clause
+
+
+def to_edge_score(score, edge_index):
+    """Convert outlier node score to outlier edge score by averaging the
+    scores of two nodes connected by an edge.
+
+    Parameters
+    ----------
+    score : torch.Tensor
+        The node score.
+    edge_index : torch.Tensor
+        The edge index.
+
+    Returns
+    -------
+    score : torch.Tensor
+        The edge score.
+    """
+    score = (score[edge_index[0]] + score[edge_index[1]]) / 2
+    return score
+
+
+def to_graph_score(score):
+    """Convert outlier node score to outlier graph score by averaging
+    the scores of all nodes in a graph.
+
+    Parameters
+    ----------
+    score : torch.Tensor
+        The node score.
+
+    Returns
+    -------
+    score : torch.Tensor
+        The graph score.
+    """
+
+    return score.mean(dim=-1)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		from .utility import *
		from .graph_deal import to_edge_score, to_graph_score