Skip to content

Commit

Permalink
[ENH] remove unnecessary call to check_X in SFA and SFA_Fast (#994)
Browse files Browse the repository at this point in the history
* switch test example for pipeline

* switch test example for pipeline

* SFA

* docstrings
  • Loading branch information
TonyBagnall authored Dec 18, 2023
1 parent 30e1dd7 commit 778678e
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 59 deletions.
8 changes: 5 additions & 3 deletions aeon/transformations/collection/dictionary_based/_sfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

from aeon.transformations.collection import BaseCollectionTransformer
from aeon.utils.validation.panel import check_X

# The binning methods to use: equi-depth, equi-width, information gain or kmeans
binning_methods = {
Expand Down Expand Up @@ -90,6 +89,11 @@ class SFA(BaseCollectionTransformer):
Attributes
----------
words: []
words is a list of arrays of integers, one for each case. Each array is
length ``(series_length - window_size+1)``. Each integer is a bit
representation of a word. So, for example if ``word_length=6`` and
``alphabet_size=4``, integer 3235 is bit string 11 00 10 10 00 11,
representing word daccad.
breakpoints: = []
num_insts = 0
num_atts = 0
Expand Down Expand Up @@ -237,8 +241,6 @@ def _fit(self, X, y=None):
"Typed Dictionaries can only handle 15 levels "
"(this is way to many anyway)."
)

X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
X = X.squeeze(1)

if self.levels > 1:
Expand Down
97 changes: 41 additions & 56 deletions aeon/transformations/collection/dictionary_based/_sfa_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
from sklearn.utils import check_random_state

from aeon.transformations.collection import BaseCollectionTransformer
from aeon.utils.validation.panel import check_X

# The binning methods to use: equi-depth, equi-width, information gain or kmeans
binning_methods = {
Expand Down Expand Up @@ -78,55 +77,45 @@ class SFAFast(BaseCollectionTransformer):
If True, the Fourier coefficient selection is done via the largest variance.
If False, the first Fourier coefficients are selected. Only applicable if
labels are given.
dilation : int, default = 0
When set to dilation > 1, adds dilation to the sliding window operation.
save_words: boolean, default = False
whether to save the words generated for each series (default False)
bigrams: boolean, default = False
whether to create bigrams of SFA words.
feature_selection: {"chi2", "chi2_top_k", "none", "random"}, default: none
Sets the feature selections strategy to be used. Large amounts of memory
may be needed depending on the setting of bigrams (true is more) or
alpha (larger is more).
'chi2' reduces the number of words, keeping those above the 'p_threshold'.
'chi2_top_k' reduces the number of words to at most 'max_feature_count',
dropping values based on p-value.
'random' reduces the number to at most 'max_feature_count',
by randomly selecting features.
'none' does not apply any feature selection and yields large bag of words,
p_threshold: int, default=0.05 (disabled by default)
If feature_selection=chi2 is chosen, feature selection is applied based on
the chi-squared test. This is the p-value threshold to use for chi-squared
test on bag-of-words (lower means more strict). 1 indicates that the test
should not be performed.
max_feature_count: int, default=256
If feature_selection=random is chosen, this parameter defines the number of
randomly chosen unique words used.
skip_grams: boolean, default = False
whether to create skip-grams of SFA words
remove_repeat_words: boolean, default = False
whether to use numerosity reduction (default False)
return_sparse: boolean, default=True
if set to true, a scipy sparse matrix will be returned as BOP model.
If set to false a dense array will be returned as BOP model. Sparse
arrays are much more compact.
n_jobs: int, optional, default = 1
The number of jobs to run in parallel for both `transform`.
``-1`` means using all processors.
return_pandas_data_series: boolean, default = False
set to true to return Pandas Series as a result of transform.
setting to true reduces speed significantly but is required for
automatic test.
dilation : int, default = 0
When set to dilation > 1, adds dilation to the sliding window operation.
save_words : boolean, default = False
whether to save the words generated for each series (default False)
bigrams : boolean, default = False
Whether to create bigrams of SFA words.
feature_selection : {"chi2", "chi2_top_k", "none", "random"}, default: none
Sets the feature selection strategy to be used. Large amounts of memory
may be needed depending on the setting of bigrams (true is more) or
alpha (larger is more).
'chi2' reduces the number of words, keeping those above the 'p_threshold'.
'chi2_top_k' reduces the number of words to at most 'max_feature_count',
dropping values based on p-value.
'random' reduces the number to at most 'max_feature_count',
by randomly selecting features.
'none' does not apply any feature selection and yields large bag of words.
p_threshold : float, default=0.05 (disabled by default)
If feature_selection=chi2 is chosen, feature selection is applied based on
the chi-squared test. This is the p-value threshold to use for chi-squared
test on bag-of-words (lower means more strict). 1 indicates that the test
should not be performed.
max_feature_count : int, default=256
If feature_selection=random is chosen, this parameter defines the number of
randomly chosen unique words used.
skip_grams : boolean, default = False
Whether to create skip-grams of SFA words.
remove_repeat_words : boolean, default = False
Whether to use numerosity reduction.
return_sparse : boolean, default=True
If set to true, a scipy sparse matrix will be returned as BOP model.
If set to false a dense array will be returned as BOP model. Sparse
arrays are much more compact.
n_jobs : int, default = 1
The number of jobs to run in parallel for both `transform`.
``-1`` means using all processors.
return_pandas_data_series : boolean, default = False
Set to true to return Pandas Series as a result of transform.
Setting to true reduces speed significantly but is required for
automatic tests.
Attributes
----------
Expand Down Expand Up @@ -258,8 +247,6 @@ def _fit_transform(self, X, y=None):
self.support = np.arange(self.word_length_actual)
self.letter_bits = np.uint32(math.ceil(math.log2(self.alphabet_size)))
# self.word_bits = self.word_length_actual * self.letter_bits

X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
X = X.squeeze(1)

if self.dilation >= 1 or self.first_difference:
Expand Down Expand Up @@ -303,7 +290,7 @@ def _fit(self, X, y=None):
Parameters
----------
X : pandas DataFrame or 3d numpy array, input time series.
X : 3d numpy array, input time series.
y : array_like, target values (optional, ignored).
Returns
Expand All @@ -319,14 +306,12 @@ def _transform(self, X, y=None):
Parameters
----------
X : pandas DataFrame or 3d numpy array, input time series.
y : array_like, target values (optional, ignored).
X : 3d numpy array, input time series.
Returns
-------
List of dictionaries containing SFA words
"""
X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
X = X.squeeze(1)

if self.dilation >= 1 or self.first_difference:
Expand Down

0 comments on commit 778678e

Please sign in to comment.