diff --git a/aeon/transformations/collection/dictionary_based/_sfa.py b/aeon/transformations/collection/dictionary_based/_sfa.py
index a6ffb8b0d7..543052a201 100644
--- a/aeon/transformations/collection/dictionary_based/_sfa.py
+++ b/aeon/transformations/collection/dictionary_based/_sfa.py
@@ -20,7 +20,6 @@
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 
 from aeon.transformations.collection import BaseCollectionTransformer
-from aeon.utils.validation.panel import check_X
 
 # The binning methods to use: equi-depth, equi-width, information gain or kmeans
 binning_methods = {
@@ -90,6 +89,11 @@ class SFA(BaseCollectionTransformer):
     Attributes
     ----------
     words: []
+        words is a list of arrays of integers, one for each case. Each array
+        has length ``series_length - window_size + 1``. Each integer is a bit
+        representation of a word. For example, if ``word_length=6`` and
+        ``alphabet_size=4``, the integer 3235 is the bit string 11 00 10 10 00 11,
+        representing the word daccad.
     breakpoints: = []
     num_insts = 0
     num_atts = 0
@@ -237,8 +241,6 @@ def _fit(self, X, y=None):
                     "Typed Dictionaries can only handle 15 levels "
                     "(this is way to many anyway)."
                 )
-
-        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
         X = X.squeeze(1)
 
         if self.levels > 1:
diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py
index 95fcad6330..c9ca494c7e 100644
--- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py
+++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py
@@ -29,7 +29,6 @@
 from sklearn.utils import check_random_state
 
 from aeon.transformations.collection import BaseCollectionTransformer
-from aeon.utils.validation.panel import check_X
 
 # The binning methods to use: equi-depth, equi-width, information gain or kmeans
 binning_methods = {
@@ -78,55 +77,45 @@ class SFAFast(BaseCollectionTransformer):
         If True, the Fourier coefficient selection is done via the largest
         variance. If False, the first Fourier coefficients are selected.
         Only applicable if labels are given.
-    dilation : int, default = 0
-        When set to dilation > 1, adds dilation to the sliding window operation.
-
-    save_words: boolean, default = False
-        whether to save the words generated for each series (default False)
-
-    bigrams: boolean, default = False
-        whether to create bigrams of SFA words.
-
-    feature_selection: {"chi2", "chi2_top_k", "none", "random"}, default: none
-        Sets the feature selections strategy to be used. Large amounts of memory
-        may be needed depending on the setting of bigrams (true is more) or
-        alpha (larger is more).
-        'chi2' reduces the number of words, keeping those above the 'p_threshold'.
-        'chi2_top_k' reduces the number of words to at most 'max_feature_count',
-        dropping values based on p-value.
-        'random' reduces the number to at most 'max_feature_count',
-        by randomly selecting features.
-        'none' does not apply any feature selection and yields large bag of words,
-
-    p_threshold: int, default=0.05 (disabled by default)
-        If feature_selection=chi2 is chosen, feature selection is applied based on
-        the chi-squared test. This is the p-value threshold to use for chi-squared
-        test on bag-of-words (lower means more strict). 1 indicates that the test
-        should not be performed.
-
-    max_feature_count: int, default=256
-        If feature_selection=random is chosen, this parameter defines the number of
-        randomly chosen unique words used.
-
-    skip_grams: boolean, default = False
-        whether to create skip-grams of SFA words
-
-    remove_repeat_words: boolean, default = False
-        whether to use numerosity reduction (default False)
-
-    return_sparse: boolean, default=True
-        if set to true, a scipy sparse matrix will be returned as BOP model.
-        If set to false a dense array will be returned as BOP model. Sparse
-        arrays are much more compact.
-
-    n_jobs: int, optional, default = 1
-        The number of jobs to run in parallel for both `transform`.
-        ``-1`` means using all processors.
-
-    return_pandas_data_series: boolean, default = False
-        set to true to return Pandas Series as a result of transform.
-        setting to true reduces speed significantly but is required for
-        automatic test.
+    dilation : int, default = 0
+        When set to dilation > 1, adds dilation to the sliding window operation.
+    save_words : boolean, default = False
+        Whether to save the words generated for each series.
+    bigrams : boolean, default = False
+        Whether to create bigrams of SFA words.
+    feature_selection : {"chi2", "chi2_top_k", "none", "random"}, default: none
+        Sets the feature selection strategy to be used. Large amounts of memory
+        may be needed depending on the setting of bigrams (true is more) or
+        alpha (larger is more).
+        'chi2' reduces the number of words, keeping those above the 'p_threshold'.
+        'chi2_top_k' reduces the number of words to at most 'max_feature_count',
+        dropping values based on p-value.
+        'random' reduces the number to at most 'max_feature_count',
+        by randomly selecting features.
+        'none' does not apply any feature selection and yields a large bag of words.
+    p_threshold : float, default=0.05 (disabled by default)
+        If feature_selection=chi2 is chosen, feature selection is applied based on
+        the chi-squared test. This is the p-value threshold to use for the
+        chi-squared test on bag-of-words (lower means more strict). 1 indicates
+        that the test should not be performed.
+    max_feature_count : int, default=256
+        If feature_selection=random is chosen, this parameter defines the number of
+        randomly chosen unique words used.
+    skip_grams : boolean, default = False
+        Whether to create skip-grams of SFA words.
+    remove_repeat_words : boolean, default = False
+        Whether to use numerosity reduction.
+    return_sparse : boolean, default=True
+        If set to true, a scipy sparse matrix will be returned as BOP model.
+        If set to false, a dense array will be returned as BOP model. Sparse
+        arrays are much more compact.
+    n_jobs : int, default = 1
+        The number of jobs to run in parallel for `transform`.
+        ``-1`` means using all processors.
+    return_pandas_data_series : boolean, default = False
+        Set to true to return Pandas Series as a result of transform.
+        Setting to true reduces speed significantly but is required for
+        automatic tests.
 
     Attributes
     ----------
@@ -258,8 +247,6 @@ def _fit_transform(self, X, y=None):
         self.support = np.arange(self.word_length_actual)
         self.letter_bits = np.uint32(math.ceil(math.log2(self.alphabet_size)))
         # self.word_bits = self.word_length_actual * self.letter_bits
-
-        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
         X = X.squeeze(1)
 
         if self.dilation >= 1 or self.first_difference:
@@ -303,7 +290,7 @@ def _fit(self, X, y=None):
 
         Parameters
         ----------
-        X : pandas DataFrame or 3d numpy array, input time series.
+        X : 3d numpy array, input time series.
         y : array_like, target values (optional, ignored).
 
         Returns
@@ -319,14 +306,12 @@ def _transform(self, X, y=None):
 
         Parameters
         ----------
-        X : pandas DataFrame or 3d numpy array, input time series.
-        y : array_like, target values (optional, ignored).
+        X : 3d numpy array, input time series.
 
         Returns
         -------
         List of dictionaries containing SFA words
         """
-        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
         X = X.squeeze(1)
 
         if self.dilation >= 1 or self.first_difference:
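
Note (not part of the patch): the new ``words`` docstring added to ``_sfa.py`` above describes how each SFA word is packed into a single integer, two bits per letter when ``alphabet_size=4``. The snippet below is a minimal illustrative sketch of that packing; ``decode_word`` is a hypothetical helper written for this note, not an aeon function, and it assumes letters map to "abcd" in breakpoint order.

import math

def decode_word(word, word_length=6, alphabet_size=4, alphabet="abcd"):
    # Bits per letter: ceil(log2(alphabet_size)), i.e. 2 bits for a 4-letter alphabet.
    letter_bits = int(math.ceil(math.log2(alphabet_size)))
    mask = (1 << letter_bits) - 1
    # Read letters from the most significant group of bits to the least significant.
    letters = []
    for i in reversed(range(word_length)):
        letters.append(alphabet[(word >> (i * letter_bits)) & mask])
    return "".join(letters)

print(decode_word(3235))  # 3235 == 0b 11 00 10 10 00 11 -> "daccad"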
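Also not part of the patch: with ``check_X`` removed, the updated docstrings say ``SFAFast`` takes a 3d numpy array directly. A rough usage sketch under that assumption follows; the import path matches this diff's module layout and the parameter values are purely illustrative.

import numpy as np

from aeon.transformations.collection.dictionary_based import SFAFast

# Univariate collection of shape (n_cases, n_channels=1, series_length).
X = np.random.default_rng(0).normal(size=(10, 1, 50))
y = np.array([0, 1] * 5)  # labels enable chi2 feature selection

sfa = SFAFast(
    word_length=6,
    alphabet_size=4,
    window_size=10,
    feature_selection="chi2",
    return_sparse=True,
)
bag = sfa.fit_transform(X, y)  # sparse bag-of-words matrix, one row per case
print(bag.shape)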