From 002a75093c61b5c5966ea05820ad72ef3baed5ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Mon, 26 May 2025 14:07:43 +0200 Subject: [PATCH 01/13] add dynamic alphabet allocation --- aeon/distances/tests/test_symbolic_mindist.py | 53 +++++ .../collection/dictionary_based/_sfa_fast.py | 181 +++++++++++++----- .../collection/dictionary_based/_sfa_whole.py | 25 ++- 3 files changed, 211 insertions(+), 48 deletions(-) diff --git a/aeon/distances/tests/test_symbolic_mindist.py b/aeon/distances/tests/test_symbolic_mindist.py index 2d81e04ddb..35ab500fde 100644 --- a/aeon/distances/tests/test_symbolic_mindist.py +++ b/aeon/distances/tests/test_symbolic_mindist.py @@ -170,3 +170,56 @@ def test_sfa_whole_mindist(): assert mindist_sfa <= ed assert mindist_dft_sfa >= mindist_sfa # a tighter lower bound assert mindist_dft_sfa <= ed + + +def test_dynamic_alphabet_allocation(): + """Test the SFA Min-Distance function.""" + n_segments = 16 + alphabet_size = 256 + + X_train, _ = load_unit_test("TRAIN") + X_test, _ = load_unit_test("TEST") + + X_train = zscore(X_train.squeeze(), axis=1) + X_test = zscore(X_test.squeeze(), axis=1) + histogram_type = "equi-width" + + # print("Testing") + for alphabet_allocation_method in { + "linear_scale", + "log_scale", + "sqrt_scale", + }: + sfa = SFAWhole( + word_length=n_segments, + alphabet_size=alphabet_size, + binning_method=histogram_type, + learn_alphabet_sizes=True, + alphabet_allocation_method=alphabet_allocation_method, + variance=True, # True gives a tighter lower bound + norm=True, + ) + + X_train_words, X_train_dfts = sfa.fit_transform(X_train) + X_test_words, _ = sfa.transform(X_test) + + for i in range(min(X_train.shape[0], X_test.shape[0])): + X = X_train[i].reshape(1, -1) + Y = X_test[i].reshape(1, -1) + + # SFA Min-Distance + mindist_sfa = mindist_sfa_distance( + X_train_words[i], X_test_words[i], sfa.breakpoints + ) + + # DFT-SFA Min-Distance + mindist_dft_sfa = mindist_dft_sfa_distance( + X_train_dfts[i], X_test_words[i], sfa.breakpoints + ) + + # Euclidean Distance + ed = np.linalg.norm(X[0] - Y[0]) + + assert mindist_sfa <= ed + assert mindist_dft_sfa >= mindist_sfa # a tighter lower bound + assert mindist_dft_sfa <= ed diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py index 0244d73572..d281449ae4 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py @@ -41,6 +41,12 @@ "quantile", } +alphabet_allocation_methods = { + "linear_scale", + "log_scale", + "sqrt_scale", +} + simplefilter(action="ignore", category=NumbaPendingDeprecationWarning) simplefilter(action="ignore", category=NumbaTypeSafetyWarning) @@ -65,6 +71,12 @@ class SFAFast(BaseCollectionTransformer): Number of values to discretise each value to. window_size : int, default = 12 Size of window for sliding. Input series length for whole series transform. + learn_alphabet_sizes : boolean, default = False + If True, dynamic alphabet sizes are learned based on the variance of the Fourier + coefficients. + alphabet_allocation_method : str, default = None + The method used to learn the dynamic alphabet sizes. One of + {"linear_scale", "log_scale", "sqrt_scale"}. norm : boolean, default = False Mean normalise words by dropping first fourier coefficient. 
binning_method : str, default="equi-depth" @@ -143,6 +155,8 @@ def __init__( word_length=8, alphabet_size=4, window_size=12, + learn_alphabet_sizes=False, + alphabet_allocation_method=None, norm=False, binning_method="equi-depth", anova=False, @@ -170,6 +184,7 @@ def __init__( self.word_length = word_length self.alphabet_size = alphabet_size + self.alphabet_sizes = None self.window_size = window_size self.norm = norm @@ -192,11 +207,13 @@ def __init__( self.n_cases = 0 self.n_timepoints = 0 - self.letter_bits = 0 + self.letter_bits = None self.dilation = dilation self.first_difference = first_difference self.sampling_factor = sampling_factor + self.learn_alphabet_sizes = learn_alphabet_sizes + self.alphabet_allocation_method = alphabet_allocation_method # Feature selection part self.feature_selection = feature_selection @@ -230,6 +247,14 @@ def _fit_transform(self, X, y=None, return_bag_of_words=True): "Please set either variance or anova Fourier coefficient selection" ) + if self.learn_alphabet_sizes and ( + self.alphabet_allocation_method not in alphabet_allocation_methods + ): + raise ValueError( + "alphabet_allocation_method must be one of: ", + alphabet_allocation_methods, + ) + if self.binning_method not in binning_methods: raise TypeError("binning_method must be one of: ", binning_methods) @@ -259,13 +284,16 @@ def _fit_transform(self, X, y=None, return_bag_of_words=True): if (self.anova or self.variance) is True else self.word_length_actual ) + # make dft_length an even number (same number of reals and imags) self.dft_length = self.dft_length + self.dft_length % 2 self.word_length_actual = self.word_length_actual + self.word_length_actual % 2 self.support = np.arange(self.word_length_actual) - self.letter_bits = np.uint32(math.ceil(math.log2(self.alphabet_size))) - # self.word_bits = self.word_length_actual * self.letter_bits + + self.letter_bits = np.zeros(self.word_length_actual, dtype=np.uint32) + self.letter_bits[:] = np.uint32(math.ceil(math.log2(self.alphabet_size))) + X = X.squeeze(1) # subsample the samples @@ -538,10 +566,10 @@ def _binning(self, X, y=None): if self.variance: # determine variance - dft_variance = np.var(dft, axis=0) + self.dft_variance = np.var(dft, axis=0) # select word-length-many indices with the largest variance - self.support = np.argsort(-dft_variance)[: self.word_length_actual] + self.support = np.argsort(-self.dft_variance)[: self.word_length_actual] # sort remaining indices self.support = np.sort(self.support) @@ -569,6 +597,41 @@ def _binning(self, X, y=None): self.dft_length = np.max(self.support) + 1 self.dft_length = self.dft_length + self.dft_length % 2 # even + # learn alphabet sizes + if self.learn_alphabet_sizes: + if self.dft_variance is None: + self.dft_variance = np.var(dft, axis=0) + + symbols = np.log2(self.alphabet_size) + self.bit_budget = int(symbols * self.word_length) + + if self.alphabet_allocation_method == "linear_scale": + variance = np.sqrt(self.dft_variance[self.support]) + normed_scale = variance / variance.sum() + + elif self.alphabet_allocation_method == "sqrt_scale": + variance = np.sqrt(np.sqrt(self.dft_variance[self.support])) + normed_scale = variance / variance.sum() + + elif self.alphabet_allocation_method == "log_scale": + variance = np.log2(np.sqrt(self.dft_variance[self.support]) + 1) + normed_scale = variance / variance.sum() + + bit_arr_raw = np.floor(normed_scale * symbols * self.word_length).astype( + np.uint32 + ) + + # Use at most symbols+1 for each position + bit_arr = heal_array(bit_arr_raw, symbols + 1, 
self.bit_budget) + + self.alphabet_sizes = [int(2 ** bit_arr[i]) for i in range(len(bit_arr))] + self.letter_bits = np.array(bit_arr, dtype=np.uint32) + else: + # use the same alphabet size for all positions + self.alphabet_sizes = [self.alphabet_size for _ in range(self.word_length)] + + self.alphabet_sizes = np.array(self.alphabet_sizes) + if self.binning_method == "information-gain": return self._igb(dft, y) if self.binning_method == "information-gain-mae": @@ -597,30 +660,9 @@ def _k_bins_discretizer(self, dft): return breakpoints def _mcb(self, dft): - breakpoints = np.zeros((self.word_length_actual, self.alphabet_size)) - - dft = np.round(dft, 2) - for letter in range(self.word_length_actual): - column = np.sort(dft[:, letter]) - bin_index = 0 - - # use equi-depth binning - if self.binning_method == "equi-depth": - target_bin_depth = len(dft) / self.alphabet_size - - for bp in range(self.alphabet_size - 1): - bin_index += target_bin_depth - breakpoints[letter, bp] = column[int(bin_index)] - - # use equi-width binning aka equi-frequency binning - elif self.binning_method == "equi-width": - target_bin_width = (column[-1] - column[0]) / self.alphabet_size - - for bp in range(self.alphabet_size - 1): - breakpoints[letter, bp] = (bp + 1) * target_bin_width + column[0] - - breakpoints[:, self.alphabet_size - 1] = sys.float_info.max - return breakpoints + return mcb( + dft, self.alphabet_sizes, self.word_length_actual, self.binning_method + ) def _igb(self, dft, y): breakpoints = np.zeros((self.word_length_actual, self.alphabet_size)) @@ -695,7 +737,7 @@ def get_words(self): """ words = np.squeeze(self.words) return np.array( - [_get_chars(word, self.word_length, self.alphabet_size) for word in words] + [_get_chars(word, self.word_length, self.letter_bits) for word in words] ) def transform_words(self, X): @@ -723,7 +765,6 @@ def transform_words(self, X): self.inverse_sqrt_win_size, self.lower_bounding or self.lower_bounding_distances, self.word_length, - self.alphabet_size, self.breakpoints, ) @@ -775,17 +816,16 @@ def __setstate__(self, state): @njit(cache=True, fastmath=True) -def _get_chars(word, word_length, alphabet_size): +def _get_chars(word, word_length, letter_bits): chars = np.zeros(word_length, dtype=np.uint32) - letter_bits = int(np.log2(alphabet_size)) - mask = (1 << letter_bits) - 1 for i in range(word_length): # Extract the last bits + mask = (1 << letter_bits[i]) - 1 char = word & mask chars[-i - 1] = char # Right shift by to move to the next group of bits - word >>= letter_bits + word >>= letter_bits[i] return chars @@ -986,9 +1026,7 @@ def generate_words( needed_size += max(0, 2 * dfts.shape[1] - 5 * window_size) words = np.zeros((dfts.shape[0], needed_size), dtype=np.uint32) - - letter_bits = np.uint32(letter_bits) - word_bits = word_length * letter_bits # dfts.shape[2] * letter_bits + word_bits = np.uint32(np.sum(letter_bits)) # special case: binary breakpoints if breakpoints.shape[1] == 2: @@ -1005,7 +1043,7 @@ def generate_words( for a in prange(dfts.shape[0]): for i in range(word_length): # range(dfts.shape[2]): words[a, : dfts.shape[1]] = ( - words[a, : dfts.shape[1]] << letter_bits + words[a, : dfts.shape[1]] << letter_bits[a] ) | np.digitize(dfts[a, :, i], breakpoints[i], right=True) # add bigrams @@ -1091,7 +1129,6 @@ def _mft( ) transformed2 = transformed2 * inverse_sqrt_win_size - if lower_bounding: transformed2[:, :, 1::2] = transformed2[:, :, 1::2] * -1 @@ -1259,12 +1296,45 @@ def create_dict(feature_names, features_idx): return relevant_features 
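The `mcb` helper added below replaces the old `SFAFast._mcb` method: every Fourier coefficient (letter) can now have its own alphabet size, and breakpoint slots beyond a letter's alphabet are padded with the float maximum so no value is ever mapped past them. A minimal NumPy-only sketch of that idea, kept separate from the Numba implementation in this patch (the name `mcb_sketch` and the toy data are illustrative assumptions, not part of the change):

import numpy as np

def mcb_sketch(dft, alphabet_sizes, binning_method="equi-depth"):
    # dft: (n_cases, word_length) Fourier coefficients
    # alphabet_sizes: per-letter alphabet size, e.g. from a bit-budget allocation
    word_length = dft.shape[1]
    max_alphabet_size = int(np.max(alphabet_sizes))
    # slots past a letter's own alphabet keep the float32 maximum,
    # so no value can be assigned a symbol outside that letter's alphabet
    breakpoints = np.full(
        (word_length, max_alphabet_size), np.finfo(np.float32).max, dtype=np.float32
    )
    for letter in range(word_length):
        a = int(alphabet_sizes[letter])
        column = np.sort(dft[:, letter])
        if binning_method == "equi-depth":  # equal number of samples per bin
            depth = len(column) / a
            for bp in range(a - 1):
                breakpoints[letter, bp] = column[int(depth * (bp + 1))]
        elif binning_method == "equi-width":  # equal-width bins over the value range
            width = (column[-1] - column[0]) / a
            for bp in range(a - 1):
                breakpoints[letter, bp] = column[0] + (bp + 1) * width
    return breakpoints

# toy usage: letters with larger alphabets receive more breakpoints
rng = np.random.default_rng(0)
toy_dft = rng.normal(size=(50, 4)) * np.array([4.0, 2.0, 1.0, 0.5])
print(mcb_sketch(toy_dft, alphabet_sizes=np.array([8, 4, 2, 2])))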
+@njit(fastmath=True, cache=True, parallel=True) +def mcb(dft, alphabet_sizes, word_length_actual, binning_method): + max_alphabet_size = np.max(alphabet_sizes) + + breakpoints = np.zeros((word_length_actual, max_alphabet_size), dtype=np.float32) + breakpoints[:, :] = np.finfo(np.float32).max + dft = np.round(dft, 2) + + for letter in prange(word_length_actual): + curr_alphabet_size = alphabet_sizes[letter] + column = np.sort(dft[:, letter]) + bin_index = 0 + + # use equi-depth binning + if binning_method == "equi-depth": + target_bin_depth = len(dft) / curr_alphabet_size + + for bp in range(curr_alphabet_size - 1): + bin_index += target_bin_depth + breakpoints[letter, bp] = column[int(bin_index)] + + # use equi-width binning aka equi-frequency binning + elif binning_method == "equi-width": + target_bin_width = (column[-1] - column[0]) / curr_alphabet_size + + for bp in range(curr_alphabet_size - 1): + breakpoints[letter, bp] = (bp + 1) * target_bin_width + column[0] + + return breakpoints + + @njit(fastmath=True, cache=True) def shorten_words(words, amount, letter_bits): new_words = np.zeros((words.shape[0], words.shape[1]), dtype=np.uint32) # Unigrams - shift_len = amount * letter_bits + shift_len = np.sum( + letter_bits[:amount] + ) # this does not work for variable-alphabet size for j in prange(words.shape[1]): # shorten a word by set amount of letters new_words[:, j] = words[:, j] >> shift_len @@ -1291,7 +1361,6 @@ def _transform_words_case( inverse_sqrt_win_size, lower_bounding, word_length, - alphabet_size, breakpoints, ): dfts = _mft( @@ -1311,9 +1380,35 @@ def _transform_words_case( for x in prange(dfts.shape[0]): for window in prange(dfts.shape[1]): for i in prange(word_length): - for bp in range(alphabet_size): + for bp in range(breakpoints.shape[1]): if dfts[x, window, i] <= breakpoints[i][bp]: words[x, window, i] = bp break return words, dfts + + +@njit(fastmath=True, cache=True) +def heal_array(bit_array, max_val, budget): + bit_array = bit_array.copy() + + # cap values beyond max_val + for i in range(len(bit_array)): + bit_array[i] = min(max_val, bit_array[i]) + + if bit_array.sum() > budget: + # print("Error", bit_array, bit_array.sum(), budget, bit_array.sum() <= budget) + assert bit_array.sum() <= budget + + # heal the array to have the correct sum == budget + if bit_array.sum() != budget: + diff = budget - bit_array.sum() + while diff > 0: + for i in range(len(bit_array)): + if bit_array[i] < max_val: + bit_array[i] += 1 + diff -= 1 + if diff == 0: + break + + return bit_array diff --git a/aeon/transformations/collection/dictionary_based/_sfa_whole.py b/aeon/transformations/collection/dictionary_based/_sfa_whole.py index 2e9f3df86a..083701dddb 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_whole.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_whole.py @@ -14,12 +14,14 @@ class SFAWhole(SFAFast): """Symbolic Fourier Approximation (SFA) Transformer. A whole series transform for SFA which holds the lower bounding lemma, see [1]. + SOFA [2] extends on SFA by introducing variance based feature selection, and dynamic + alphabet sizes. It produces a significantly tighter lower bound. It is implemented as a wrapper for the SFA-Fast transformer, the latter implements subsequence-based SFA extraction. This wrapper reduces non-needed parameters, and sets some usefull defaults for - lower bounding. + the tightest possible lower bounding. 
Parameters ---------- @@ -27,9 +29,15 @@ class SFAWhole(SFAFast): Length of word to shorten window to (using DFT). alphabet_size : int, default = 4 Number of values to discretise each value to. + learn_alphabet_sizes : boolean, default = True + If True, dynamic alphabet sizes are learned based on the variance of the Fourier + coefficients. + alphabet_allocation_method : str, default = linear_scale + The method used to learn the dynamic alphabet sizes. One of + {"linear_scale", "log_scale", "sqrt_scale"}. norm : boolean, default = False Mean normalise words by dropping first fourier coefficient. - binning_method : str, default="equi-depth" + binning_method : str, default="equi-width" The binning method used to derive the breakpoints. One of {"equi-depth", "equi-width", "information-gain", "information-gain-mae", "kmeans", "quantile"}, variance : boolean, default = False @@ -51,9 +59,12 @@ class SFAWhole(SFAFast): References ---------- - .. [1] Schäfer, Patrick, and Mikael Högqvist. "SFA: a symbolic fourier approximation + .. [1] P. Schäfer, and M. Högqvist. "SFA: a symbolic fourier approximation and index for similarity search in high dimensional datasets." Proceedings of the 15th international conference on extending database technology. 2012. + .. [2] P. Schäfer, J. Brand, U. Leser, B. Peng and T. Palpanas, "Fast and Exact + Similarity Search in Less than a Blink of an Eye," in 2025 IEEE 41st International + Conference on Data Engineering (ICDE), Hong Kong, 2025, pp. 2464-2477. """ _tags = { @@ -66,8 +77,10 @@ def __init__( self, word_length=8, alphabet_size=4, + learn_alphabet_sizes=True, + alphabet_allocation_method="linear_scale", norm=True, - binning_method="equi-depth", + binning_method="equi-width", variance=True, sampling_factor=None, random_state=None, @@ -77,6 +90,8 @@ def __init__( word_length=word_length, alphabet_size=alphabet_size, norm=norm, + learn_alphabet_sizes=learn_alphabet_sizes, + alphabet_allocation_method=alphabet_allocation_method, binning_method=binning_method, variance=variance, sampling_factor=sampling_factor, @@ -92,7 +107,7 @@ def __init__( skip_grams=False, remove_repeat_words=False, return_sparse=False, - window_size=None, # set in fit + window_size=None, # set in fit - do not remove ) def _fit_transform(self, X, y=None): From 21dffb752c0a4e72b25ee537e9649b6b338633c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Mon, 26 May 2025 14:18:45 +0200 Subject: [PATCH 02/13] update default --- .../transformations/collection/dictionary_based/_sfa_whole.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/transformations/collection/dictionary_based/_sfa_whole.py b/aeon/transformations/collection/dictionary_based/_sfa_whole.py index 083701dddb..51259cefef 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_whole.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_whole.py @@ -32,7 +32,7 @@ class SFAWhole(SFAFast): learn_alphabet_sizes : boolean, default = True If True, dynamic alphabet sizes are learned based on the variance of the Fourier coefficients. - alphabet_allocation_method : str, default = linear_scale + alphabet_allocation_method : str, default = sqrt_scale The method used to learn the dynamic alphabet sizes. One of {"linear_scale", "log_scale", "sqrt_scale"}. 
norm : boolean, default = False @@ -78,7 +78,7 @@ def __init__( word_length=8, alphabet_size=4, learn_alphabet_sizes=True, - alphabet_allocation_method="linear_scale", + alphabet_allocation_method="sqrt_scale", norm=True, binning_method="equi-width", variance=True, From 57c5b77e1dfc49fc1426125d06060f04fac3deb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Mon, 26 May 2025 14:36:36 +0200 Subject: [PATCH 03/13] fix test for availability of variable --- aeon/transformations/collection/dictionary_based/_sfa_fast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py index d281449ae4..d75f575e7c 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py @@ -599,7 +599,7 @@ def _binning(self, X, y=None): # learn alphabet sizes if self.learn_alphabet_sizes: - if self.dft_variance is None: + if not hasattr(self, "dft_variance"): self.dft_variance = np.var(dft, axis=0) symbols = np.log2(self.alphabet_size) From bdffc39462f8bc8bdccbc55ef58782363677a9da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Mon, 26 May 2025 21:19:14 +0200 Subject: [PATCH 04/13] bugfix --- .../collection/dictionary_based/_sfa_fast.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py index d75f575e7c..223a5324ef 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py @@ -598,7 +598,7 @@ def _binning(self, X, y=None): self.dft_length = self.dft_length + self.dft_length % 2 # even # learn alphabet sizes - if self.learn_alphabet_sizes: + if self.learn_alphabet_sizes and self.alphabet_allocation_method: if not hasattr(self, "dft_variance"): self.dft_variance = np.var(dft, axis=0) @@ -628,7 +628,9 @@ def _binning(self, X, y=None): self.letter_bits = np.array(bit_arr, dtype=np.uint32) else: # use the same alphabet size for all positions - self.alphabet_sizes = [self.alphabet_size for _ in range(self.word_length)] + self.alphabet_sizes = [ + self.alphabet_size for _ in range(self.word_length_actual) + ] self.alphabet_sizes = np.array(self.alphabet_sizes) @@ -830,7 +832,7 @@ def _get_chars(word, word_length, letter_bits): return chars -@njit(fastmath=True, cache=True) +@njit(fastmath=True, cache=True, parallel=True) def _binning_dft( X, window_size, @@ -1013,7 +1015,7 @@ def _get_phis(window_size, length): return phis -@njit(fastmath=True, cache=True) +@njit(fastmath=True, cache=True, parallel=True) def generate_words( dfts, bigrams, skip_grams, window_size, breakpoints, word_length, letter_bits ): @@ -1043,7 +1045,7 @@ def generate_words( for a in prange(dfts.shape[0]): for i in range(word_length): # range(dfts.shape[2]): words[a, : dfts.shape[1]] = ( - words[a, : dfts.shape[1]] << letter_bits[a] + words[a, : dfts.shape[1]] << letter_bits[i] ) | np.digitize(dfts[a, :, i], breakpoints[i], right=True) # add bigrams @@ -1203,7 +1205,7 @@ def create_feature_names(sfa_words): return feature_names -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def create_bag_none( X_index, breakpoints, n_cases, sfa_words, word_length, remove_repeat_words ): @@ -1251,7 +1253,7 @@ def 
create_bag_feature_selection( return all_win_words, relevant_features -@njit(cache=True, fastmath=True) +@njit(cache=True, fastmath=True, parallel=True) def create_bag_transform( X_index, feature_count, @@ -1327,7 +1329,7 @@ def mcb(dft, alphabet_sizes, word_length_actual, binning_method): return breakpoints -@njit(fastmath=True, cache=True) +@njit(fastmath=True, cache=True, parallel=True) def shorten_words(words, amount, letter_bits): new_words = np.zeros((words.shape[0], words.shape[1]), dtype=np.uint32) From 0ee4ec96ad1b4e831b3af775b6696edcb5694aa3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Mon, 26 May 2025 22:02:23 +0200 Subject: [PATCH 05/13] bugfix --- aeon/transformations/collection/dictionary_based/_sfa_fast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py index 223a5324ef..89d6216a1e 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py @@ -1205,7 +1205,7 @@ def create_feature_names(sfa_words): return feature_names -@njit(cache=True, fastmath=True, parallel=True) +@njit(cache=True, fastmath=True) # TODO does not work parallel=True ?? def create_bag_none( X_index, breakpoints, n_cases, sfa_words, word_length, remove_repeat_words ): From 884b4b71137c1cc440b6fd22da2afafc0ec02d61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Fri, 30 May 2025 10:37:46 +0200 Subject: [PATCH 06/13] extend test case and fix sfafast --- aeon/distances/tests/test_symbolic_mindist.py | 1 + .../collection/dictionary_based/_sfa_fast.py | 32 +++++++++++-------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/aeon/distances/tests/test_symbolic_mindist.py b/aeon/distances/tests/test_symbolic_mindist.py index 35ab500fde..ebc615cf10 100644 --- a/aeon/distances/tests/test_symbolic_mindist.py +++ b/aeon/distances/tests/test_symbolic_mindist.py @@ -220,6 +220,7 @@ def test_dynamic_alphabet_allocation(): # Euclidean Distance ed = np.linalg.norm(X[0] - Y[0]) + assert np.mean(np.log2(sfa.alphabet_sizes)) == np.log2(alphabet_size) assert mindist_sfa <= ed assert mindist_dft_sfa >= mindist_sfa # a tighter lower bound assert mindist_dft_sfa <= ed diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py index 89d6216a1e..da09160bc0 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py @@ -607,22 +607,20 @@ def _binning(self, X, y=None): if self.alphabet_allocation_method == "linear_scale": variance = np.sqrt(self.dft_variance[self.support]) - normed_scale = variance / variance.sum() + normed_scale = variance / variance.mean() elif self.alphabet_allocation_method == "sqrt_scale": variance = np.sqrt(np.sqrt(self.dft_variance[self.support])) - normed_scale = variance / variance.sum() - + normed_scale = variance / variance.mean() elif self.alphabet_allocation_method == "log_scale": variance = np.log2(np.sqrt(self.dft_variance[self.support]) + 1) - normed_scale = variance / variance.sum() + normed_scale = variance / variance.mean() - bit_arr_raw = np.floor(normed_scale * symbols * self.word_length).astype( + # Use at most symbols+1 for each position, and check if sum is correct + bit_arr = np.ceil(normed_scale * np.log2(self.alphabet_size)).astype( np.uint32 ) - 
- # Use at most symbols+1 for each position - bit_arr = heal_array(bit_arr_raw, symbols + 1, self.bit_budget) + bit_arr = heal_array(bit_arr, symbols + 1, self.bit_budget) self.alphabet_sizes = [int(2 ** bit_arr[i]) for i in range(len(bit_arr))] self.letter_bits = np.array(bit_arr, dtype=np.uint32) @@ -1394,16 +1392,24 @@ def _transform_words_case( def heal_array(bit_array, max_val, budget): bit_array = bit_array.copy() - # cap values beyond max_val + # cap values beyond max_val and below 1 for i in range(len(bit_array)): bit_array[i] = min(max_val, bit_array[i]) + bit_array[i] = max(1, bit_array[i]) + # to large: heal the array to have the correct sum == budget if bit_array.sum() > budget: - # print("Error", bit_array, bit_array.sum(), budget, bit_array.sum() <= budget) - assert bit_array.sum() <= budget + diff = bit_array.sum() - budget + while diff > 0: + for i in range(len(bit_array) - 1, 0, -1): + if bit_array[i] > 1: + bit_array[i] -= 1 + diff -= 1 + if diff == 0: + break - # heal the array to have the correct sum == budget - if bit_array.sum() != budget: + # to small: heal the array to have the correct sum == budget + if bit_array.sum() < budget: diff = budget - bit_array.sum() while diff > 0: for i in range(len(bit_array)): From ebc632114e864a2b931a5460a41a270143df7e6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Tue, 3 Jun 2025 16:52:31 +0200 Subject: [PATCH 07/13] change logic --- .../collection/dictionary_based/_sfa_fast.py | 45 ++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py index da09160bc0..ff37a5162b 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py @@ -616,11 +616,9 @@ def _binning(self, X, y=None): variance = np.log2(np.sqrt(self.dft_variance[self.support]) + 1) normed_scale = variance / variance.mean() - # Use at most symbols+1 for each position, and check if sum is correct - bit_arr = np.ceil(normed_scale * np.log2(self.alphabet_size)).astype( - np.uint32 - ) - bit_arr = heal_array(bit_arr, symbols + 1, self.bit_budget) + # Use at most symbols+2 for each position, and check if sum is correct + bit_arr = np.ceil(normed_scale * symbols).astype(np.uint32) + bit_arr = heal_array(bit_arr, symbols + 2, self.bit_budget) self.alphabet_sizes = [int(2 ** bit_arr[i]) for i in range(len(bit_arr))] self.letter_bits = np.array(bit_arr, dtype=np.uint32) @@ -1393,30 +1391,37 @@ def heal_array(bit_array, max_val, budget): bit_array = bit_array.copy() # cap values beyond max_val and below 1 - for i in range(len(bit_array)): - bit_array[i] = min(max_val, bit_array[i]) - bit_array[i] = max(1, bit_array[i]) - - # to large: heal the array to have the correct sum == budget - if bit_array.sum() > budget: - diff = bit_array.sum() - budget - while diff > 0: - for i in range(len(bit_array) - 1, 0, -1): - if bit_array[i] > 1: + bit_array = np.minimum(max_val, np.maximum(1, bit_array)) + current_sum = bit_array.sum() + + # too large: heal the array to have the correct sum == budget + if current_sum > budget: + diff = current_sum - budget + change = True + while (diff > 0) and change: + change = False + for i in range(len(bit_array) - 1, -1, -1): + if bit_array[i] >= 1: bit_array[i] -= 1 diff -= 1 + change = True if diff == 0: break - - # to small: heal the array to have the correct sum == budget - if bit_array.sum() < 
budget: - diff = budget - bit_array.sum() - while diff > 0: + assert diff == 0, "Cannot reduce sum to budget" + + # too small: heal the array to have the correct sum == budget + if current_sum < budget: + diff = budget - current_sum + change = True + while (diff > 0) and change: + change = False for i in range(len(bit_array)): if bit_array[i] < max_val: bit_array[i] += 1 diff -= 1 + change = True if diff == 0: break + assert diff == 0, "Cannot increase sum to budget" return bit_array From 598a0de4f62d2232114106db6f649a5863e9abdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Wed, 4 Jun 2025 12:41:30 +0200 Subject: [PATCH 08/13] refactor code --- .../collection/dictionary_based/_sfa_fast.py | 77 +++++++++---------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py index ff37a5162b..0e641254db 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py @@ -606,19 +606,16 @@ def _binning(self, X, y=None): self.bit_budget = int(symbols * self.word_length) if self.alphabet_allocation_method == "linear_scale": - variance = np.sqrt(self.dft_variance[self.support]) + variance = self.dft_variance[self.support] normed_scale = variance / variance.mean() - elif self.alphabet_allocation_method == "sqrt_scale": - variance = np.sqrt(np.sqrt(self.dft_variance[self.support])) + variance = np.sqrt(self.dft_variance[self.support]) normed_scale = variance / variance.mean() elif self.alphabet_allocation_method == "log_scale": - variance = np.log2(np.sqrt(self.dft_variance[self.support]) + 1) + variance = np.log2((self.dft_variance[self.support]) + 1) normed_scale = variance / variance.mean() - # Use at most symbols+2 for each position, and check if sum is correct - bit_arr = np.ceil(normed_scale * symbols).astype(np.uint32) - bit_arr = heal_array(bit_arr, symbols + 2, self.bit_budget) + bit_arr = assign_bits_dynamically(normed_scale, self.bit_budget) self.alphabet_sizes = [int(2 ** bit_arr[i]) for i in range(len(bit_arr))] self.letter_bits = np.array(bit_arr, dtype=np.uint32) @@ -1387,41 +1384,41 @@ def _transform_words_case( @njit(fastmath=True, cache=True) -def heal_array(bit_array, max_val, budget): - bit_array = bit_array.copy() +def assign_bits_dynamically(variance, budget, max_bit_val=9): + """Assign bits dynamically based on variance and budget. + + The goal is to maximize the variance covered by each symbol. + + Parameters + ---------- + variance : 1d numpy array + the variance for each position. + budget : float + the maximal number of bits to assign. + max_bit_val : int, optional, default=9 + the maximum number of bits that can be assigned to a single position. - # cap values beyond max_val and below 1 - bit_array = np.minimum(max_val, np.maximum(1, bit_array)) + Returns + ------- + bit_array : 1d numpy array + the number of bits assigned to each position. 
+ """ + bit_array = np.zeros(len(variance), dtype=np.int32) + bit_array[:] = 0 + + improve = variance.copy() / 2.0 + + # assign bits to positions current_sum = bit_array.sum() + while current_sum < budget: + best_pos = np.argmax(improve) + bit_array[best_pos] += 1 + current_sum += 1 + + # recalculate the improvement + improve[best_pos] = variance[best_pos] / (2 ** (bit_array[best_pos] + 1)) - # too large: heal the array to have the correct sum == budget - if current_sum > budget: - diff = current_sum - budget - change = True - while (diff > 0) and change: - change = False - for i in range(len(bit_array) - 1, -1, -1): - if bit_array[i] >= 1: - bit_array[i] -= 1 - diff -= 1 - change = True - if diff == 0: - break - assert diff == 0, "Cannot reduce sum to budget" - - # too small: heal the array to have the correct sum == budget - if current_sum < budget: - diff = budget - current_sum - change = True - while (diff > 0) and change: - change = False - for i in range(len(bit_array)): - if bit_array[i] < max_val: - bit_array[i] += 1 - diff -= 1 - change = True - if diff == 0: - break - assert diff == 0, "Cannot increase sum to budget" + if bit_array[best_pos] == max_bit_val: + improve[best_pos] = 0 return bit_array From aaa741239f928901fd7cc015ea848bc2d39bab01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Wed, 4 Jun 2025 12:42:16 +0200 Subject: [PATCH 09/13] refactor code --- aeon/distances/tests/test_symbolic_mindist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/distances/tests/test_symbolic_mindist.py b/aeon/distances/tests/test_symbolic_mindist.py index ebc615cf10..ac34d5ec80 100644 --- a/aeon/distances/tests/test_symbolic_mindist.py +++ b/aeon/distances/tests/test_symbolic_mindist.py @@ -175,7 +175,7 @@ def test_sfa_whole_mindist(): def test_dynamic_alphabet_allocation(): """Test the SFA Min-Distance function.""" n_segments = 16 - alphabet_size = 256 + alphabet_size = 64 X_train, _ = load_unit_test("TRAIN") X_test, _ = load_unit_test("TEST") From 4204182ee66632497090b83298867dd94873b905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Wed, 4 Jun 2025 12:47:36 +0200 Subject: [PATCH 10/13] change default --- .../transformations/collection/dictionary_based/_sfa_whole.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/transformations/collection/dictionary_based/_sfa_whole.py b/aeon/transformations/collection/dictionary_based/_sfa_whole.py index 51259cefef..083701dddb 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_whole.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_whole.py @@ -32,7 +32,7 @@ class SFAWhole(SFAFast): learn_alphabet_sizes : boolean, default = True If True, dynamic alphabet sizes are learned based on the variance of the Fourier coefficients. - alphabet_allocation_method : str, default = sqrt_scale + alphabet_allocation_method : str, default = linear_scale The method used to learn the dynamic alphabet sizes. One of {"linear_scale", "log_scale", "sqrt_scale"}. 
norm : boolean, default = False @@ -78,7 +78,7 @@ def __init__( word_length=8, alphabet_size=4, learn_alphabet_sizes=True, - alphabet_allocation_method="sqrt_scale", + alphabet_allocation_method="linear_scale", norm=True, binning_method="equi-width", variance=True, From a02b5f645f8dc4833fa2492496b2f5226dbac61e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Wed, 4 Jun 2025 12:56:07 +0200 Subject: [PATCH 11/13] tidy up code --- aeon/transformations/collection/dictionary_based/_sfa_fast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py index 0e641254db..8b14f9ccdc 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py @@ -1198,7 +1198,7 @@ def create_feature_names(sfa_words): return feature_names -@njit(cache=True, fastmath=True) # TODO does not work parallel=True ?? +@njit(cache=True, fastmath=True) # does not work with parallel=True ?? def create_bag_none( X_index, breakpoints, n_cases, sfa_words, word_length, remove_repeat_words ): From 3e9c1ea6de31c6bbd79afa9f83db7a02fbdb3d88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Fri, 6 Jun 2025 12:19:55 +0200 Subject: [PATCH 12/13] updates --- .../collection/dictionary_based/_sfa_fast.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py index 8b14f9ccdc..d15efbfdb8 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py @@ -615,10 +615,10 @@ def _binning(self, X, y=None): variance = np.log2((self.dft_variance[self.support]) + 1) normed_scale = variance / variance.mean() - bit_arr = assign_bits_dynamically(normed_scale, self.bit_budget) - - self.alphabet_sizes = [int(2 ** bit_arr[i]) for i in range(len(bit_arr))] - self.letter_bits = np.array(bit_arr, dtype=np.uint32) + self.letter_bits = assign_bits_dynamically(normed_scale, self.bit_budget) + self.alphabet_sizes = [ + int(2 ** self.letter_bits[i]) for i in range(len(self.letter_bits)) + ] else: # use the same alphabet size for all positions self.alphabet_sizes = [ @@ -1403,7 +1403,7 @@ def assign_bits_dynamically(variance, budget, max_bit_val=9): bit_array : 1d numpy array the number of bits assigned to each position. 
""" - bit_array = np.zeros(len(variance), dtype=np.int32) + bit_array = np.zeros(len(variance), dtype=np.uint32) bit_array[:] = 0 improve = variance.copy() / 2.0 From 0e8cf701389f55af867543ec622b0e8e8ef27563 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?= Date: Mon, 30 Jun 2025 15:00:59 +0200 Subject: [PATCH 13/13] refactor --- aeon/distances/tests/test_symbolic_mindist.py | 1 - .../collection/dictionary_based/_sfa_fast.py | 14 +++++--------- .../collection/dictionary_based/_sfa_whole.py | 5 ----- 3 files changed, 5 insertions(+), 15 deletions(-) diff --git a/aeon/distances/tests/test_symbolic_mindist.py b/aeon/distances/tests/test_symbolic_mindist.py index ac34d5ec80..c545b07140 100644 --- a/aeon/distances/tests/test_symbolic_mindist.py +++ b/aeon/distances/tests/test_symbolic_mindist.py @@ -194,7 +194,6 @@ def test_dynamic_alphabet_allocation(): word_length=n_segments, alphabet_size=alphabet_size, binning_method=histogram_type, - learn_alphabet_sizes=True, alphabet_allocation_method=alphabet_allocation_method, variance=True, # True gives a tighter lower bound norm=True, diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py index de921304fe..1f8afe8db4 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py @@ -71,9 +71,6 @@ class SFAFast(BaseCollectionTransformer): Number of values to discretise each value to. window_size : int, default = 12 Size of window for sliding. Input series length for whole series transform. - learn_alphabet_sizes : boolean, default = False - If True, dynamic alphabet sizes are learned based on the variance of the Fourier - coefficients. alphabet_allocation_method : str, default = None The method used to learn the dynamic alphabet sizes. One of {"linear_scale", "log_scale", "sqrt_scale"}. @@ -84,12 +81,12 @@ class SFAFast(BaseCollectionTransformer): "equi-width", "information-gain", "information-gain-mae", "kmeans"}, anova : boolean, default = False If True, the Fourier coefficient selection is done via a one-way ANOVA test. - If False, the first Fourier coefficients are selected. Only applicable if - labels are given. + If False, the first Fourier coefficients are selected. + It is a supervised feature selection strategy. Only applicable if labels given. variance : boolean, default = False If True, the Fourier coefficient selection is done via the largest variance. - If False, the first Fourier coefficients are selected. Only applicable if - labels are given. + If False, the first Fourier coefficients are selected. + It is an unsupervised feature selection strategy. dilation : int, default = 0 When set to dilation > 1, adds dilation to the sliding window operation. 
save_words : boolean, default = False @@ -155,7 +152,6 @@ def __init__( word_length=8, alphabet_size=4, window_size=12, - learn_alphabet_sizes=False, alphabet_allocation_method=None, norm=False, binning_method="equi-depth", @@ -212,8 +208,8 @@ def __init__( self.dilation = dilation self.first_difference = first_difference self.sampling_factor = sampling_factor - self.learn_alphabet_sizes = learn_alphabet_sizes self.alphabet_allocation_method = alphabet_allocation_method + self.learn_alphabet_sizes = self.alphabet_allocation_method is not None # Feature selection part self.feature_selection = feature_selection diff --git a/aeon/transformations/collection/dictionary_based/_sfa_whole.py b/aeon/transformations/collection/dictionary_based/_sfa_whole.py index 083701dddb..0f7e4aad2a 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_whole.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_whole.py @@ -29,9 +29,6 @@ class SFAWhole(SFAFast): Length of word to shorten window to (using DFT). alphabet_size : int, default = 4 Number of values to discretise each value to. - learn_alphabet_sizes : boolean, default = True - If True, dynamic alphabet sizes are learned based on the variance of the Fourier - coefficients. alphabet_allocation_method : str, default = linear_scale The method used to learn the dynamic alphabet sizes. One of {"linear_scale", "log_scale", "sqrt_scale"}. @@ -77,7 +74,6 @@ def __init__( self, word_length=8, alphabet_size=4, - learn_alphabet_sizes=True, alphabet_allocation_method="linear_scale", norm=True, binning_method="equi-width", @@ -90,7 +86,6 @@ def __init__( word_length=word_length, alphabet_size=alphabet_size, norm=norm, - learn_alphabet_sizes=learn_alphabet_sizes, alphabet_allocation_method=alphabet_allocation_method, binning_method=binning_method, variance=variance,