From e4d257d582cc670c34085a22562b94fbd5005af1 Mon Sep 17 00:00:00 2001
From: Perro2110
Date: Mon, 13 Jan 2025 14:41:55 +0100
Subject: [PATCH 01/44] Fix evaluaterule for antecedents with a single disjunct [TODO: review this]

---
 src/evaluate.jl | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/evaluate.jl b/src/evaluate.jl
index a460498..c84d198 100644
--- a/src/evaluate.jl
+++ b/src/evaluate.jl
@@ -323,7 +323,20 @@ function evaluaterule(
     checkmask, explanations = begin
         if compute_explanations
             # Note: This is kind of quick and dirty.
-            disjs = SoleLogics.disjuncts(SoleLogics.LeftmostDisjunctiveForm(antecedent(rule)))
+
+            #disjs = SoleLogics.disjuncts(SoleLogics.LeftmostDisjunctiveForm(antecedent(rule)))
+
+            ante = antecedent(rule)
+            if (ante isa SyntaxBranch)
+                # Disjunctive root: convert to disjunctive form, then extract the disjuncts
+                dnf = SoleLogics.LeftmostDisjunctiveForm(ante)
+                disjs = SoleLogics.disjuncts(dnf)
+            else
+                # No OR at the root → a single disjunct
+                disjs = [ante]
+            end
+
+
             checkmatrix = hcat([check(disj, X; kwargs...) for disj in disjs]...)
             # @show checkmatrix
             checkmask = map(any, eachrow(checkmatrix))

From 67af7723f3ba1be3e180580ab172641d45118697 Mon Sep 17 00:00:00 2001
From: giopaglia <24519853+giopaglia@users.noreply.github.com>
Date: Mon, 20 Jan 2025 18:13:32 +0100
Subject: [PATCH 02/44] Add test, add iscomplete

---
 src/utils/models/ensembles.jl |  3 ++-
 test/runtests.jl              |  2 +-
 test/test_tree.jl             | 30 ++++++++++++++++++++++++++++++
 3 files changed, 33 insertions(+), 2 deletions(-)
 create mode 100644 test/test_tree.jl

diff --git a/src/utils/models/ensembles.jl b/src/utils/models/ensembles.jl
index 7f1d6cb..645dc6f 100644
--- a/src/utils/models/ensembles.jl
+++ b/src/utils/models/ensembles.jl
@@ -95,7 +95,6 @@ struct DecisionEnsemble{O,T<:AbstractModel,A<:Base.Callable,W<:Union{Nothing,Abs
         O = Union{outcometype.(models)...}
         DecisionEnsemble{O}(models, args...; kwargs...)
     end
-
 end


@@ -105,6 +104,8 @@ modelstype(m::DecisionEnsemble{O,T}) where {O,T} = T
 models(m::DecisionEnsemble) = m.models
 nmodels(m::DecisionEnsemble) = length(models(m))

+iscomplete(m::DecisionEnsemble) = any(iscomplete.(models(m)))
+
 aggregation(m::DecisionEnsemble) = m.aggregation
 weights(m::DecisionEnsemble) = m.weights
 # Returns the aggregation function, patched by weights if the model has them.
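
For context, the guard that PATCH 01 introduces above can be sanity-checked in
isolation. A minimal sketch (assuming SoleLogics' `parseformula`, `SyntaxBranch`,
`LeftmostDisjunctiveForm` and `disjuncts` behave as they are used in the patch):

    using SoleLogics

    # "p ∨ q" parses to a SyntaxBranch rooted in ∨, so it can be normalized
    # into a leftmost disjunctive form and split into its disjuncts.
    ante = SoleLogics.parseformula("p ∨ q")
    disjs = if ante isa SyntaxBranch
        SoleLogics.disjuncts(SoleLogics.LeftmostDisjunctiveForm(ante))
    else
        [ante]
    end
    @assert length(disjs) == 2

    # A lone atom is a syntax leaf, not a SyntaxBranch, so the guard skips the
    # normalization and wraps the antecedent as a single disjunct instead.
    single = SoleLogics.parseformula("p")
    @assert !(single isa SyntaxBranch)   # hence the `disjs = [ante]` branch
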
diff --git a/test/runtests.jl b/test/runtests.jl
index 8d95df5..2e946c0 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -15,7 +15,7 @@ end
 println("Julia version: ", VERSION)

 test_suites = [
-    ("Models", ["base.jl", ]),
+    ("Models", ["base.jl", "test_tree.jl"]),
     ("Miscellaneous", ["misc.jl", ]),
     ("Parse", ["parse.jl", ]),
     ("Rules", ["juliacon2024.jl", ]),
diff --git a/test/test_tree.jl b/test/test_tree.jl
new file mode 100644
index 0000000..04910dc
--- /dev/null
+++ b/test/test_tree.jl
@@ -0,0 +1,30 @@
+################################################
+   #                p
+   #        ┌───────┴─────────────┐
+   #        │                     r
+   #        q                 ┌───┴───┐
+   #        │                 s     "yes"
+   #    ┌───┴───┐         ┌───┴───┐
+   #  "yes"   "no"      "yes"   "no"
+##################################################
+
+using SoleLogics
+using SoleModels
+
+formula_p = SoleLogics.parseformula("p")
+formula_q = SoleLogics.parseformula("q")
+formula_r = SoleLogics.parsebaseformula("r")
+formula_s = SoleLogics.parsebaseformula("s")
+
+branch_q = Branch(formula_q,("yes","no"),(;))
+branch_s = Branch(formula_s,("yes","no"),(;))
+branch_r = Branch(formula_r,(branch_s,"yes"),(;))
+
+#dt_q = DecisionTree(branch_r,(;))
+
+
+# Possible paths
+path_all = [formula_p,formula_q,formula_s,formula_r,"yes"]
+path_2 = [formula_p,formula_q,"yes"]
+path_1 = [formula_p,"yes"]
+path_0 = ["yes"]

From a287352107a32d5cceb4672a98de99743b5f86f3 Mon Sep 17 00:00:00 2001
From: Perro2110
Date: Wed, 19 Feb 2025 17:33:35 +0100
Subject: [PATCH 03/44] Refactoring: rename extractrules to modalextractrules

---
 src/SoleModels.jl         |  2 +-
 src/rule-extraction.jl    | 16 ++++++++--------
 src/utils/models/other.jl |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/SoleModels.jl b/src/SoleModels.jl
index 6ba88eb..0fbd307 100644
--- a/src/SoleModels.jl
+++ b/src/SoleModels.jl
@@ -93,7 +93,7 @@ export subtreeheight
 include("symbolic-utils.jl")

 export PlainRuleExtractor
-export extractrules, listrules, joinrules
+export modalextractrules, listrules, joinrules

 include("rule-extraction.jl")

diff --git a/src/rule-extraction.jl b/src/rule-extraction.jl
index 50a86c0..d94fb05 100644
--- a/src/rule-extraction.jl
+++ b/src/rule-extraction.jl
@@ -6,7 +6,7 @@ An exact or heuristic logical method for extracting logical rules from symbolic models.

 Refer to [SolePostHoc](https://github.com/aclai-lab/SolePostHoc.jl) for rule extraction methods.

-See also [`extractrules`](@ref), [`Rule`](@ref), [`issymbolicmodel`](@ref).
+See also [`modalextractrules`](@ref), [`Rule`](@ref), [`issymbolicmodel`](@ref).
 """
 abstract type RuleExtractor end

 """
 Return whether a rule extraction method is known to be exact (as opposed to heuristic).
 """
 isexact(::RuleExtractor) = false

 """
-    extractrules(re::RuleExtractor, m, args...; kwargs...)
+    modalextractrules(re::RuleExtractor, m, args...; kwargs...)

 Extract rules from symbolic model `m`, using a rule extraction method `re`.
 """
-function extractrules(re::RuleExtractor, m, args...; kwargs...)
-    return error("Please, provide method extractrules(::$(typeof(m)), args...; kwargs...).")
+function modalextractrules(re::RuleExtractor, m, args...; kwargs...)
+    return error("Please, provide method modalextractrules(::$(typeof(m)), args...; kwargs...).")
 end

 # Helpers
 function (RE::Type{<:RuleExtractor})(args...; kwargs...)
-    return extractrules(RE(), args...; kwargs...)
+    return modalextractrules(RE(), args...; kwargs...)
 end

 # Helpers
 function (re::RuleExtractor)(args...; kwargs...)
-    return extractrules(re, args...; kwargs...)
+    return modalextractrules(re, args...; kwargs...)
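+    # Note: with these two helpers, both `PlainRuleExtractor(m)` (calling the
+    # extractor type) and `PlainRuleExtractor()(m)` (calling an instance)
+    # forward to `modalextractrules(PlainRuleExtractor(), m)`.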
 end

 """
 Plain extraction method involves listing one rule for each possible symbolic path within the model.
-With this method, [`extractrules`](@ref) redirects to [`listrules`](@ref).
+With this method, [`modalextractrules`](@ref) redirects to [`listrules`](@ref).

 See also [`listrules`](@ref), [`Rule`](@ref), [`issymbolicmodel`](@ref).
 """
 struct PlainRuleExtractor <: RuleExtractor end

 isexact(::PlainRuleExtractor) = true

-function extractrules(::PlainRuleExtractor, m, args...; kwargs...)
+function modalextractrules(::PlainRuleExtractor, m, args...; kwargs...)
     if haslistrules(m)
         listrules(m, args...; kwargs...)
     else
diff --git a/src/utils/models/other.jl b/src/utils/models/other.jl
index 60f79f6..bb9280b 100644
--- a/src/utils/models/other.jl
+++ b/src/utils/models/other.jl
@@ -361,7 +361,7 @@ iscomplete(m::DecisionSet) = m.iscomplete
 isnonoverlapping(m::DecisionSet) = m.isnonoverlapping

 function listrules(m::DecisionSet)
-    isnonoverlapping(m) || error("Cannot listrules from an overlapping decision set. Try `extractrules` with heuristics, instead.")
+    isnonoverlapping(m) || error("Cannot listrules from an overlapping decision set. Try `modalextractrules` with heuristics, instead.")
     rules(m)
 end

From b2312c1d053a8bf3e952d5e9588e8b843b052692 Mon Sep 17 00:00:00 2001
From: Perro2110
Date: Tue, 11 Mar 2025 21:38:36 +0100
Subject: [PATCH 04/44] Minor fix: guard sensitivity/specificity against empty check masks

---
 src/evaluate.jl | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/evaluate.jl b/src/evaluate.jl
index c84d198..4401ce3 100644
--- a/src/evaluate.jl
+++ b/src/evaluate.jl
@@ -350,11 +350,24 @@ function evaluaterule(
     end
     pos_checkmask = checkmask[classmask]
     neg_checkmask = checkmask[(!).(classmask)]
+
+    #=
+    println("pos_checkmask: ",pos_checkmask)
+    println("neg_checkmask: ",neg_checkmask)
+
+    println("lgh pos_checkmask ",length(pos_checkmask))
+    println("lgh neg_checkmask ",length(neg_checkmask))
+    =#
+
+    # Guard against empty arrays
+    sensitivity = length(pos_checkmask) > 0 ? sum(pos_checkmask)/length(pos_checkmask) : 0.0
+    specificity = length(neg_checkmask) > 0 ?
1-(sum(neg_checkmask)/length(neg_checkmask)) : 1.0 + out = (; classmask = classmask, checkmask = checkmask, - sensitivity = sum(pos_checkmask)/length(pos_checkmask), - specificity = 1-(sum(neg_checkmask)/length(neg_checkmask)), + sensitivity = sensitivity, + specificity = specificity, explanations = explanations, ) return out From 4bc2854394c93f9f67f1c8c7f2acdc7300712459 Mon Sep 17 00:00:00 2001 From: Perro2110 Date: Sat, 29 Mar 2025 10:21:56 +0100 Subject: [PATCH 05/44] update image_family --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 0a3671d..013f726 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -1,5 +1,5 @@ freebsd_instance: - image_family: freebsd-14-0 + image_family: freebsd-14-2 task: name: FreeBSD artifacts_cache: From afd11612e7e992aca296af414bb0d76bbd6c6c29 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Sat, 29 Mar 2025 21:54:24 +0100 Subject: [PATCH 06/44] decisiontree and xgboost ext working --- ext/DecisionTreeExt.jl | 11 ++- ext/XGBoostExt.jl | 157 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 2 deletions(-) diff --git a/ext/DecisionTreeExt.jl b/ext/DecisionTreeExt.jl index de67fdb..1755fb5 100644 --- a/ext/DecisionTreeExt.jl +++ b/ext/DecisionTreeExt.jl @@ -9,7 +9,7 @@ import DecisionTree as DT function get_condition(featid, featval, featurenames) test_operator = (<) # @show fieldnames(typeof(tree)) - feature = !isnothing(featurenames) ? VariableValue(featurenames[featid]) : VariableValue(featid) + feature = isnothing(featurenames) ? VariableValue(featid) : VariableValue(featid, featurenames[featid]) return ScalarCondition(feature, test_operator, featval) end @@ -106,9 +106,16 @@ function SoleModels.solemodel( return m end -function SoleModels.solemodel(tree::DT.InfoNode; keep_condensed = false, featurenames = true, classlabels = tree.info.classlabels, kwargs...) +function SoleModels.solemodel( + tree::DT.InfoNode{T,orig_O}; + keep_condensed=false, + featurenames=true, + # classlabels=tree.info.classlabels, + kwargs... +) where {T,orig_O} # @show fieldnames(typeof(tree)) featurenames = featurenames == true ? tree.info.featurenames : featurenames + classlabels = haskey(tree.info, :classlabels) ? tree.info.classlabels : nothing root, info = begin if keep_condensed diff --git a/ext/XGBoostExt.jl b/ext/XGBoostExt.jl index ad346ae..7925a6b 100644 --- a/ext/XGBoostExt.jl +++ b/ext/XGBoostExt.jl @@ -89,5 +89,162 @@ end # return Branch(antecedent, left_tree, right_tree, info) # end +function get_condition(featidstr, featval, featurenames; test_operator) + featid = parse(Int, featidstr[2:end]) + 1 # considering 0-based indexing in XGBoost feature ids + feature = isnothing(featurenames) ? 
VariableValue(featid) : VariableValue(featid, featurenames[featid]) + return ScalarCondition(feature, test_operator, featval) +end + +function satisfies_conditions(row, formula) + # check_cond = true + # for atom in formula + # if !atom.value.metacond.test_operator(row[atom.value.metacond.feature.i_variable], atom.value.threshold) + # check_cond = false + # end + # end + # return check_cond + + all(atom -> atom.value.metacond.test_operator( + row[atom.value.metacond.feature.i_variable], + atom.value.threshold), formula + ) +end + +function bitmap_check_conditions(X, formula) + BitVector([satisfies_conditions(row, formula) for row in eachrow(X)]) +end + +function SoleModels.solemodel( + model::Vector{<:XGBoost.Node}, + # args...; + X::AbstractMatrix, + y::AbstractVector; + weights::Union{AbstractVector{<:Number}, Nothing}=nothing, + classlabels = nothing, + featurenames = nothing, + keep_condensed = false, + kwargs... +) + # TODO + if keep_condensed && !isnothing(classlabels) + # info = (; + # apply_preprocess=(y -> orig_O(findfirst(x -> x == y, classlabels))), + # apply_postprocess=(y -> classlabels[y]), + # ) + info = (; + apply_preprocess=(y -> findfirst(x -> x == y, classlabels)), + apply_postprocess=(y -> classlabels[y]), + ) + keep_condensed = !keep_condensed + # O = eltype(classlabels) + else + info = (;) + # O = orig_O + end + + trees = map(t -> begin + # isnothing(t.split) ? + # xgbleaf(t, Formula[], X, y; classlabels, featurenames) : + SoleModels.solemodel(t, X, y; classlabels, featurenames, keep_condensed, kwargs...) + end, model) + + if !isnothing(featurenames) + info = merge(info, (; featurenames=featurenames, )) + end + + info = merge(info, (; + leaf_values=vcat([t.info[:leaf_values] for t in trees]...), + supporting_predictions=vcat([t.info[:supporting_predictions] for t in trees]...), + supporting_labels=vcat([t.info[:supporting_labels] for t in trees]...), + ) + ) + + return isnothing(weights) ? + DecisionEnsemble(trees, info) : + DecisionEnsemble(trees, weights, info) +end + +""" + solemodel(tree::XGBoost.Node; fl=Formula[], fr=Formula[], classlabels=nothing, featurenames=nothing, keep_condensed=false) + +Traverses a learned XGBoost tree, collecting the path conditions for each branch. +Left paths (<) store conditions in `fl`, right paths (≥) store conditions in `fr`. +When reaching a leaf, calls `xgbleaf` with the path's collected conditions. 
+""" +function SoleModels.solemodel( + tree::XGBoost.Node, + X::AbstractMatrix, + y::AbstractVector; + path_conditions = Formula[], + classlabels=nothing, + featurenames=nothing, + keep_condensed=false +) + keep_condensed && error("Cannot keep condensed XGBoost.Node.") + + # xgboost trees could be composed of only one leaf, without any split + # isnothing(tree.split) && return nothing + isnothing(tree.split) && return xgbleaf(tree, Formula[], X, y; classlabels, featurenames) + + antecedent = Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(<))) + + # Create a new path for the left branch + left_path = copy(path_conditions) + push!(left_path, Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(<)))) + + # Create a new path for the right branch + right_path = copy(path_conditions) + push!(right_path, Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(≥)))) + + lefttree = if isnothing(tree.children[1].split) + # @show SoleModels.join_antecedents(left_path) + xgbleaf(tree.children[1], left_path, X, y; classlabels, featurenames) + else + SoleModels.solemodel(tree.children[1], X, y; path_conditions=left_path, classlabels=classlabels, featurenames=featurenames) + end + isnothing(lefttree) && return Nothing + + righttree = if isnothing(tree.children[2].split) + # @show SoleModels.join_antecedents(right_path) + xgbleaf(tree.children[2], right_path, X, y; classlabels, featurenames) + else + SoleModels.solemodel(tree.children[2], X, y; path_conditions=right_path, classlabels=classlabels, featurenames=featurenames) + end + isnothing(righttree) && return Nothing + + info = (; + leaf_values = [lefttree.info[:leaf_values]..., righttree.info[:leaf_values]...], + supporting_predictions = [lefttree.info[:supporting_predictions]..., righttree.info[:supporting_predictions]...], + supporting_labels = [lefttree.info[:supporting_labels]..., righttree.info[:supporting_labels]...], + ) + return Branch(antecedent, lefttree, righttree, info) +end + +function xgbleaf( + leaf::XGBoost.Node, + formula::Vector{<:Formula}, + X::AbstractMatrix, + y::AbstractVector; + classlabels=nothing, + featurenames=nothing, + keep_condensed=false +) + keep_condensed && error("Cannot keep condensed XGBoost.Node.") + + bitX = bitmap_check_conditions(X, formula) + push!(bitX, 0) + + labels = unique(y) + prediction = SoleModels.bestguess(y[bitX]; suppress_parity_warning=true) + + isnothing(prediction) && (prediction = labels[findfirst(x -> x == "nothing", labels)]) + + info = (; + leaf_values = leaf.leaf, + supporting_predictions = fill(prediction, length(labels)), + supporting_labels = labels, +) + return SoleModels.ConstantModel(prediction, info) +end end From 183de681bad596831350f0c10d54b419236ad62a Mon Sep 17 00:00:00 2001 From: Perro2110 Date: Tue, 1 Apr 2025 21:29:53 +0200 Subject: [PATCH 07/44] minor fix in Project.toml for compatibility SoleBase = "0.11 - 0.13" --- Project.toml | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/Project.toml b/Project.toml index 23b221e..e589a0b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,12 @@ name = "SoleModels" uuid = "4249d9c7-3290-4ddd-961c-e1d3ec2467f8" license = "MIT" -authors = ["Michele GHIOTTI", "Giovanni PAGLIARINI", "Edoardo PONSANESI", "Eduard I. STAN"] +authors = [ + "Michele GHIOTTI", + "Giovanni PAGLIARINI", + "Edoardo PONSANESI", + "Eduard I. 
STAN", +] version = "0.10.0" [deps] @@ -61,9 +66,9 @@ ProgressMeter = "1" Random = "1" Reexport = "1" Revise = "3" -SoleBase = "0.13" +SoleBase = "0.11 - 0.13" SoleData = "0.15, 0.16" -SoleLogics = "0.11 - 0.12" +SoleLogics = "0.11 - 0.13" StatsBase = "0.30 - 0.34" Suppressor = "0.2" Tables = "1" @@ -89,4 +94,19 @@ SoleData = "123f1ae1-6307-4526-ab5b-aab3a92a2b8c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "DataFrames", "Random", "MLJ", "MLJXGBoostInterface", "MultiData", "Markdown", "InteractiveUtils", "BenchmarkTools", "MLJBase", "XGBoost", "DecisionTree", "MLJDecisionTreeInterface", "SoleData"] +test = [ + "Test", + "DataFrames", + "Random", + "MLJ", + "MLJXGBoostInterface", + "MultiData", + "Markdown", + "InteractiveUtils", + "BenchmarkTools", + "MLJBase", + "XGBoost", + "DecisionTree", + "MLJDecisionTreeInterface", + "SoleData", +] From 8ee5cf383c5443e1dae38b7d0bb1c121a9709e4e Mon Sep 17 00:00:00 2001 From: giopaglia <24519853+giopaglia@users.noreply.github.com> Date: Wed, 2 Apr 2025 18:02:26 +0200 Subject: [PATCH 08/44] Fix --- test/misc.jl | 1 - test/test_tree.jl | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/test/misc.jl b/test/misc.jl index cbafaf2..84245ac 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -11,7 +11,6 @@ using SoleModels: listrules, displaymodel, submodels io = IOBuffer() -# parse_other_kind_of_formula = SoleLogics.parsebaseformula parse_other_kind_of_formula = SoleLogics.parseformula ################################### LeafModel ############################################# diff --git a/test/test_tree.jl b/test/test_tree.jl index 04910dc..af4cb1b 100644 --- a/test/test_tree.jl +++ b/test/test_tree.jl @@ -13,8 +13,8 @@ using SoleModels formula_p = SoleLogics.parseformula("p") formula_q = SoleLogics.parseformula("q") -formula_r = SoleLogics.parsebaseformula("r") -formula_s = SoleLogics.parsebaseformula("s") +formula_r = SoleLogics.parseformula("r") +formula_s = SoleLogics.parseformula("s") branch_q = Branch(formula_q,("yes","no"),(;)) branch_s = Branch(formula_s,("yes","no"),(;)) From d870851a8de02865e11dfa0560741c3ab0121fb3 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Wed, 2 Apr 2025 21:35:15 +0200 Subject: [PATCH 09/44] debug xgboost, not yet ready --- test/DecisionTreeExt/forest.jl | 9 ++-- test/DecisionTreeExt/tree.jl | 7 ++- test/XgBoostExt/xgboost.jl | 82 ++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 5 deletions(-) create mode 100644 test/XgBoostExt/xgboost.jl diff --git a/test/DecisionTreeExt/forest.jl b/test/DecisionTreeExt/forest.jl index 4e0d1b5..f82afd2 100644 --- a/test/DecisionTreeExt/forest.jl +++ b/test/DecisionTreeExt/forest.jl @@ -6,6 +6,7 @@ using DataFrames using MLJDecisionTreeInterface using SoleModels +using Random import DecisionTree as DT @@ -13,8 +14,9 @@ X, y = @load_iris X = DataFrame(X) train_ratio = 0.8 +rng = Xoshiro(11) -train, test = partition(eachindex(y), train_ratio, shuffle=true) +train, test = partition(eachindex(y), train_ratio; shuffle=true, rng) X_train, y_train = X[train, :], y[train] X_test, y_test = X[test, :], y[test] @@ -52,10 +54,11 @@ preds = apply(solem, X_test) preds2 = apply!(solem, X_test, y_test) @test preds == preds2 -@test sum(preds .== y_test)/length(y_test) >= 0.8 +accuracy = sum(preds .== y_test)/length(y_test) +@test accuracy >= 0.8 # apply!(solem, X_test, y_test, mode = :append) printmodel(solem; max_depth = 7, show_intermediate_finals = true, show_metrics = true) -@test_broken printmodel.(listrules(solem, min_lift = 1.0, 
min_ninstances = 0); show_metrics = true); +# @test_broken printmodel.(listrules(solem, min_lift = 1.0, min_ninstances = 0); show_metrics = true); diff --git a/test/DecisionTreeExt/tree.jl b/test/DecisionTreeExt/tree.jl index 6936d00..a0c349a 100644 --- a/test/DecisionTreeExt/tree.jl +++ b/test/DecisionTreeExt/tree.jl @@ -6,6 +6,7 @@ using DataFrames using MLJDecisionTreeInterface using SoleModels +using Random import DecisionTree as DT @@ -13,8 +14,9 @@ X, y = @load_iris X = DataFrame(X) train_ratio = 0.8 +rng = Xoshiro(11) -train, test = partition(eachindex(y), train_ratio, shuffle=true) +train, test = partition(eachindex(y), train_ratio; shuffle=true, rng) X_train, y_train = X[train, :], y[train] X_test, y_test = X[test, :], y[test] @@ -47,7 +49,8 @@ preds = apply(solem, X_test) preds2 = apply!(solem, X_test, y_test) @test preds == preds2 -@test sum(preds .== y_test)/length(y_test) > 0.7 +accuracy = sum(preds .== y_test)/length(y_test) +@test accuracy > 0.7 # apply!(solem, X_test, y_test, mode = :append) diff --git a/test/XgBoostExt/xgboost.jl b/test/XgBoostExt/xgboost.jl new file mode 100644 index 0000000..b8d9a24 --- /dev/null +++ b/test/XgBoostExt/xgboost.jl @@ -0,0 +1,82 @@ +using Test + +using MLJ +using MLJBase +using DataFrames + +using MLJXGBoostInterface +using SoleModels + +import XGBoost as XGB + +using Random, CategoricalArrays + +X, y = @load_iris +X = DataFrame(X) + +train_ratio = 0.8 +rng = Xoshiro(11) + +train, test = partition(eachindex(y), train_ratio; shuffle=true, rng) +X_train, y_train = X[train, :], y[train] +X_test, y_test = X[test, :], y[test] + +println("Training set size: ", size(X_train), " - ", size(y_train)) +println("Test set size: ", size(X_test), " - ", size(y_test)) +println("Training set type: ", typeof(X_train), " - ", typeof(y_train)) +println("Test set type: ", typeof(X_test), " - ", typeof(y_test)) + +XGTrees = MLJ.@load XGBoostClassifier pkg=XGBoost + +model = XGTrees(; + num_round=10, + max_depth=6, + objective="multi:softmax" +) + +# Bind the model and data into a machine +mach = machine(model, X_train, y_train) +# Fit the model +fit!(mach) + +trees = XGB.trees(mach.fitresult[1]) + +featurenames = mach.report.vals[1][1] +ds_safetest = vcat(y, "nothing") + + +solem = solemodel(trees, Matrix(X), ds_safetest) +solem = solemodel(trees, Matrix(X), ds_safetest; featurenames) +solem = solemodel(trees, Matrix(X), ds_safetest; featurenames, keep_condensed = false) + +@test SoleData.scalarlogiset(X_test; allow_propositional = true) isa PropositionalLogiset + +# Make test instances flow into the model +preds = apply(solem, X_test) +preds2 = apply!(solem, X_test, y_test) + +@test preds == preds2 +accuracy = sum(preds .== y_test)/length(y_test) +@test accuracy > 0.7 + +# apply!(solem, X_test, y_test, mode = :append) + +solem = @test_throws ErrorException solemodel(trees, Matrix(X), ds_safetest; featurenames, keep_condensed = true) +solem = @test_nowarn solemodel(trees, Matrix(X), ds_safetest; featurenames, keep_condensed = false) + +printmodel(solem; max_depth = 7, show_intermediate_finals = true, show_metrics = true) + +# comparision with XGBoost.jl + +yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1 +# create and train a gradient boosted tree model of 5 trees +bst = XGB.xgboost( + (X_train, yl_train), + num_round=10, + num_class=3, + max_depth=6, + objective="multi:softmax" +) +# obtain model predictions +ŷ = XGB.predict(bst, X_test) + From 5de4302bb82c6a0f1d8cb2f83812d5287f513a77 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Thu, 10 
Apr 2025 00:29:09 +0200 Subject: [PATCH 10/44] Refining XGBoost --- ext/XGBoostExt.jl | 157 ++++++------ src/SoleModels.jl | 2 + src/print.jl | 2 +- src/utils/models/ensembles.jl | 354 +++++++++++++++++++--------- src/utils/models/leaf.jl | 37 +++ src/utils/models/rule-and-branch.jl | 29 +++ test/XgBoostExt/xgboost.jl | 68 +++++- 7 files changed, 448 insertions(+), 201 deletions(-) diff --git a/ext/XGBoostExt.jl b/ext/XGBoostExt.jl index 7925a6b..78d7031 100644 --- a/ext/XGBoostExt.jl +++ b/ext/XGBoostExt.jl @@ -3,6 +3,8 @@ module XGBoostExt using SoleModels using XGBoost +using CategoricalArrays + import SoleModels: alphabet, solemodel function alphabet(model::XGBoost.Booster; kwargs...) @@ -31,8 +33,8 @@ function alphabet(model::XGBoost.Booster; kwargs...) _alphabet!(Atom{ScalarCondition}[], model; kwargs...) end - # TODO fix and test. Problem: where are the tree weights? How do I write this in the multi-class case? +# leaf values are actually the weight of the tree # # Convert an XGBoost.Booster to a Sole Ensemble # function solemodel(model::XGBoost.Booster; with_stats::Bool = true, kwargs...) @@ -95,15 +97,12 @@ function get_condition(featidstr, featval, featurenames; test_operator) return ScalarCondition(feature, test_operator, featval) end -function satisfies_conditions(row, formula) - # check_cond = true - # for atom in formula - # if !atom.value.metacond.test_operator(row[atom.value.metacond.feature.i_variable], atom.value.threshold) - # check_cond = false - # end - # end - # return check_cond +function get_condition(class_idx, featurenames; test_operator, featval) + feature = isnothing(featurenames) ? VariableValue(class_idx) : VariableValue(class_idx, featurenames[class_idx]) + return ScalarCondition(feature, test_operator, featval) +end +function satisfies_conditions(row, formula) all(atom -> atom.value.metacond.test_operator( row[atom.value.metacond.feature.i_variable], atom.value.threshold), formula @@ -114,54 +113,59 @@ function bitmap_check_conditions(X, formula) BitVector([satisfies_conditions(row, formula) for row in eachrow(X)]) end +function early_return(leaf, antecedent, clabel, classl) + info =(; + leaf_values = leaf, + supporting_predictions = clabel, + supporting_labels = [classl], + ) + + return Branch( + antecedent, + SoleModels.ConstantModel(first(clabel), info), + SoleModels.ConstantModel(first(clabel), info), + info + ) +end + +# ---------------------------------------------------------------------------- # +# DecisionXGBoost solemodel # +# ---------------------------------------------------------------------------- # function SoleModels.solemodel( model::Vector{<:XGBoost.Node}, - # args...; X::AbstractMatrix, y::AbstractVector; - weights::Union{AbstractVector{<:Number}, Nothing}=nothing, - classlabels = nothing, - featurenames = nothing, - keep_condensed = false, + classlabels, + featurenames=nothing, + keep_condensed=false, kwargs... ) - # TODO - if keep_condensed && !isnothing(classlabels) - # info = (; - # apply_preprocess=(y -> orig_O(findfirst(x -> x == y, classlabels))), - # apply_postprocess=(y -> classlabels[y]), - # ) - info = (; - apply_preprocess=(y -> findfirst(x -> x == y, classlabels)), - apply_postprocess=(y -> classlabels[y]), - ) - keep_condensed = !keep_condensed - # O = eltype(classlabels) - else - info = (;) - # O = orig_O - end - - trees = map(t -> begin - # isnothing(t.split) ? - # xgbleaf(t, Formula[], X, y; classlabels, featurenames) : - SoleModels.solemodel(t, X, y; classlabels, featurenames, keep_condensed, kwargs...) 
- end, model) - - if !isnothing(featurenames) - info = merge(info, (; featurenames=featurenames, )) + keep_condensed && error("Cannot keep condensed XGBoost.Node.") + + nclasses = length(classlabels) + + trees = map(enumerate(model)) do (i, t) + class_idx = (i - 1) % nclasses + 1 + clabels = categorical([classlabels[class_idx]]) + # xgboost trees could be composed of only one leaf, without any split + if isnothing(t.split) + antecedent = Atom(get_condition(class_idx, featurenames; test_operator=(<), featval=Inf)) + early_return(t.leaf, antecedent, clabels, classlabels[class_idx]) + else + SoleModels.solemodel(t, X, y; classlabels, featurenames, class_idx, clabels, kwargs...) + end end - info = merge(info, (; - leaf_values=vcat([t.info[:leaf_values] for t in trees]...), - supporting_predictions=vcat([t.info[:supporting_predictions] for t in trees]...), - supporting_labels=vcat([t.info[:supporting_labels] for t in trees]...), + info = merge( + isnothing(featurenames) ? (;) : (;featurenames=featurenames), + (; + leaf_values = reduce(vcat, getindex.(getproperty.(trees, :info), :leaf_values)), + supporting_predictions = reduce(vcat, getindex.(getproperty.(trees, :info), :supporting_predictions)), + supporting_labels = reduce(vcat, getindex.(getproperty.(trees, :info), :supporting_labels)) ) ) - return isnothing(weights) ? - DecisionEnsemble(trees, info) : - DecisionEnsemble(trees, weights, info) + return DecisionXGBoost(trees, info) end """ @@ -175,42 +179,43 @@ function SoleModels.solemodel( tree::XGBoost.Node, X::AbstractMatrix, y::AbstractVector; - path_conditions = Formula[], - classlabels=nothing, + classlabels, + path_conditions=Formula[], featurenames=nothing, - keep_condensed=false + class_idx, + clabels ) - keep_condensed && error("Cannot keep condensed XGBoost.Node.") - - # xgboost trees could be composed of only one leaf, without any split - # isnothing(tree.split) && return nothing - isnothing(tree.split) && return xgbleaf(tree, Formula[], X, y; classlabels, featurenames) - antecedent = Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(<))) - - # Create a new path for the left branch + + # create a new path for the left branch left_path = copy(path_conditions) push!(left_path, Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(<)))) - # Create a new path for the right branch + # create a new path for the right branch right_path = copy(path_conditions) push!(right_path, Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(≥)))) lefttree = if isnothing(tree.children[1].split) # @show SoleModels.join_antecedents(left_path) - xgbleaf(tree.children[1], left_path, X, y; classlabels, featurenames) + xgbleaf(tree.children[1], left_path, X, y) else - SoleModels.solemodel(tree.children[1], X, y; path_conditions=left_path, classlabels=classlabels, featurenames=featurenames) + SoleModels.solemodel(tree.children[1], X, y; path_conditions=left_path, classlabels, class_idx, clabels,featurenames) end - isnothing(lefttree) && return Nothing - + isnothing(lefttree) && + begin + return early_return(tree.children[1].leaf, antecedent, clabels, classlabels[class_idx]) + end + righttree = if isnothing(tree.children[2].split) # @show SoleModels.join_antecedents(right_path) - xgbleaf(tree.children[2], right_path, X, y; classlabels, featurenames) + xgbleaf(tree.children[2], right_path, X, y) else - SoleModels.solemodel(tree.children[2], X, y; path_conditions=right_path, classlabels=classlabels, 
featurenames=featurenames) + SoleModels.solemodel(tree.children[2], X, y; path_conditions=right_path, classlabels, class_idx, clabels, featurenames) + end + isnothing(righttree) && + begin + return early_return(tree.children[2].leaf, antecedent, clabels, classlabels[class_idx]) end - isnothing(righttree) && return Nothing info = (; leaf_values = [lefttree.info[:leaf_values]..., righttree.info[:leaf_values]...], @@ -224,26 +229,20 @@ function xgbleaf( leaf::XGBoost.Node, formula::Vector{<:Formula}, X::AbstractMatrix, - y::AbstractVector; - classlabels=nothing, - featurenames=nothing, - keep_condensed=false + y::AbstractVector ) - keep_condensed && error("Cannot keep condensed XGBoost.Node.") - bitX = bitmap_check_conditions(X, formula) - push!(bitX, 0) - - labels = unique(y) prediction = SoleModels.bestguess(y[bitX]; suppress_parity_warning=true) + labels = unique(y) - isnothing(prediction) && (prediction = labels[findfirst(x -> x == "nothing", labels)]) + isnothing(prediction) && return nothing info = (; - leaf_values = leaf.leaf, - supporting_predictions = fill(prediction, length(labels)), - supporting_labels = labels, -) + leaf_values = leaf.leaf, + supporting_predictions = fill(prediction, length(labels)), + supporting_labels = labels, + ) + return SoleModels.ConstantModel(prediction, info) end diff --git a/src/SoleModels.jl b/src/SoleModels.jl index 6ba88eb..a3ae567 100644 --- a/src/SoleModels.jl +++ b/src/SoleModels.jl @@ -62,6 +62,8 @@ export DecisionEnsemble, models export DecisionForest, trees export DecisionSet, rules, nrules +export DecisionXGBoost + export MixedModel export haslistrules, solemodel diff --git a/src/print.jl b/src/print.jl index b111210..efc4e13 100644 --- a/src/print.jl +++ b/src/print.jl @@ -521,7 +521,7 @@ end function printmodel( io::IO, - m::DecisionEnsemble; + m::Union{DecisionEnsemble, DecisionXGBoost}; header = DEFAULT_HEADER, indentation_str = "", indentation = default_indentation, diff --git a/src/utils/models/ensembles.jl b/src/utils/models/ensembles.jl index 7f1d6cb..6738264 100644 --- a/src/utils/models/ensembles.jl +++ b/src/utils/models/ensembles.jl @@ -252,123 +252,119 @@ function ntrees(m::DecisionForest) length(trees(m)) end - - - -""" -A `MaxDecisionBag` is an ensemble of models, weighted by a set of other models. -In this simplified implementation, only the model with the highest (`max`) weight is responsible for the outcome. - -See also [`DecisionForest`](@ref), [`DecisionTree`](@ref), [`DecisionEnsemble`](@ref), [`MaxDecisionBag`](@ref). -""" -struct MaxDecisionBag{O,TO<:AbstractModel,TU<:AbstractModel - # ,A<:Base.Callable - # ,W<:Union{Nothing,AbstractVector} - } <: AbstractDecisionEnsemble{O} - output_producing_models::Vector{TO} - weight_producing_models::Vector{TU} - # aggregation::A - # weights::W - info::NamedTuple - - function MaxDecisionBag{O}( - output_producing_models::Vector, - weight_producing_models::Vector, - # aggregation::Union{Nothing,Base.Callable}, - # weights::Union{Nothing,AbstractVector}, - info::NamedTuple = (;); - suppress_parity_warning = nothing, - ) where {O} - @assert length(output_producing_models) > 0 "Cannot instantiate empty bagoutput-producing models!" - @assert length(weight_producing_models) > 0 "Cannot instantiate empty bagweight-producing models!" - @assert length(output_producing_models) == length(weight_producing_models) "Cannot instantiate bag with different numbers of output and weight producing models: $(length(output_producing_models)) != $(length(weight_producing_models))." 
- output_producing_models = wrap.(output_producing_models) - weight_producing_models = wrap.(weight_producing_models) - # if isnothing(aggregation) - # # if a suppress_parity_warning parameter is provided, then the aggregation's suppress_parity_warning defaults to it; - # # otherwise, it defaults to bestguess's suppress_parity_warning - # if isnothing(suppress_parity_warning) - # aggregation = function (args...; kwargs...) bestguess(args...; kwargs...) end - # else - # aggregation = function (args...; suppress_parity_warning = suppress_parity_warning, kwargs...) bestguess(args...; suppress_parity_warning, kwargs...) end - # end - # else - # isnothing(suppress_parity_warning) || @warn "Unexpected value for suppress_parity_warning: $(suppress_parity_warning)." - # end - TO = typeof(output_producing_models) - TU = typeof(weight_producing_models) - # W = typeof(weights) - # A = typeof(aggregation) - new{O,TO,TU}(output_producing_models, weight_producing_models, aggregation, info) # , weights - end +# """ +# A `MaxDecisionBag` is an ensemble of models, weighted by a set of other models. + +# See also [`DecisionForest`](@ref), [`DecisionTree`](@ref), [`DecisionEnsemble`](@ref), [`MaxDecisionBag`](@ref). +# """ +# struct DecisionBag{O,TO<:AbstractModel,TU<:AbstractModel +# # ,A<:Base.Callable +# # ,W<:Union{Nothing,AbstractVector} +# } <: AbstractDecisionEnsemble{O} +# output_producing_models::Vector{TO} +# weight_producing_models::Vector{TU} +# # aggregation::A +# # weights::W +# info::NamedTuple + +# function DecisionBag{O}( +# output_producing_models::Vector, +# weight_producing_models::Vector, +# # aggregation::Union{Nothing,Base.Callable}, +# # weights::Union{Nothing,AbstractVector}, +# info::NamedTuple = (;); +# suppress_parity_warning = nothing, +# ) where {O} +# @assert length(output_producing_models) > 0 "Cannot instantiate empty bagoutput-producing models!" +# @assert length(weight_producing_models) > 0 "Cannot instantiate empty bagweight-producing models!" +# @assert length(output_producing_models) == length(weight_producing_models) "Cannot instantiate bag with different numbers of output and weight producing models: $(length(output_producing_models)) != $(length(weight_producing_models))." +# output_producing_models = wrap.(output_producing_models) +# weight_producing_models = wrap.(weight_producing_models) +# # if isnothing(aggregation) +# # # if a suppress_parity_warning parameter is provided, then the aggregation's suppress_parity_warning defaults to it; +# # # otherwise, it defaults to bestguess's suppress_parity_warning +# # if isnothing(suppress_parity_warning) +# # aggregation = function (args...; kwargs...) bestguess(args...; kwargs...) end +# # else +# # aggregation = function (args...; suppress_parity_warning = suppress_parity_warning, kwargs...) bestguess(args...; suppress_parity_warning, kwargs...) end +# # end +# # else +# # isnothing(suppress_parity_warning) || @warn "Unexpected value for suppress_parity_warning: $(suppress_parity_warning)." +# # end +# TO = typeof(output_producing_models) +# TU = typeof(weight_producing_models) +# # W = typeof(weights) +# # A = typeof(aggregation) +# new{O,TO,TU}(output_producing_models, weight_producing_models, aggregation, info) # , weights +# end - function MaxDecisionBag( - output_producing_models::Vector, - weight_producing_models::Vector, - args...; kwargs... - ) - @assert length(output_producing_models) > 0 "Cannot instantiate empty bagoutput-producing models!" 
- @assert length(weight_producing_models) > 0 "Cannot instantiate empty bagweight-producing models!" - @assert length(output_producing_models) == length(weight_producing_models) "Cannot instantiate bag with different numbers of output and weight producing models: $(length(output_producing_models)) != $(length(weight_producing_models))." - output_producing_models = wrap.(output_producing_models) - weight_producing_models = wrap.(weight_producing_models) - O = Union{outcometype.(output_producing_models)...} - MaxDecisionBag{O}(output_producing_models, weight_producing_models, args...; kwargs...) - end -end +# function MaxDecisionBag( +# output_producing_models::Vector, +# weight_producing_models::Vector, +# args...; kwargs... +# ) +# @assert length(output_producing_models) > 0 "Cannot instantiate empty bagoutput-producing models!" +# @assert length(weight_producing_models) > 0 "Cannot instantiate empty bagweight-producing models!" +# @assert length(output_producing_models) == length(weight_producing_models) "Cannot instantiate bag with different numbers of output and weight producing models: $(length(output_producing_models)) != $(length(weight_producing_models))." +# output_producing_models = wrap.(output_producing_models) +# weight_producing_models = wrap.(weight_producing_models) +# O = Union{outcometype.(output_producing_models)...} +# MaxDecisionBag{O}(output_producing_models, weight_producing_models, args...; kwargs...) +# end +# end -isensemble(m::MaxDecisionBag) = true +# isensemble(m::MaxDecisionBag) = true -function apply(m::MaxDecisionBag, d::AbstractInterpretation; suppress_parity_warning = false, kwargs...) - weights = [apply(wm, d; suppress_parity_warning, kwargs...) for wm in m.weight_producing_models] - om = m.output_producing_models[argmax(weights)] - pred = apply(om, d; suppress_parity_warning, kwargs...) - # preds = [apply(om, d; suppress_parity_warning, kwargs...) for om in m.output_producing_models] - # pred = aggregation(m)(preds, weights; suppress_parity_warning) - pred -end +# function apply(m::MaxDecisionBag, d::AbstractInterpretation; suppress_parity_warning = false, kwargs...) +# weights = [apply(wm, d; suppress_parity_warning, kwargs...) for wm in m.weight_producing_models] +# om = m.output_producing_models[argmax(weights)] +# pred = apply(om, d; suppress_parity_warning, kwargs...) +# # preds = [apply(om, d; suppress_parity_warning, kwargs...) for om in m.output_producing_models] +# # pred = aggregation(m)(preds, weights; suppress_parity_warning) +# pred +# end -# TODO Add a keyword argument that toggles the soft or hard behavior. The hard behavior is one where you first find the bestguess among the weights, and then perform the apply only on the first +# # TODO Add a keyword argument that toggles the soft or hard behavior. The hard behavior is one where you first find the bestguess among the weights, and then perform the apply only on the first -# TODO parallelize -function apply( - m::MaxDecisionBag, - d::AbstractInterpretationSet; - suppress_parity_warning = false, - kwargs... -) - weights = hcat([apply(wm, d; suppress_parity_warning, kwargs...) for wm in m.weight_producing_models]...) - preds = __apply_post(m, preds) - preds = [ - apply(m.output_producing_models[im], d; suppress_parity_warning, kwargs...) - for im in argmax(weights; dims=2) - ] - preds = __apply_pre(m, d, preds) - return preds -end +# # TODO parallelize +# function apply( +# m::MaxDecisionBag, +# d::AbstractInterpretationSet; +# suppress_parity_warning = false, +# kwargs... 
+# ) +# weights = hcat([apply(wm, d; suppress_parity_warning, kwargs...) for wm in m.weight_producing_models]...) +# preds = __apply_post(m, preds) +# preds = [ +# apply(m.output_producing_models[im], d; suppress_parity_warning, kwargs...) +# for im in argmax(weights; dims=2) +# ] +# preds = __apply_pre(m, d, preds) +# return preds +# end -function apply!(m::MaxDecisionBag, d::AbstractInterpretationSet, y::AbstractVector; mode = :replace, leavesonly = false, suppress_parity_warning = false, kwargs...) - y = __apply_pre(m, d, y) - weights = hcat([apply!(wm, d, y; mode, leavesonly, suppress_parity_warning, kwargs...) for wm in m.weight_producing_models]...) - preds = __apply_post(m, preds) - preds = [ - apply!(m.output_producing_models[im], d, y; mode, leavesonly, suppress_parity_warning, kwargs...) - for im in argmax(weights; dims=2) - ] - preds = __apply_pre(m, d, preds) - return __apply!(m, mode, preds, y, leavesonly) -end +# function apply!(m::MaxDecisionBag, d::AbstractInterpretationSet, y::AbstractVector; mode = :replace, leavesonly = false, suppress_parity_warning = false, kwargs...) +# y = __apply_pre(m, d, y) +# weights = hcat([apply!(wm, d, y; mode, leavesonly, suppress_parity_warning, kwargs...) for wm in m.weight_producing_models]...) +# preds = __apply_post(m, preds) +# preds = [ +# apply!(m.output_producing_models[im], d, y; mode, leavesonly, suppress_parity_warning, kwargs...) +# for im in argmax(weights; dims=2) +# ] +# preds = __apply_pre(m, d, preds) +# return __apply!(m, mode, preds, y, leavesonly) +# end -""" -TODO explain. The output of XGBoost via the strategy "multi:softmax". -""" -const MaxTreeBag{O,W<:RLabel,A<:typeof(+),WW<:RLabel} = MaxDecisionBag{O,ConstantModel{O},DecisionEnsemble{W,DecisionTree,A,WW}} +# """ +# TODO explain. The output of XGBoost via the strategy "multi:softmax". +# """ +# const MaxTreeBag{O,W<:RLabel,A<:typeof(+),WW<:RLabel} = MaxDecisionBag{O,ConstantModel{O},DecisionEnsemble{W,DecisionTree,A,WW}} -function unique_with_indices(x) - unique_vals = unique(x) - indices = [findall(==(val), x) for val in unique_vals] - return unique_vals, indices -end +# function unique_with_indices(x) +# unique_vals = unique(x) +# indices = [findall(==(val), x) for val in unique_vals] +# return unique_vals, indices +# end # function apply!( # dbag::SoleModels.DecisionBag, @@ -398,3 +394,143 @@ end # dbag.info.supporting_predictions = top_prediction # end +# ---------------------------------------------------------------------------- # +# DecisionXGBoost struct # +# ---------------------------------------------------------------------------- # +""" +A `DecisionXGBoost` is an ensemble of models, weighted by leaf values, exp.summed during apply. + +See also [`DecisionForest`](@ref), [`DecisionTree`](@ref), [`DecisionEnsemble`](@ref), [`MaxDecisionBag`](@ref). +""" +struct DecisionXGBoost{O,T<:AbstractModel,A<:Base.Callable} <: AbstractDecisionEnsemble{O} + models::Vector{T} + aggregation::A + info::NamedTuple + + function DecisionXGBoost{O}( + models::AbstractVector{T}, + aggregation::Union{Nothing,Base.Callable}, + info::NamedTuple = (;); + return_sum::Bool=false + ) where {O,T<:AbstractModel} + @assert length(models) > 0 "Cannot instantiate empty ensemble!" + models = wrap.(models) + + if isnothing(aggregation) + aggregation = function(args...; return_sum=false) bestguess(args...; return_sum) end + end + + A = typeof(aggregation) + new{O,T,A}(collect(models), aggregation, info) + end + + function DecisionXGBoost{O}( + models::AbstractVector; + kwargs... 
+    ) where {O}
+        info = (;)
+        DecisionXGBoost{O}(models, nothing, info; kwargs...)
+    end
+
+    function DecisionXGBoost{O}(
+        models::AbstractVector,
+        info::NamedTuple;
+        kwargs...
+    ) where {O}
+        DecisionXGBoost{O}(models, nothing, info; kwargs...)
+    end
+
+    function DecisionXGBoost(
+        models::AbstractVector,
+        args...; kwargs...
+    )
+        @assert length(models) > 0 "Cannot instantiate empty ensemble!"
+        models = wrap.(models)
+        O = Union{outcometype.(models)...}
+        DecisionXGBoost{O}(models, args...; kwargs...)
+    end
+end
+
+isensemble(m::DecisionXGBoost) = true
+
+modelstype(m::DecisionXGBoost{O,T}) where {O,T} = T
+models(m::DecisionXGBoost) = m.models
+nmodels(m::DecisionXGBoost) = length(models(m))
+
+aggregation(m::DecisionXGBoost) = m.aggregation
+scored_aggregation(m::DecisionXGBoost) = aggregation(m)
+
+"""
+    function height(m::DecisionXGBoost)
+
+Return the maximum height across all the [`DecisionTree`](@ref)s within `m`.
+
+See also [`DecisionXGBoost`](@ref), [`DecisionForest`](@ref), [`DecisionTree`](@ref).
+"""
+height(m::DecisionXGBoost) = subtreeheight(m)
+
+immediatesubmodels(m::DecisionXGBoost) = trees(m)
+nimmediatesubmodels(m::DecisionXGBoost) = length(trees(m))
+listimmediaterules(m::DecisionXGBoost; kwargs...) = error("TODO implement")
+
+# ---------------------------------------------------------------------------- #
+#                             DecisionXGBoost apply                             #
+# ---------------------------------------------------------------------------- #
+function apply(
+    m::DecisionXGBoost,
+    d::AbstractInterpretation;
+    suppress_parity_warning=false,
+    kwargs...
+)
+    preds = [apply_leaf_scores(subm, d; suppress_parity_warning, kwargs...) for subm in models(m)]
+    preds = __apply_post(m, preds)
+    scored_aggregation(m)(preds, sort(unique(m.info.supporting_labels)); suppress_parity_warning)
+end
+
+# TODO parallelize
+function apply(
+    m::DecisionXGBoost,
+    d::AbstractInterpretationSet;
+    suppress_parity_warning=false,
+    kwargs...
+)
+    # We expect nrounds × nclasses trees: at every boosting round, XGBoost
+    # creates one tree per class label. So, in every subm model, we'll find
+    # as many trees as class labels.
+    preds = hcat([apply_leaf_scores(subm, d; suppress_parity_warning, kwargs...) for subm in models(m)]...)
+    preds = __apply_post(m, preds)
+    preds = [
+        scored_aggregation(m)(pred, sort(unique(m.info.supporting_labels)))
+        for pred in eachrow(preds)
+    ]
+    return preds
+end
+
+# TODO parallelize
+# function apply!(
+#     m::DecisionXGBoost,
+#     d::AbstractInterpretationSet,
+#     y::AbstractVector;
+#     mode = :replace,
+#     leavesonly = false,
+#     # show_progress = false, # length(ntrees(m)) > 15,
+#     suppress_parity_warning = false,
+#     kwargs...
+# )
+#     # @show y
+#     y = __apply_pre(m, d, y)
+#     # _d = SupportedLogiset(d) TODO?
+#     # @show y
+#     preds = hcat([apply!(subm, d, y; mode, leavesonly, kwargs...) for subm in models(m)]...)
+
+#     preds = __apply_post(m, preds)
+
+#     preds = [
+#         weighted_aggregation(m)(preds[i,:]; suppress_parity_warning, kwargs...)
+# for i in 1:size(preds,1) +# ] + +# preds = __apply_pre(m, d, preds) +# return __apply!(m, mode, preds, y, leavesonly) +# end + diff --git a/src/utils/models/leaf.jl b/src/utils/models/leaf.jl index 599f91a..d49101d 100644 --- a/src/utils/models/leaf.jl +++ b/src/utils/models/leaf.jl @@ -95,6 +95,43 @@ end convert(::Type{ConstantModel{O}}, o::O) where {O} = ConstantModel{O}(o) convert(::Type{<:AbstractModel{F}}, m::ConstantModel) where {F} = ConstantModel{F}(m) +# ---------------------------------------------------------------------------- # +# DecisionXGBoost apply # +# ---------------------------------------------------------------------------- # +outcome_leaf_value(m::ConstantModel) = m.info.leaf_values + +apply_leaf_scores(m::ConstantModel, i::AbstractInterpretation; kwargs...) = outcome(m) +apply_leaf_scores( + m::ConstantModel, + d::AbstractInterpretationSet, + i_instance::Integer; + kwargs... +) = (outcome(m), outcome_leaf_value(m)) +apply_leaf_scores( + m::ConstantModel, + d::AbstractInterpretationSet; + kwargs... +) = Fill((outcome(m), outcome_leaf_value(m)), ninstances(d)) + +function apply_leaf_scores!( + m::ConstantModel, + d::AbstractInterpretationSet, + y::AbstractVector; + mode = :replace, + leavesonly = false, + kwargs... +) + # @assert length(y) == ninstances(d) "$(length(y)) == $(ninstances(d))" + if mode == :replace + recursivelyemptysupports!(m, leavesonly) + mode = :append + end + + preds = fill((outcome(m), outcome_leaf_value(m)), ninstances(d)) + + return __apply!(m, mode, preds, y, leavesonly) +end + ############################################################################################ ################################### FunctionModel ########################################## ############################################################################################ diff --git a/src/utils/models/rule-and-branch.jl b/src/utils/models/rule-and-branch.jl index b845e31..e45df00 100644 --- a/src/utils/models/rule-and-branch.jl +++ b/src/utils/models/rule-and-branch.jl @@ -347,6 +347,35 @@ function apply( preds end +# ---------------------------------------------------------------------------- # +# DecisionXGBoost apply # +# ---------------------------------------------------------------------------- # +function apply_leaf_scores( + m::Branch, + d::AbstractInterpretationSet; + check_args::Tuple = (), + check_kwargs::NamedTuple = (;), + kwargs... +) + checkmask = checkantecedent(m, d, check_args...; check_kwargs...) + preds = Vector(undef,length(checkmask)) + preds[checkmask] .= apply_leaf_scores( + posconsequent(m), + slicedataset(d, checkmask; return_view = true, allow_no_instances = true); + check_args = check_args, + check_kwargs = check_kwargs, + kwargs... + ) + preds[(!).(checkmask)] .= apply_leaf_scores( + negconsequent(m), + slicedataset(d, (!).(checkmask); return_view = true, allow_no_instances = true); + check_args = check_args, + check_kwargs = check_kwargs, + kwargs... 
+ ) + preds +end + function apply!( m::Branch, d::AbstractInterpretationSet, diff --git a/test/XgBoostExt/xgboost.jl b/test/XgBoostExt/xgboost.jl index b8d9a24..de05f1d 100644 --- a/test/XgBoostExt/xgboost.jl +++ b/test/XgBoostExt/xgboost.jl @@ -7,6 +7,7 @@ using DataFrames using MLJXGBoostInterface using SoleModels +import MLJModelInterface as MMI import XGBoost as XGB using Random, CategoricalArrays @@ -29,7 +30,7 @@ println("Test set type: ", typeof(X_test), " - ", typeof(y_test)) XGTrees = MLJ.@load XGBoostClassifier pkg=XGBoost model = XGTrees(; - num_round=10, + num_round=1, max_depth=6, objective="multi:softmax" ) @@ -41,28 +42,31 @@ fit!(mach) trees = XGB.trees(mach.fitresult[1]) -featurenames = mach.report.vals[1][1] -ds_safetest = vcat(y, "nothing") - +get_encoding(classes_seen) = Dict(MMI.int(c) => c for c in MMI.classes(classes_seen)) +get_classlabels(encoding) = [string(encoding[i]) for i in sort(keys(encoding) |> collect)] +encoding = get_encoding(mach.fitresult[2]) +classlabels = get_classlabels(encoding) +featurenames = mach.report.vals[1].features +# ds_safetest = vcat(y_train, "nothing") -solem = solemodel(trees, Matrix(X), ds_safetest) -solem = solemodel(trees, Matrix(X), ds_safetest; featurenames) -solem = solemodel(trees, Matrix(X), ds_safetest; featurenames, keep_condensed = false) +# solem = solemodel(trees, Matrix(X_train), y_train) +solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames) +solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, keep_condensed = false) @test SoleData.scalarlogiset(X_test; allow_propositional = true) isa PropositionalLogiset # Make test instances flow into the model preds = apply(solem, X_test) -preds2 = apply!(solem, X_test, y_test) +# preds2 = apply!(solem, X_test, y_test) -@test preds == preds2 +# @test preds == preds2 accuracy = sum(preds .== y_test)/length(y_test) -@test accuracy > 0.7 +@test accuracy > 0.9 # apply!(solem, X_test, y_test, mode = :append) -solem = @test_throws ErrorException solemodel(trees, Matrix(X), ds_safetest; featurenames, keep_condensed = true) -solem = @test_nowarn solemodel(trees, Matrix(X), ds_safetest; featurenames, keep_condensed = false) +solem = @test_throws ErrorException solemodel(trees, Matrix(X_train), y_train; classlabels, keep_condensed = true) +solem = @test_nowarn solemodel(trees, Matrix(X_train), y_train; classlabels, keep_condensed = false) printmodel(solem; max_depth = 7, show_intermediate_finals = true, show_metrics = true) @@ -80,3 +84,43 @@ bst = XGB.xgboost( # obtain model predictions ŷ = XGB.predict(bst, X_test) +predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1 +@test predsl == ŷ + +outperform = 0 +underperform = 0 +i = 0 + +for seed in 1:40 + rng = Xoshiro(seed) + train, test = partition(eachindex(y), train_ratio; shuffle=true, rng) + X_train, y_train = X[train, :], y[train] + X_test, y_test = X[test, :], y[test] + for num_round in 10:10:100 + for eta in 0.1:0.1:0.9 + model = XGTrees(; num_round, eta, objective="multi:softmax") + mach = machine(model, X_train, y_train) + fit!(mach) + trees = XGB.trees(mach.fitresult[1]) + solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames) + preds = apply(solem, X_test) + predsl = CategoricalArrays.levelcode.(categorical(preds)) + + yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1 + bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softmax") + ŷ = XGB.predict(bst, X_test) + + sole_accuracy = sum(predsl .== 
CategoricalArrays.levelcode.(categorical(y_test)))/length(y_test)
+            xgb_accuracy = sum(ŷ .== CategoricalArrays.levelcode.(categorical(y_test)) .- 1)/length(y_test)
+
+            sole_accuracy > xgb_accuracy && global outperform += 1
+            sole_accuracy < xgb_accuracy && global underperform += 1
+            i += 1
+        end
+    end
+end
+
+@test outperform > underperform
+println("SoleModel outperformed XGBoost in $outperform out of $i tests.")
+println("SoleModel underperformed XGBoost in $underperform out of $i tests.")
+

From 2ad644250ac864757f73c250b25fbc616bce5456 Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Sat, 12 Apr 2025 00:23:10 +0200
Subject: [PATCH 11/44] xgboost float predict issue

---
 test/XgBoostExt/xgboost_predict_issue.jl | 446 +++++++++++++++++++++++
 1 file changed, 446 insertions(+)
 create mode 100644 test/XgBoostExt/xgboost_predict_issue.jl

diff --git a/test/XgBoostExt/xgboost_predict_issue.jl b/test/XgBoostExt/xgboost_predict_issue.jl
new file mode 100644
index 0000000..a91a66e
--- /dev/null
+++ b/test/XgBoostExt/xgboost_predict_issue.jl
@@ -0,0 +1,446 @@
+using MLJ
+using DataFrames
+using MLJXGBoostInterface
+import MLJModelInterface as MMI
+using SoleModels
+import XGBoost as XGB
+using CategoricalArrays
+using Random
+
+function predict_xgboost_bag(trees, X; n_classes=0, objective="binary:logistic")
+    n_samples = size(X, 1)
+    ntree_limit = length(trees)
+    n_classes == 0 && throw(ArgumentError("n_classes must be specified for multi-class predictions"))
+
+    # Initialize predictions
+    if startswith(objective, "multi:softprob") || startswith(objective, "multi:softmax")
+        # For multi-class probabilities, we need a matrix
+        raw_preds = zeros(Float64, n_samples, n_classes)
+    else
+        # For binary and regression, a vector is sufficient
+        raw_preds = zeros(Float64, n_samples)
+    end
+
+    # Iterate through trees and accumulate predictions
+    for i in 1:ntree_limit
+        tree = trees[i]
+        tree_preds = predict_tree(tree, X)
+        @show tree_preds
+        if startswith(objective, "multi:softprob") || startswith(objective, "multi:softmax")
+            # For multi-class softprob, each tree outputs predictions for a specific class
+            class_idx = (i - 1) % n_classes + 1
+            raw_preds[:, class_idx] .+= tree_preds
+            @show class_idx
+            @show raw_preds
+        else
+            # For binary or regression, simply add the predictions
+            raw_preds .+= tree_preds
+        end
+    end
+    # Apply appropriate transformation based on objective
+    if objective == "binary:logistic"
+        # Apply sigmoid transformation
+        return 1.0 ./ (1.0 .+ exp.(-raw_preds))
+    elseif objective == "multi:softprob"
+        # Apply softmax transformation
+        exp_preds = exp.(raw_preds)
+        row_sums = sum(exp_preds, dims=2)
+        @show exp_preds
+        @show row_sums
+        @show exp_preds ./ row_sums
+        return exp_preds ./ row_sums
+    elseif objective == "multi:softmax"
+        # Return class with highest score
+        if n_classes > 1
+            _, indices = findmax(raw_preds, dims=2)
+            return [idx[2] for idx in indices]
+        else
+            return raw_preds .> 0
+        end
+    elseif objective == "count:poisson"
+        # Apply exponential transformation for Poisson
+        return exp.(raw_preds)
+    else
+        # For regression or other objectives, return raw predictions
+        return raw_preds
+    end
+end
+
+function predict_tree(tree, X)
+    n_samples = size(X, 1)
+    predictions = zeros(Float64, n_samples)
+
+    for i in 1:n_samples
+        predictions[i] = traverse_tree(tree, X[i, :])
+    end
+    return predictions
+end
+
+function traverse_tree(tree, x)
+    # Start at root node
+    node = tree # Adjust based on your tree structure
+
+    # Traverse until reaching a leaf
+    while !isempty(node.children)
+        # Get the split feature and value
+        feature_idx = node.split
+        split_value = node.split_condition
+
+        # Decide which child to go to
+        if x[feature_idx] < split_value
+            node = node.children[1]
+        else
+            node = node.children[2]
+        end
+    end
+    # Return the leaf value
+    return node.leaf
+end
+
+X, y = @load_iris
+X = DataFrame(X)
+train_ratio = 0.8
+seed, num_round, eta = 3, 1, 0.1
+rng = Xoshiro(seed)
+train, test = partition(eachindex(y), train_ratio; shuffle=true, rng)
+X_train, y_train = X[train, :], y[train]
+X_test, y_test = X[test, :], y[test]
+
+XGTrees = MLJ.@load XGBoostClassifier pkg=XGBoost
+model = XGTrees(; num_round, eta, objective="multi:softprob")
+mach = machine(model, X_train, y_train)
+fit!(mach)
+# mlj_predict = predict(mach, DataFrame(X_test[27,:])) # WORKING
+mlj_predict = predict(mach, DataFrame(X_test[28,:])) # NOT WORKING
+trees = XGB.trees(mach.fitresult[1])
+get_encoding(classes_seen) = Dict(MMI.int(c) => c for c in MMI.classes(classes_seen))
+get_classlabels(encoding) = [string(encoding[i]) for i in sort(keys(encoding) |> collect)]
+encoding = get_encoding(mach.fitresult[2])
+classlabels = get_classlabels(encoding)
+@show classlabels
+featurenames = mach.report.vals[1].features
+solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames)
+# preds = apply(solem, DataFrame(X_test[27,:])) # WORKING
+preds = apply(solem, DataFrame(X_test[28,:])) # NOT WORKING
+predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1
+
+yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1
+bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softprob")
+xtrs = XGB.trees(bst)
+# yyy = XGB.predict(bst, DataFrame(X_test[27,:])) # WORKING
+yyy = XGB.predict(bst, DataFrame(X_test[28,:])) # NOT WORKING
+
+
+# # For multi-class classification
+rename!(X_test, [:f0, :f1, :f2, :f3])
+# class_probs = predict_xgboost_bag(trees, DataFrame(X_test[27,:]); n_classes=3, objective="multi:softprob") # WORKING
+class_probs = predict_xgboost_bag(trees, DataFrame(X_test[28,:]); n_classes=3, objective="multi:softprob") # NOT WORKING
+class_preds = [argmax(probs) for probs in eachrow(class_probs)] .-1
+
+isapprox(Float32.(class_probs), yyy, atol=1e-5)
+
+# # For regression
+# reg_preds = predict_xgboost_bag(mtrs, X_test, objective="reg:squarederror")
+
+# num_round = 20
+# eta = 0.3
+# yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1
+# bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softmax")
+# ŷ = XGB.predict(bst, X_test)
+
+### TREE 1
+"""
+xtrs[1].cover = 53.3333282
+xtrs[1].gain = 55.7546806
+xtrs[1].nmissing = 2
+xtrs[1].yes = 1
+xtrs[1].no = 2
+xtrs[1].split = "petal_length"
+xtrs[1].split_condition = 3.0
+
+xtrs[1].children[1].cover = 16.8888874
+xtrs[1].children[1].id = 1
+xtrs[1].children[1].leaf = 0.141614899
+
+xtrs[1].children[2].cover = 36.4444427
+xtrs[1].children[2].id = 2
+xtrs[1].children[2].leaf = -0.072997041
+
+solem.models[1].info =
+(leaf_values = [0.141614899, -0.072997041],
+ supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa", "virginica", "virginica", "virginica"],
+ supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],)
+
+ solem.models[1].antecedent =
+Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_length] < 3.0
+
+solem.models[1].posconsequent.outcome = CategoricalValue{String, UInt32} "setosa"
+solem.models[1].posconsequent.info = +(leaf_values = 0.141614899, + supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa"], + supporting_labels = ["setosa", "virginica", "versicolor"],) + +solem.models[1].negconsequent.outcome = CategoricalValue{String, UInt32} "virginica" + solem.models[1].negconsequent.info = +(leaf_values = -0.072997041, + supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"], + supporting_labels = ["setosa", "virginica", "versicolor"],) +""" + +### TREE 2 +""" +xtrs[2].cover = 53.3333282 +xtrs[2].gain = 11.9339008 +xtrs[2].nmissing = 2 +xtrs[2].yes = 1 +xtrs[2].no = 2 +xtrs[2].split = "petal_length" +xtrs[2].split_condition = 3.0 + + xtrs[2]children[1].cover = 16.8888874 + xtrs[2]children[1].id = 1 + xtrs[2]children[1].leaf = -0.070807457 + + xtrs[2].children[2].cover = 36.4444427 + xtrs[2].children[2].gain = 35.383049 + xtrs[2].children[2].nmissing = 4 + xtrs[2].children[2].yes = 3 + xtrs[2].children[2].no = 4 + xtrs[2].children[2].split = "petal_length" + xtrs[2].children[2].split_condition = 4.9000001 + + xtrs[2].children[2].children[1].cover = 17.7777767 + xtrs[2].children[2].children[1].gain = 4.09395218 + xtrs[2].children[2].children[1].nmissing = 6 + xtrs[2].children[2].children[1].yes = 5 + xtrs[2].children[2].children[1].no = 6 + xtrs[2].children[2].children[1].split = "petal_width" + xtrs[2].children[2].children[1].split_condition = 1.70000005 + + xtrs[2].children[2].children[1].children[1].cover = 15.999999 + xtrs[2].children[2].children[1].children[1].id = 5 + xtrs[2].children[2].children[1].children[1].leaf = 0.141176477 + + xtrs[2].children[2].children[1].children[2].cover = 1.77777767 + xtrs[2].children[2].children[1].children[2].id = 6 + xtrs[2].children[2].children[1].children[2].leaf = -0.0120000029 + + xtrs[2].children[2].children[2].cover = 18.666666 + xtrs[2].children[2].children[2].gain = 0.264455795 + xtrs[2].children[2].children[2].nmissing = 8 + xtrs[2].children[2].children[2].yes = 7 + xtrs[2].children[2].children[2].no = 8 + xtrs[2].children[2].children[2].split = "petal_width" + xtrs[2].children[2].children[2].split_condition = 1.70000005 + + xtrs[2].children[2].children[2].children[1].cover = 2.22222209 + xtrs[2].children[2].children[2].children[1].id = 7 + xtrs[2].children[2].children[2].children[1].leaf = -0.0206896588 + + xtrs[2].children[2].children[2].children[2].cover = 16.4444427 + xtrs[2].children[2].children[2].children[2].id = 8 + xtrs[2].children[2].children[2].children[2].leaf = -0.0707006454 + +solem.models[2].info = +(leaf_values = [-0.070807457, 0.141176477, -0.0120000029, -0.0206896588, -0.0707006454], + supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa", "versicolor", "versicolor", "versicolor", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica"], + supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],) +solem.models[2].antecedent = +Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_length] < 3.0 + + solem.models[2].posconsequent.outcome = CategoricalValue{String, UInt32} "setosa" + solem.models[2].posconsequent.info = + (leaf_values = -0.070807457, + supporting_predictions = CategoricalValue{String, UInt32}["setosa", 
"setosa", "setosa"], + supporting_labels = ["setosa", "virginica", "versicolor"],) + + solem.models[2].negconsequent.antecedent = + Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_length] < 4.9000001 + solem.models[2].negconsequent.info = + (leaf_values = [0.141176477, -0.0120000029, -0.0206896588, -0.0707006454], + supporting_predictions = CategoricalValue{String, UInt32}["versicolor", "versicolor", "versicolor", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica"], + supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],) + + solem.models[2].negconsequent.posconsequent.antecedent = + Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_width] < 1.70000005 + solem.models[2].negconsequent.posconsequent.info = + (leaf_values = [0.141176477, -0.0120000029], + supporting_predictions = CategoricalValue{String, UInt32}["versicolor", "versicolor", "versicolor", "virginica", "virginica", "virginica"], + supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],) + + solem.models[2].negconsequent.posconsequent.posconsequent.outcome = CategoricalValue{String, UInt32} "versicolor" + solem.models[2].negconsequent.posconsequent.posconsequent.info = + (leaf_values = 0.141176477, + supporting_predictions = CategoricalValue{String, UInt32}["versicolor", "versicolor", "versicolor"], + supporting_labels = ["setosa", "virginica", "versicolor"],) + + solem.models[2].negconsequent.posconsequent.negconsequent.outcome = CategoricalValue{String, UInt32} "virginica" + solem.models[2].negconsequent.posconsequent.negconsequent.info = + (leaf_values = -0.0120000029, + supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"], + supporting_labels = ["setosa", "virginica", "versicolor"],) + + solem.models[2].negconsequent.negconsequent.antecedent = + Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_width] < 1.70000005 + solem.models[2].negconsequent.negconsequent.info = + (leaf_values = [-0.0206896588, -0.0707006454], + supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica", "virginica", "virginica", "virginica"], + supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],) + + solem.models[2].negconsequent.negconsequent.posconsequent.outcome = CategoricalValue{String, UInt32} "virginica" + solem.models[2].negconsequent.negconsequent.posconsequent.info = + (leaf_values = -0.0206896588, + supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"], + supporting_labels = ["setosa", "virginica", "versicolor"],) + + solem.models[2].negconsequent.negconsequent.negconsequent.outcome = CategoricalValue{String, UInt32} "virginica" + solem.models[2].negconsequent.negconsequent.negconsequent.info = + (leaf_values = -0.0707006454, + supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"], + supporting_labels = ["setosa", "virginica", "versicolor"],) +""" + +### TREE 3 +""" +xtrs[3].cover = 53.3333282 +xtrs[3].gain = 51.9276886 +xtrs[3].nmissing = 2 +xtrs[3].yes = 1 +xtrs[3].no = 2 
+xtrs[3].split = "petal_length" +xtrs[3].split_condition = 4.80000019 + + xtrs[3].children[1].cover = 32.8888855 + xtrs[3].children[1].gain = 0.676908493 + xtrs[3].children[1].nmissing = 4 + xtrs[3].children[1].yes = 3 + xtrs[3].children[1].no = 4 + xtrs[3].children[1].split = "petal_width" + xtrs[3].children[1].split_condition = 1.60000002 + + xtrs[3].children[1].children[1].cover = 31.5555534 + xtrs[3].children[1].children[1].id = 3 + xtrs[3].children[1].children[1].leaf = -0.0726962537 + + xtrs[3].children[1].children[2].cover = 1.33333325 + xtrs[3].children[1].children[2].id = 4 + xtrs[3].children[1].children[2].leaf = -2.55448485e-9 + + xtrs[3].children[2].cover = 20.4444427 + xtrs[3].children[2].gain = 1.53349686 + xtrs[3].children[2].nmissing = 6 + xtrs[3].children[2].yes = 5 + xtrs[3].children[2].no = 6 + xtrs[3].children[2].split = "petal_length" + xtrs[3].children[2].split_condition = 4.9000001 + + xtrs[3].children[2].children[1].cover = 1.77777767 + xtrs[3].children[2].children[1].id = 5 + xtrs[3].children[2].children[1].leaf = 0.0239999983 + + xtrs[3].children[2].children[2].cover = 18.666666 + xtrs[3].children[2].children[2].id = 6 + xtrs[3].children[2].children[2].leaf = 0.137288138 + +solem.models[3].info = +(leaf_values = [-0.0726962537, -2.55448485e-9, 0.0239999983, 0.137288138], + supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica"], + supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],) +solem.models[3].antecedent = Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_length] < 4.80000019 + + solem.models[3].posconsequent.info = + (leaf_values = [-0.0726962537, -2.55448485e-9], + supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa", "virginica", "virginica", "virginica"], + supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],) + solem.models[3].posconsequent.antecedent = Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_width] < 1.60000002 + + solem.models[3].posconsequent.posconsequent.info + (leaf_values = -0.0726962537, + supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa"], + supporting_labels = ["setosa", "virginica", "versicolor"],) + solem.models[3].posconsequent.posconsequent.outcome = CategoricalValue{String, UInt32} "setosa" + + solem.models[3].posconsequent.negconsequent.info = + (leaf_values = -2.55448485e-9, + supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"], + supporting_labels = ["setosa", "virginica", "versicolor"],) + solem.models[3].posconsequent.negconsequent.outcome = CategoricalValue{String, UInt32} "virginica" + + solem.models[3].negconsequent.info = + (leaf_values = [0.0239999983, 0.137288138], + supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica", "virginica", "virginica", "virginica"], + supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],) + solem.models[3].negconsequent.antecedent = Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, 
Symbol}, typeof(<)}}}: [petal_length] < 4.9000001
+
+    solem.models[3].negconsequent.posconsequent.info =
+    (leaf_values = 0.0239999983,
+    supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"],
+    supporting_labels = ["setosa", "virginica", "versicolor"],)
+    solem.models[3].negconsequent.posconsequent.outcome = CategoricalValue{String, UInt32} "virginica"
+
+    solem.models[3].negconsequent.negconsequent.info =
+    (leaf_values = 0.137288138,
+    supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"],
+    supporting_labels = ["setosa", "virginica", "versicolor"],)
+    solem.models[3].negconsequent.negconsequent.outcome = CategoricalValue{String, UInt32} "virginica"
+"""
+
+# calculating the probabilities
+
+# Row │ sepal_length  sepal_width  petal_length  petal_width
+#     │ Float64       Float64      Float64       Float64
+# ─────┼──────────────────────────────────────────────────────
+#   1 │ 6.9           3.1          4.9           1.5
+
+### TREE 1: probability of setosa
+"""
+"petal_length" < 3.0 -- no >> leaf = -0.072997041
+"""
+
+### TREE 2: probability of versicolor
+"""
+"petal_length" < 3.0 -- no > "petal_length" < 4.9000001 -- yes > "petal_width" < 1.70000005 -- yes >> leaf = 0.141176477
+"""
+
+### TREE 3: probability of virginica
+"""
+"petal_length" < 4.80000019 -- no > "petal_length" < 4.9000001 -- yes >> leaf = 0.0239999983
+"""
+
+### calculating multi:softprob
+"""
+exp_preds = exp.(-0.072997041 0.141176477 0.0239999983) = 0.929604 1.15163 1.02429
+row_sums = sum(exp_preds, dims=2) = 3.1055217627515077
+probability = exp_preds / row_sums = 0.299339 0.370832 0.329829
+
+XGBoost probability: 0.304161 0.320495 0.375344
+"""
+
+"""
+### reasoning by contradiction: a rounding problem ###
+"petal_length" = 4.9
+"petal_length" < 4.9000001 is evaluated as false
+
+so:
+# tree 2
+"petal_length" < 3.0 -- no > "petal_length" < 4.9000001 -- no > "petal_width" < 1.70000005 -- yes >> leaf = -0.0206896588
+# tree 3
+"petal_length" < 4.80000019 -- no > "petal_length" < 4.9000001 -- no >> leaf = 0.137288138
+"""
+exp_preds = exp.([-0.072997041, -0.0206896588, 0.137288138])
+row_sums = sum(exp_preds)
+probability = exp_preds ./ row_sums
+
+"""
+# 3-element Vector{Float64}:
+# 0.3041612750760762
+# 0.320494608175597
+# 0.3753441167483268
+
+# XGBoost probability: 0.304161 0.320495 0.375344
+
+PROBLEM SOLVED
+if we evaluate
+4.9 < 4.9000001 as false
+then we get the same result as XGBoost's predict
+"""
\ No newline at end of file
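The rounding claim above can be checked directly at the Julia REPL. This is a minimal sanity check using only Base Julia, not part of the patch series; 4.9000001 is simply the shortest decimal that round-trips to the underlying Float32 split threshold:

    julia> 4.9 < 4.9000001                      # Float64 comparison: the "yes" branch is taken
    true

    julia> Float32(4.9) == Float32(4.9000001)   # both decimals round to the same Float32 value
    true

    julia> Float32(4.9) < Float32(4.9000001)    # the comparison XGBoost actually performs: "no" branch
    false

This is what the patches below address: casting both feature values and split conditions to Float32 before comparing makes the reconstructed model take the same branches as XGBoost's own predict.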
From dd2350190d8e86b40321f33b24ae115d9ba5802d Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Wed, 14 Apr 2025 15:23:34 +0200
Subject: [PATCH 12/44] xgboost predict found areas requiring fixes

---
 ext/XGBoostExt.jl                        | 16 ++++++++++--
 test/XgBoostExt/xgboost.jl               | 25 +++++++++++++++-
 test/XgBoostExt/xgboost_predict_issue.jl | 36 ++++++++++++++++------
 3 files changed, 65 insertions(+), 12 deletions(-)

diff --git a/ext/XGBoostExt.jl b/ext/XGBoostExt.jl
index 78d7031..22cf984 100644
--- a/ext/XGBoostExt.jl
+++ b/ext/XGBoostExt.jl
@@ -115,7 +115,9 @@ end
 function early_return(leaf, antecedent, clabel, classl)
     info =(;
-        leaf_values = leaf,
+        # leaf_values = leaf,
+        ### debug convert to Float32 TODO delete
+        leaf_values = Float32(leaf),
         supporting_predictions = clabel,
         supporting_labels = [classl],
     )
@@ -150,6 +152,8 @@ function SoleModels.solemodel(
         # xgboost trees could be composed of only one leaf, without any split
         if isnothing(t.split)
             antecedent = Atom(get_condition(class_idx, featurenames; test_operator=(<), featval=Inf))
+            ### debug different test_operator TODO delete
+            # antecedent = Atom(get_condition(class_idx, featurenames; test_operator=(≤), featval=Inf))
             early_return(t.leaf, antecedent, clabels, classlabels[class_idx])
         else
             SoleModels.solemodel(t, X, y; classlabels, featurenames, class_idx, clabels, kwargs...)
@@ -186,14 +190,20 @@ function SoleModels.solemodel(
     clabels
 )
     antecedent = Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(<)))
+    ### debug different test_operator TODO delete
+    # antecedent = Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(≤)))
 
     # create a new path for the left branch
     left_path = copy(path_conditions)
     push!(left_path, Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(<))))
+    ### debug different test_operator TODO delete
+    # push!(left_path, Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(≤))))
 
     # create a new path for the right branch
     right_path = copy(path_conditions)
     push!(right_path, Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(≥))))
+    ### debug different test_operator TODO delete
+    # push!(right_path, Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(>))))
 
     lefttree = if isnothing(tree.children[1].split)
         # @show SoleModels.join_antecedents(left_path)
@@ -238,7 +248,9 @@ function xgbleaf(
     isnothing(prediction) && return nothing
 
     info = (;
-        leaf_values = leaf.leaf,
+        # leaf_values = leaf.leaf,
+        ### debug convert to Float32 TODO delete
+        leaf_values = Float32(leaf.leaf),
         supporting_predictions = fill(prediction, length(labels)),
         supporting_labels = labels,
     )
diff --git a/test/XgBoostExt/xgboost.jl b/test/XgBoostExt/xgboost.jl
index de05f1d..9a1ef99 100644
--- a/test/XgBoostExt/xgboost.jl
+++ b/test/XgBoostExt/xgboost.jl
@@ -90,6 +90,7 @@ predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1
 outperform = 0
 underperform = 0
 i = 0
+j = 0
 
 for seed in 1:40
     rng = Xoshiro(seed)
@@ -110,17 +111,39 @@ for seed in 1:40
             bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softmax")
             ŷ = XGB.predict(bst, X_test)
 
+            (predsl .-1) != ŷ && global j += 1
+
             sole_accuracy = sum(predsl .== CategoricalArrays.levelcode.(categorical(y_test)))/length(y_test)
             xgb_accuracy = sum(ŷ .== CategoricalArrays.levelcode.(categorical(y_test)) .- 1)/length(y_test)
 
             sole_accuracy > xgb_accuracy && global outperform += 1
             sole_accuracy < xgb_accuracy && global underperform += 1
-            i += 1
+            global i += 1
         end
     end
 end
 
 @test outperform > underperform
+println("Different predictions: $j out of $i tests.")
 println("SoleModel outperformed XGBoost in $outperform out of $i tests.")
 println("SoleModel underperformed XGBoost in $underperform out of $i tests.")
+println("Tests with differing accuracy: ", outperform + underperform)
+
+"""
+Test with original test operator = <
+Different predictions: 1051 out of 3600 tests.
+SoleModel outperformed XGBoost in 744 out of 3600 tests.
+SoleModel underperformed XGBoost in 231 out of 3600 tests.
+Tests with differing accuracy: 975
+"""
+
+"""
+Test with custom test operator = <=
+Different predictions: 1538 out of 3600 tests.
+SoleModel outperformed XGBoost in 1231 out of 3600 tests.
+SoleModel underperformed XGBoost in 202 out of 3600 tests.
+Tests with differing accuracy: 1433
+
+Note how the accuracy has increased. But we are still working on iris, so this may be an isolated case.
+"""

diff --git a/test/XgBoostExt/xgboost_predict_issue.jl b/test/XgBoostExt/xgboost_predict_issue.jl
index a91a66e..5a73a29 100644
--- a/test/XgBoostExt/xgboost_predict_issue.jl
+++ b/test/XgBoostExt/xgboost_predict_issue.jl
@@ -7,6 +7,13 @@ import XGBoost as XGB
 using CategoricalArrays
 using Random
 
+# References:
+# https://github.com/chengjunhou/xgb2sql/issues/1
+# https://xgboost.readthedocs.io/en/latest/R-package/xgboostfromJSON.html
+
+# for my own reference
+# https://xgboost.readthedocs.io/en/latest/build.html
+
 function predict_xgboost_bag(trees, X; n_classes=0, objective="binary:logistic")
     n_samples = size(X, 1)
     ntree_limit = length(trees)
@@ -15,10 +22,10 @@ function predict_xgboost_bag(trees, X; n_classes=0, objective="binary:logistic")
     # Initialize predictions
     if startswith(objective, "multi:softprob") || startswith(objective, "multi:softmax")
         # For multi-class probabilities, we need a matrix
-        raw_preds = zeros(Float64, n_samples, n_classes)
+        raw_preds = zeros(Float32, n_samples, n_classes)
     else
         # For binary and regression, a vector is sufficient
-        raw_preds = zeros(Float64, n_samples)
+        raw_preds = zeros(Float32, n_samples)
     end
 
@@ -45,9 +52,9 @@ function predict_xgboost_bag(trees, X; n_classes=0, objective="binary:logistic")
         # Apply softmax transformation
         exp_preds = exp.(raw_preds)
         row_sums = sum(exp_preds, dims=2)
-        @show exp_preds
-        @show row_sums
-        @show exp_preds ./ row_sums
+        @show typeof(exp_preds)
+        @show typeof(row_sums)
+        @show typeof(exp_preds ./ row_sums)
         return exp_preds ./ row_sums
     elseif objective == "multi:softmax"
         # Return class with highest score
@@ -68,11 +75,12 @@ end
 
 function predict_tree(tree, X)
     n_samples = size(X, 1)
-    predictions = zeros(Float64, n_samples)
+    predictions = zeros(Float32, n_samples)
 
     for i in 1:n_samples
         predictions[i] = traverse_tree(tree, X[i, :])
     end
+    @show typeof(predictions)
     return predictions
 end
 
@@ -84,9 +92,11 @@ function traverse_tree(tree, x)
     while !isempty(node.children)
         # Get the split feature and value
         feature_idx = node.split
-        split_value = node.split_condition
+        split_value = Float32(node.split_condition)
 
         # Decide which child to go to
+        @show typeof(x[feature_idx])
+        @show typeof(split_value)
         if x[feature_idx] < split_value
             node = node.children[1]
         else
@@ -94,7 +104,8 @@ function traverse_tree(tree, x)
         end
     end
     # Return the leaf value
-    return node.leaf
+    @show typeof(node.leaf)
+    return Float32(node.leaf)
 end
 
 X, y = @load_iris
@@ -137,7 +148,14 @@ rename!(X_test, [:f0, :f1, :f2, :f3])
 # class_probs = predict_xgboost_bag(trees, DataFrame(X_test[27,:]); n_classes=3, objective="multi:softprob") # WORKING
 class_probs = predict_xgboost_bag(trees, DataFrame(X_test[28,:]); n_classes=3, objective="multi:softprob") # NOT WORKING
 class_preds = [argmax(probs) for probs in eachrow(class_probs)] .-1
 
-isapprox(Float32.(class_probs), yyy, atol=1e-5)
+X_train32 = DataFrame(Float32.(Matrix(X_train)), [:f0, :f1, :f2, :f3])
+bst32 = XGB.xgboost((X_train32, yl_train); num_round, eta, num_class=3, objective="multi:softprob")
+xtrs32 = XGB.trees(bst32)
+X_test32 = DataFrame(reshape(Float32.(Vector(X_test[28,:])), 1, :), [:f0, :f1, :f2, :f3])
+class_probs = predict_xgboost_bag(xtrs32, X_test32; n_classes=3, objective="multi:softprob") # NOT WORKING
+
+
+# isapprox(Float32.(class_probs), yyy, atol=1e-5) # note to self: remember "atol"
 
 # # For regression
 # reg_preds = predict_xgboost_bag(mtrs, X_test, objective="reg:squarederror")

From 93fba4de571cd8e6732c36025c34625f4b521ff0 Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Mon, 14 Apr 2025 22:36:39 +0200
Subject: [PATCH 13/44] XGBoost test passed --- ext/XGBoostExt.jl | 45 ++- test/XgBoostExt/xgboost.jl | 44 +-- test/XgBoostExt/xgboost_predict_issue.jl | 339 +---------------------- 3 files changed, 39 insertions(+), 389 deletions(-) diff --git a/ext/XGBoostExt.jl b/ext/XGBoostExt.jl index 22cf984..5e58c62 100644 --- a/ext/XGBoostExt.jl +++ b/ext/XGBoostExt.jl @@ -140,6 +140,7 @@ function SoleModels.solemodel( classlabels, featurenames=nothing, keep_condensed=false, + use_float32::Bool=true, kwargs... ) keep_condensed && error("Cannot keep condensed XGBoost.Node.") @@ -152,11 +153,10 @@ function SoleModels.solemodel( # xgboost trees could be composed of only one leaf, without any split if isnothing(t.split) antecedent = Atom(get_condition(class_idx, featurenames; test_operator=(<), featval=Inf)) - ### debug different test_operator TODO delete - # antecedent = Atom(get_condition(class_idx, featurenames; test_operator=(≤), featval=Inf)) - early_return(t.leaf, antecedent, clabels, classlabels[class_idx]) + leaf = use_float32 ? Float32(t.leaf) : t.leaf + early_return(leaf, antecedent, clabels, classlabels[class_idx]) else - SoleModels.solemodel(t, X, y; classlabels, featurenames, class_idx, clabels, kwargs...) + SoleModels.solemodel(t, X, y; classlabels, class_idx, clabels, featurenames, use_float32, kwargs...) end end @@ -184,32 +184,28 @@ function SoleModels.solemodel( X::AbstractMatrix, y::AbstractVector; classlabels, - path_conditions=Formula[], - featurenames=nothing, class_idx, - clabels + clabels, + featurenames=nothing, + path_conditions=Formula[], + use_float32::Bool, ) - antecedent = Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(<))) - ### debug different test_operator TODO delete - # antecedent = Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(≤))) +split_condition = use_float32 ? 
Float32(tree.split_condition) : tree.split_condition + antecedent = Atom(get_condition(tree.split, split_condition, featurenames; test_operator=(<))) # create a new path for the left branch left_path = copy(path_conditions) - push!(left_path, Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(<)))) - ### debug different test_operator TODO delete - # push!(left_path, Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(≤)))) + push!(left_path, Atom(get_condition(tree.split, split_condition, featurenames; test_operator=(<)))) # create a new path for the right branch right_path = copy(path_conditions) - push!(right_path, Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(≥)))) - ### debug different test_operator TODO delete - # push!(right_path, Atom(get_condition(tree.split, tree.split_condition, featurenames; test_operator=(>)))) + push!(right_path, Atom(get_condition(tree.split, split_condition, featurenames; test_operator=(≥)))) lefttree = if isnothing(tree.children[1].split) # @show SoleModels.join_antecedents(left_path) - xgbleaf(tree.children[1], left_path, X, y) + xgbleaf(tree.children[1], left_path, X, y; use_float32) else - SoleModels.solemodel(tree.children[1], X, y; path_conditions=left_path, classlabels, class_idx, clabels,featurenames) + SoleModels.solemodel(tree.children[1], X, y; path_conditions=left_path, classlabels, class_idx, clabels, featurenames, use_float32) end isnothing(lefttree) && begin @@ -218,9 +214,9 @@ function SoleModels.solemodel( righttree = if isnothing(tree.children[2].split) # @show SoleModels.join_antecedents(right_path) - xgbleaf(tree.children[2], right_path, X, y) + xgbleaf(tree.children[2], right_path, X, y; use_float32) else - SoleModels.solemodel(tree.children[2], X, y; path_conditions=right_path, classlabels, class_idx, clabels, featurenames) + SoleModels.solemodel(tree.children[2], X, y; path_conditions=right_path, classlabels, class_idx, clabels, featurenames, use_float32) end isnothing(righttree) && begin @@ -239,7 +235,8 @@ function xgbleaf( leaf::XGBoost.Node, formula::Vector{<:Formula}, X::AbstractMatrix, - y::AbstractVector + y::AbstractVector; + use_float32::Bool, ) bitX = bitmap_check_conditions(X, formula) prediction = SoleModels.bestguess(y[bitX]; suppress_parity_warning=true) @@ -247,10 +244,10 @@ function xgbleaf( isnothing(prediction) && return nothing + leaf_values = use_float32 ? 
Float32(leaf.leaf) : leaf.leaf
+
     info = (;
-        # leaf_values = leaf.leaf,
-        ### debug convert to Float32 TODO delete
-        leaf_values = Float32(leaf.leaf),
+        leaf_values,
         supporting_predictions = fill(prediction, length(labels)),
         supporting_labels = labels,
     )
diff --git a/test/XgBoostExt/xgboost.jl b/test/XgBoostExt/xgboost.jl
index 9a1ef99..84983b1 100644
--- a/test/XgBoostExt/xgboost.jl
+++ b/test/XgBoostExt/xgboost.jl
@@ -87,11 +87,6 @@ ŷ = XGB.predict(bst, X_test)
 predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1
 @test predsl == ŷ
 
-outperform = 0
-underperform = 0
-i = 0
-j = 0
-
 for seed in 1:40
     rng = Xoshiro(seed)
     train, test = partition(eachindex(y), train_ratio; shuffle=true, rng)
@@ -103,47 +98,16 @@ for seed in 1:40
             mach = machine(model, X_train, y_train)
             fit!(mach)
             trees = XGB.trees(mach.fitresult[1])
-            solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames)
-            preds = apply(solem, X_test)
+            solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, use_float32=true)
+            X_test_f32 = mapcols(col -> Float32.(col), X_test)
+            preds = apply(solem, X_test_f32)
             predsl = CategoricalArrays.levelcode.(categorical(preds))
 
             yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1
             bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softmax")
             ŷ = XGB.predict(bst, X_test)
 
-            (predsl .-1) != ŷ && global j += 1
-
-            sole_accuracy = sum(predsl .== CategoricalArrays.levelcode.(categorical(y_test)))/length(y_test)
-            xgb_accuracy = sum(ŷ .== CategoricalArrays.levelcode.(categorical(y_test)) .- 1)/length(y_test)
-
-            sole_accuracy > xgb_accuracy && global outperform += 1
-            sole_accuracy < xgb_accuracy && global underperform += 1
-            global i += 1
+            @test (predsl .-1) == ŷ
         end
     end
 end
-
-@test outperform > underperform
-println("Different predictions: $j out of $i tests.")
-println("SoleModel outperformed XGBoost in $outperform out of $i tests.")
-println("SoleModel underperformed XGBoost in $underperform out of $i tests.")
-println("Tests with differing accuracy: ", outperform + underperform)
-
-"""
-Test with original test operator = <
-Different predictions: 1051 out of 3600 tests.
-SoleModel outperformed XGBoost in 744 out of 3600 tests.
-SoleModel underperformed XGBoost in 231 out of 3600 tests.
-Tests with differing accuracy: 975
-"""
-
-"""
-Test with custom test operator = <=
-Different predictions: 1538 out of 3600 tests.
-SoleModel outperformed XGBoost in 1231 out of 3600 tests.
-SoleModel underperformed XGBoost in 202 out of 3600 tests.
-Tests with differing accuracy: 1433
-
-Note how the accuracy has increased. But we are still working on iris, so this may be an isolated case.
-"""
-
diff --git a/test/XgBoostExt/xgboost_predict_issue.jl b/test/XgBoostExt/xgboost_predict_issue.jl
index 5a73a29..e6d0a9f 100644
--- a/test/XgBoostExt/xgboost_predict_issue.jl
+++ b/test/XgBoostExt/xgboost_predict_issue.jl
@@ -32,13 +32,11 @@ function predict_xgboost_bag(trees, X; n_classes=0, objective="binary:logistic")
     for i in 1:ntree_limit
         tree = trees[i]
         tree_preds = predict_tree(tree, X)
-        @show tree_preds
+
         if startswith(objective, "multi:softprob") || startswith(objective, "multi:softmax")
             # For multi-class softprob, each tree outputs predictions for a specific class
             class_idx = (i - 1) % n_classes + 1
             raw_preds[:, class_idx] .+= tree_preds
-            @show class_idx
-            @show raw_preds
         else
             # For binary or regression, simply add the predictions
             raw_preds .+= tree_preds
@@ -52,9 +50,6 @@ function predict_xgboost_bag(trees, X; n_classes=0, objective="binary:logistic")
         # Apply softmax transformation
         exp_preds = exp.(raw_preds)
         row_sums = sum(exp_preds, dims=2)
-        @show typeof(exp_preds)
-        @show typeof(row_sums)
-        @show typeof(exp_preds ./ row_sums)
         return exp_preds ./ row_sums
     elseif objective == "multi:softmax"
         # Return class with highest score
@@ -80,7 +75,6 @@ function predict_tree(tree, X)
     for i in 1:n_samples
         predictions[i] = traverse_tree(tree, X[i, :])
     end
-    @show typeof(predictions)
     return predictions
 end
 
@@ -95,8 +89,6 @@ function traverse_tree(tree, x)
         split_value = Float32(node.split_condition)
 
         # Decide which child to go to
-        @show typeof(x[feature_idx])
-        @show typeof(split_value)
         if x[feature_idx] < split_value
             node = node.children[1]
         else
@@ -104,7 +96,6 @@ function traverse_tree(tree, x)
         end
     end
     # Return the leaf value
-    @show typeof(node.leaf)
     return Float32(node.leaf)
 end
 
@@ -121,18 +112,25 @@ XGTrees = MLJ.@load XGBoostClassifier pkg=XGBoost
 model = XGTrees(; num_round, eta, objective="multi:softprob")
 mach = machine(model, X_train, y_train)
 fit!(mach)
-# mlj_predict = predict(mach, DataFrame(X_test[27,:])) # WORKING
-mlj_predict = predict(mach, DataFrame(X_test[28,:])) # NOT WORKING
+# mlj_predict = predict(mach, DataFrame(X_test[27,:]))
+mlj_predict = predict(mach, DataFrame(X_test[28,:]))
+
 trees = XGB.trees(mach.fitresult[1])
 get_encoding(classes_seen) = Dict(MMI.int(c) => c for c in MMI.classes(classes_seen))
 get_classlabels(encoding) = [string(encoding[i]) for i in sort(keys(encoding) |> collect)]
 encoding = get_encoding(mach.fitresult[2])
 classlabels = get_classlabels(encoding)
-@show classlabels
 featurenames = mach.report.vals[1].features
+
+solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, use_float32=false)
+preds = apply(solem, DataFrame(reshape(Float32.(Vector(X_test[28,:])), 1, :), :auto)) # NOT WORKING
+
+solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, use_float32=true)
+preds = apply(solem, DataFrame(reshape(Float32.(Vector(X_test[28,:])), 1, :), :auto)) # WORKING
+
 solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames)
-# preds = apply(solem, DataFrame(X_test[27,:])) # WORKING
-preds = apply(solem, DataFrame(X_test[28,:])) # NOT WORKING
+preds = apply(solem, DataFrame(reshape(Float32.(Vector(X_test[28,:])), 1, :), :auto)) # WORKING
+
 predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1
 
 yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1
@@ -152,313 +150,4 @@ X_train32 = DataFrame(Float32.(Matrix(X_train)), [:f0, :f1, :f2, :f3])
 bst32 = XGB.xgboost((X_train32, yl_train); num_round, eta, num_class=3, objective="multi:softprob")
 xtrs32 = XGB.trees(bst32)
 X_test32 = DataFrame(reshape(Float32.(Vector(X_test[28,:])), 1, :), [:f0, :f1, :f2, :f3])
-class_probs = predict_xgboost_bag(xtrs32, X_test32; n_classes=3, objective="multi:softprob") # NOT WORKING
-
-
-# isapprox(Float32.(class_probs), yyy, atol=1e-5) # note to self: remember "atol"
-
-# # For regression
-# reg_preds = predict_xgboost_bag(mtrs, X_test, objective="reg:squarederror")
-
-# num_round = 20
-# eta = 0.3
-# yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1
-# bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softmax")
-# ŷ = XGB.predict(bst, X_test)
-
-### TREE 1
-"""
-xtrs[1].cover = 53.3333282
-xtrs[1].gain = 55.7546806
-xtrs[1].nmissing = 2
-xtrs[1].yes = 1
-xtrs[1].no = 2
-xtrs[1].split = "petal_length"
-xtrs[1].split_condition = 3.0
-
-xtrs[1].children[1].cover = 16.8888874
-xtrs[1].children[1].id = 1
-xtrs[1].children[1].leaf = 0.141614899
-
-xtrs[1].children[2].cover = 36.4444427
-xtrs[1].children[2].id = 2
-xtrs[1].children[2].leaf = -0.072997041
-
-solem.models[1].info =
-(leaf_values = [0.141614899, -0.072997041],
- supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa", "virginica", "virginica", "virginica"],
- supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],)
-
- solem.models[1].antecedent =
-Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_length] < 3.0
-
-solem.models[1].posconsequent.outcome = CategoricalValue{String, UInt32} "setosa"
-solem.models[1].posconsequent.info =
-(leaf_values = 0.141614899,
- supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa"],
- supporting_labels = ["setosa", "virginica", "versicolor"],)
-
-solem.models[1].negconsequent.outcome = CategoricalValue{String, UInt32} "virginica"
- solem.models[1].negconsequent.info =
-(leaf_values = -0.072997041,
- supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"],
- supporting_labels = ["setosa", "virginica", "versicolor"],)
-"""
-
-### TREE 2
-"""
-xtrs[2].cover = 53.3333282
-xtrs[2].gain = 11.9339008
-xtrs[2].nmissing = 2
-xtrs[2].yes = 1
-xtrs[2].no = 2
-xtrs[2].split = "petal_length"
-xtrs[2].split_condition = 3.0
-
- xtrs[2]children[1].cover = 16.8888874
- xtrs[2]children[1].id = 1
- xtrs[2]children[1].leaf = -0.070807457
-
- xtrs[2].children[2].cover = 36.4444427
- xtrs[2].children[2].gain = 35.383049
- xtrs[2].children[2].nmissing = 4
- xtrs[2].children[2].yes = 3
- xtrs[2].children[2].no = 4
- xtrs[2].children[2].split = "petal_length"
- xtrs[2].children[2].split_condition = 4.9000001
-
- xtrs[2].children[2].children[1].cover = 17.7777767
- xtrs[2].children[2].children[1].gain = 4.09395218
- xtrs[2].children[2].children[1].nmissing = 6
- xtrs[2].children[2].children[1].yes = 5
- xtrs[2].children[2].children[1].no = 6
- xtrs[2].children[2].children[1].split = "petal_width"
- xtrs[2].children[2].children[1].split_condition = 1.70000005
-
- xtrs[2].children[2].children[1].children[1].cover = 15.999999
- xtrs[2].children[2].children[1].children[1].id = 5
- xtrs[2].children[2].children[1].children[1].leaf = 0.141176477
-
- xtrs[2].children[2].children[1].children[2].cover = 1.77777767
- xtrs[2].children[2].children[1].children[2].id = 6
- xtrs[2].children[2].children[1].children[2].leaf = -0.0120000029
-
- xtrs[2].children[2].children[2].cover = 
18.666666 - xtrs[2].children[2].children[2].gain = 0.264455795 - xtrs[2].children[2].children[2].nmissing = 8 - xtrs[2].children[2].children[2].yes = 7 - xtrs[2].children[2].children[2].no = 8 - xtrs[2].children[2].children[2].split = "petal_width" - xtrs[2].children[2].children[2].split_condition = 1.70000005 - - xtrs[2].children[2].children[2].children[1].cover = 2.22222209 - xtrs[2].children[2].children[2].children[1].id = 7 - xtrs[2].children[2].children[2].children[1].leaf = -0.0206896588 - - xtrs[2].children[2].children[2].children[2].cover = 16.4444427 - xtrs[2].children[2].children[2].children[2].id = 8 - xtrs[2].children[2].children[2].children[2].leaf = -0.0707006454 - -solem.models[2].info = -(leaf_values = [-0.070807457, 0.141176477, -0.0120000029, -0.0206896588, -0.0707006454], - supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa", "versicolor", "versicolor", "versicolor", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica"], - supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],) -solem.models[2].antecedent = -Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_length] < 3.0 - - solem.models[2].posconsequent.outcome = CategoricalValue{String, UInt32} "setosa" - solem.models[2].posconsequent.info = - (leaf_values = -0.070807457, - supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa"], - supporting_labels = ["setosa", "virginica", "versicolor"],) - - solem.models[2].negconsequent.antecedent = - Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_length] < 4.9000001 - solem.models[2].negconsequent.info = - (leaf_values = [0.141176477, -0.0120000029, -0.0206896588, -0.0707006454], - supporting_predictions = CategoricalValue{String, UInt32}["versicolor", "versicolor", "versicolor", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica"], - supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],) - - solem.models[2].negconsequent.posconsequent.antecedent = - Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_width] < 1.70000005 - solem.models[2].negconsequent.posconsequent.info = - (leaf_values = [0.141176477, -0.0120000029], - supporting_predictions = CategoricalValue{String, UInt32}["versicolor", "versicolor", "versicolor", "virginica", "virginica", "virginica"], - supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],) - - solem.models[2].negconsequent.posconsequent.posconsequent.outcome = CategoricalValue{String, UInt32} "versicolor" - solem.models[2].negconsequent.posconsequent.posconsequent.info = - (leaf_values = 0.141176477, - supporting_predictions = CategoricalValue{String, UInt32}["versicolor", "versicolor", "versicolor"], - supporting_labels = ["setosa", "virginica", "versicolor"],) - - solem.models[2].negconsequent.posconsequent.negconsequent.outcome = CategoricalValue{String, UInt32} "virginica" - 
solem.models[2].negconsequent.posconsequent.negconsequent.info = - (leaf_values = -0.0120000029, - supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"], - supporting_labels = ["setosa", "virginica", "versicolor"],) - - solem.models[2].negconsequent.negconsequent.antecedent = - Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_width] < 1.70000005 - solem.models[2].negconsequent.negconsequent.info = - (leaf_values = [-0.0206896588, -0.0707006454], - supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica", "virginica", "virginica", "virginica"], - supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],) - - solem.models[2].negconsequent.negconsequent.posconsequent.outcome = CategoricalValue{String, UInt32} "virginica" - solem.models[2].negconsequent.negconsequent.posconsequent.info = - (leaf_values = -0.0206896588, - supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"], - supporting_labels = ["setosa", "virginica", "versicolor"],) - - solem.models[2].negconsequent.negconsequent.negconsequent.outcome = CategoricalValue{String, UInt32} "virginica" - solem.models[2].negconsequent.negconsequent.negconsequent.info = - (leaf_values = -0.0707006454, - supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"], - supporting_labels = ["setosa", "virginica", "versicolor"],) -""" - -### TREE 3 -""" -xtrs[3].cover = 53.3333282 -xtrs[3].gain = 51.9276886 -xtrs[3].nmissing = 2 -xtrs[3].yes = 1 -xtrs[3].no = 2 -xtrs[3].split = "petal_length" -xtrs[3].split_condition = 4.80000019 - - xtrs[3].children[1].cover = 32.8888855 - xtrs[3].children[1].gain = 0.676908493 - xtrs[3].children[1].nmissing = 4 - xtrs[3].children[1].yes = 3 - xtrs[3].children[1].no = 4 - xtrs[3].children[1].split = "petal_width" - xtrs[3].children[1].split_condition = 1.60000002 - - xtrs[3].children[1].children[1].cover = 31.5555534 - xtrs[3].children[1].children[1].id = 3 - xtrs[3].children[1].children[1].leaf = -0.0726962537 - - xtrs[3].children[1].children[2].cover = 1.33333325 - xtrs[3].children[1].children[2].id = 4 - xtrs[3].children[1].children[2].leaf = -2.55448485e-9 - - xtrs[3].children[2].cover = 20.4444427 - xtrs[3].children[2].gain = 1.53349686 - xtrs[3].children[2].nmissing = 6 - xtrs[3].children[2].yes = 5 - xtrs[3].children[2].no = 6 - xtrs[3].children[2].split = "petal_length" - xtrs[3].children[2].split_condition = 4.9000001 - - xtrs[3].children[2].children[1].cover = 1.77777767 - xtrs[3].children[2].children[1].id = 5 - xtrs[3].children[2].children[1].leaf = 0.0239999983 - - xtrs[3].children[2].children[2].cover = 18.666666 - xtrs[3].children[2].children[2].id = 6 - xtrs[3].children[2].children[2].leaf = 0.137288138 - -solem.models[3].info = -(leaf_values = [-0.0726962537, -2.55448485e-9, 0.0239999983, 0.137288138], - supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica", "virginica"], - supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],) -solem.models[3].antecedent = Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, 
typeof(<)}}}: [petal_length] < 4.80000019
-
-    solem.models[3].posconsequent.info =
-    (leaf_values = [-0.0726962537, -2.55448485e-9],
-    supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa", "virginica", "virginica", "virginica"],
-    supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],)
-    solem.models[3].posconsequent.antecedent = Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_width] < 1.60000002
-
-    solem.models[3].posconsequent.posconsequent.info
-    (leaf_values = -0.0726962537,
-    supporting_predictions = CategoricalValue{String, UInt32}["setosa", "setosa", "setosa"],
-    supporting_labels = ["setosa", "virginica", "versicolor"],)
-    solem.models[3].posconsequent.posconsequent.outcome = CategoricalValue{String, UInt32} "setosa"
-
-    solem.models[3].posconsequent.negconsequent.info =
-    (leaf_values = -2.55448485e-9,
-    supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"],
-    supporting_labels = ["setosa", "virginica", "versicolor"],)
-    solem.models[3].posconsequent.negconsequent.outcome = CategoricalValue{String, UInt32} "virginica"
-
-    solem.models[3].negconsequent.info =
-    (leaf_values = [0.0239999983, 0.137288138],
-    supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica", "virginica", "virginica", "virginica"],
-    supporting_labels = ["setosa", "virginica", "versicolor", "setosa", "virginica", "versicolor"],)
-    solem.models[3].negconsequent.antecedent = Atom{ScalarCondition{Float64, VariableValue{Int64, Symbol}, ScalarMetaCondition{VariableValue{Int64, Symbol}, typeof(<)}}}: [petal_length] < 4.9000001
-
-    solem.models[3].negconsequent.posconsequent.info =
-    (leaf_values = 0.0239999983,
-    supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"],
-    supporting_labels = ["setosa", "virginica", "versicolor"],)
-    solem.models[3].negconsequent.posconsequent.outcome = CategoricalValue{String, UInt32} "virginica"
-
-    solem.models[3].negconsequent.negconsequent.info =
-    (leaf_values = 0.137288138,
-    supporting_predictions = CategoricalValue{String, UInt32}["virginica", "virginica", "virginica"],
-    supporting_labels = ["setosa", "virginica", "versicolor"],)
-    solem.models[3].negconsequent.negconsequent.outcome = CategoricalValue{String, UInt32} "virginica"
-"""
-
-# calculating the probabilities
-
-# Row │ sepal_length  sepal_width  petal_length  petal_width
-#     │ Float64       Float64      Float64       Float64
-# ─────┼──────────────────────────────────────────────────────
-#   1 │ 6.9           3.1          4.9           1.5
-
-### TREE 1: probability of setosa
-"""
-"petal_length" < 3.0 -- no >> leaf = -0.072997041
-"""
-
-### TREE 2: probability of versicolor
-"""
-"petal_length" < 3.0 -- no > "petal_length" < 4.9000001 -- yes > "petal_width" < 1.70000005 -- yes >> leaf = 0.141176477
-"""
-
-### TREE 3: probability of virginica
-"""
-"petal_length" < 4.80000019 -- no > "petal_length" < 4.9000001 -- yes >> leaf = 0.0239999983
-"""
-
-### calculating multi:softprob
-"""
-exp_preds = exp.(-0.072997041 0.141176477 0.0239999983) = 0.929604 1.15163 1.02429
-row_sums = sum(exp_preds, dims=2) = 3.1055217627515077
-probability = exp_preds / row_sums = 0.299339 0.370832 0.329829
-
-XGBoost probability: 0.304161 0.320495 0.375344
-"""
-
-"""
-### reasoning by contradiction: a rounding problem ###
-"petal_length" = 4.9
-"petal_length" < 4.9000001 is evaluated as false
-
-so:
-# tree 2
-"petal_length" < 3.0 -- no > "petal_length" < 4.9000001 -- no > "petal_width" < 1.70000005 -- yes >> leaf = -0.0206896588
-# tree 3
-"petal_length" < 4.80000019 -- no > "petal_length" < 4.9000001 -- no >> leaf = 0.137288138
-"""
-exp_preds = exp.([-0.072997041, -0.0206896588, 0.137288138])
-row_sums = sum(exp_preds)
-probability = exp_preds ./ row_sums
-
-"""
-# 3-element Vector{Float64}:
-# 0.3041612750760762
-# 0.320494608175597
-# 0.3753441167483268
-
-# XGBoost probability: 0.304161 0.320495 0.375344
-
-PROBLEM SOLVED
-if we evaluate
-4.9 < 4.9000001 as false
-then we get the same result as XGBoost's predict
-"""
\ No newline at end of file
+class_probs32 = predict_xgboost_bag(xtrs32, X_test32; n_classes=3, objective="multi:softprob") # NOT WORKING

From ae4e5a1323bd5f1e0fc927d2fca34d7befb5c8b8 Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Tue, 15 Apr 2025 00:02:50 +0200
Subject: [PATCH 14/44] working on tests

---
 ext/XGBoostExt.jl             |  2 +-
 test/XGBoostExt.jl            | 47 -------------------
 .../xgboost_classifier.jl}    |  2 +
 .../xgboost_predict_issue.jl  |  0
 test/misc.jl                  |  4 +-
 test/runtests.jl              |  2 +-
 6 files changed, 6 insertions(+), 51 deletions(-)
 delete mode 100644 test/XGBoostExt.jl
 rename test/{XgBoostExt/xgboost.jl => XGBoostExt/xgboost_classifier.jl} (98%)
 rename test/{XgBoostExt => XGBoostExt}/xgboost_predict_issue.jl (100%)

diff --git a/ext/XGBoostExt.jl b/ext/XGBoostExt.jl
index 5e58c62..25df6d3 100644
--- a/ext/XGBoostExt.jl
+++ b/ext/XGBoostExt.jl
@@ -8,7 +8,7 @@ using CategoricalArrays
 import SoleModels: alphabet, solemodel
 
 function alphabet(model::XGBoost.Booster; kwargs...)
-    error("TODO fix and test.")
+    # error("TODO fix and test.")
     function _alphabet!(a::Vector, model::XGBoost.Booster; kwargs...)
         return a
     end
diff --git a/test/XGBoostExt.jl b/test/XGBoostExt.jl
deleted file mode 100644
index 1ae6c42..0000000
--- a/test/XGBoostExt.jl
+++ /dev/null
@@ -1,47 +0,0 @@
-
-# Import necessary libraries
-using MLJ
-using DataFrames
-
-# Load the Iris dataset
-X, y = @load_iris
-X = DataFrame(X)
-
-# Convert the target variable to categorical
-y = coerce(y, Multiclass)
-
-# Split the dataset into training and testing sets
-train, test = partition(eachindex(y), 0.8, shuffle=true)
-X_train, X_test = X[train, :], X[test, :]
-y_train, y_test = y[train], y[test]
-
-# Load the XGBoost classifier
-XGBoostClassifier = @load XGBoostClassifier pkg=XGBoost
-
-# Create the model and set hyperparameters
-mljmodel = XGBoostClassifier()
-
-# Wrap the model with the data
-mach = machine(mljmodel, X_train, y_train)
-
-# Train the model
-fit!(mach)
-
-# Make predictions
-y_pred = predict(mach, X_test)
-
-# Evaluate test accuracy
-acc = mean(mode.(y_pred) .== y_test)
-
-# Print the test accuracy
-println("Test Accuracy: $acc")
-
-
-
-using SoleModels
-
-@test_nowarn alphabet(fitted_params(mach).fitresult[1])
-
-model = fitted_params(mach).fitresult[1]
-
-@test_broken solemodel(model)
diff --git a/test/XgBoostExt/xgboost.jl b/test/XGBoostExt/xgboost_classifier.jl
similarity index 98%
rename from test/XgBoostExt/xgboost.jl
rename to test/XGBoostExt/xgboost_classifier.jl
index 84983b1..b2aa9cc 100644
--- a/test/XgBoostExt/xgboost.jl
+++ b/test/XGBoostExt/xgboost_classifier.jl
@@ -87,6 +87,8 @@ ŷ = XGB.predict(bst, X_test)
 predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1
 @test predsl == ŷ
 
+@test_nowarn alphabet(fitted_params(mach).fitresult[1])
+
 for seed in 1:40
     rng = Xoshiro(seed)
     train, test = partition(eachindex(y), train_ratio; shuffle=true, rng)
diff --git 
a/test/XgBoostExt/xgboost_predict_issue.jl b/test/XGBoostExt/xgboost_predict_issue.jl similarity index 100% rename from test/XgBoostExt/xgboost_predict_issue.jl rename to test/XGBoostExt/xgboost_predict_issue.jl diff --git a/test/misc.jl b/test/misc.jl index cbafaf2..4af76b4 100644 --- a/test/misc.jl +++ b/test/misc.jl @@ -1,6 +1,6 @@ -using Revise +# using Revise -using Reexport +# using Reexport using FunctionWrappers: FunctionWrapper using Test using SoleLogics diff --git a/test/runtests.jl b/test/runtests.jl index 8d95df5..b6d170a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -22,7 +22,7 @@ test_suites = [ ("Linear forms", ["linear-form-utilities.jl", ]), ("Pluto Demo", ["$(dirname(dirname(pathof(SoleModels))))/pluto-demo.jl", ]), ("DecisionTreeExt", ["DecisionTreeExt/tree.jl", "DecisionTreeExt/forest.jl"]), - ("XGBoostExt", ["XGBoostExt.jl"]), + ("XGBoostExt", ["XGBoostExt/xgboost_classifier.jl"]), ] @testset "SoleModels.jl" begin From 52deed0eb603cfb8da24718c54734773f7dd10a7 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Tue, 15 Apr 2025 00:37:37 +0200 Subject: [PATCH 15/44] codecov working --- Project.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/Project.toml b/Project.toml index 6a4f80a..caab585 100644 --- a/Project.toml +++ b/Project.toml @@ -33,6 +33,7 @@ ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [weakdeps] DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" XGBoost = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" +MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" [extensions] DecisionTreeExt = "DecisionTree" From 7c4f0256487bc6b6ad454c6a67df98e143c565e0 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Tue, 15 Apr 2025 01:04:50 +0200 Subject: [PATCH 16/44] removed MLJXGBoostInterface in tests --- Project.toml | 1 - ext/XGBoostExt.jl | 4 +--- test/XGBoostExt/xgboost_classifier.jl | 2 +- test/XGBoostExt/xgboost_predict_issue.jl | 5 ++++- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Project.toml b/Project.toml index caab585..6a4f80a 100644 --- a/Project.toml +++ b/Project.toml @@ -33,7 +33,6 @@ ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [weakdeps] DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" XGBoost = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" -MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" [extensions] DecisionTreeExt = "DecisionTree" diff --git a/ext/XGBoostExt.jl b/ext/XGBoostExt.jl index 25df6d3..18ea25b 100644 --- a/ext/XGBoostExt.jl +++ b/ext/XGBoostExt.jl @@ -115,9 +115,7 @@ end function early_return(leaf, antecedent, clabel, classl) info =(; - # leaf_values = leaf, - ### debug convert to Float32 TODO delete - leaf_values = Float32(leaf), + leaf_values = leaf, supporting_predictions = clabel, supporting_labels = [classl], ) diff --git a/test/XGBoostExt/xgboost_classifier.jl b/test/XGBoostExt/xgboost_classifier.jl index b2aa9cc..f02de30 100644 --- a/test/XGBoostExt/xgboost_classifier.jl +++ b/test/XGBoostExt/xgboost_classifier.jl @@ -4,7 +4,7 @@ using MLJ using MLJBase using DataFrames -using MLJXGBoostInterface +# using MLJXGBoostInterface using SoleModels import MLJModelInterface as MMI diff --git a/test/XGBoostExt/xgboost_predict_issue.jl b/test/XGBoostExt/xgboost_predict_issue.jl index e6d0a9f..0a9af41 100644 --- a/test/XGBoostExt/xgboost_predict_issue.jl +++ b/test/XGBoostExt/xgboost_predict_issue.jl @@ -1,6 +1,6 @@ using MLJ using DataFrames -using MLJXGBoostInterface +# using MLJXGBoostInterface import MLJModelInterface as MMI using SoleModels import XGBoost as XGB @@ -124,12 +124,15 @@ 
featurenames = mach.report.vals[1].features solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, use_float32=false) preds = apply(solem, DataFrame(reshape(Float32.(Vector(X_test[28,:])), 1, :), :auto)) # NOT WORKING +@test preds[1] == "versicolor" solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, use_float32=true) preds = apply(solem, DataFrame(reshape(Float32.(Vector(X_test[28,:])), 1, :), :auto)) # WORKING +@test preds[1] == "virginica" solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames) preds = apply(solem, DataFrame(reshape(Float32.(Vector(X_test[28,:])), 1, :), :auto)) # WORKING +@test preds[1] == "virginica" predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1 From 659322252fe95d98b17b8dee0411c5e04e0c71a5 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Tue, 15 Apr 2025 01:15:58 +0200 Subject: [PATCH 17/44] still fixing codecov --- Project.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Project.toml b/Project.toml index 6a4f80a..8122531 100644 --- a/Project.toml +++ b/Project.toml @@ -52,10 +52,6 @@ Graphs = "1.8" HTTP = "1.9" IterTools = "1" Lazy = "0.15.1" -MLJ = "0.19 - 0.20" -MLJBase = "1.6 - 1.7" -MLJDecisionTreeInterface = "0.4" -MLJModelInterface = "1.8" PrettyTables = "2.2" ProgressMeter = "1" Random = "1" From 2828e50296a04ba3544cbbd9450388e5dc5a99d6 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Tue, 15 Apr 2025 01:16:47 +0200 Subject: [PATCH 18/44] again --- Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Project.toml b/Project.toml index 8122531..81745e0 100644 --- a/Project.toml +++ b/Project.toml @@ -76,7 +76,6 @@ MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661" MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" -MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" MultiData = "8cc5100c-b3d1-4f82-90cb-0ea93d317aba" PlutoUI = "7f904dfe-b85e-4ff6-b463-dae2292396a8" From 7c3fbde3d2b338278f88ea767f43d7bb6723c5df Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Tue, 15 Apr 2025 01:21:59 +0200 Subject: [PATCH 19/44] guess what? 
--- Project.toml | 2 +- test/XGBoostExt/xgboost_classifier.jl | 1 - test/XGBoostExt/xgboost_predict_issue.jl | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 81745e0..fa6d55d 100644 --- a/Project.toml +++ b/Project.toml @@ -84,4 +84,4 @@ SoleData = "123f1ae1-6307-4526-ab5b-aab3a92a2b8c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "DataFrames", "Random", "MLJ", "MLJXGBoostInterface", "MultiData", "Markdown", "InteractiveUtils", "BenchmarkTools", "MLJBase", "XGBoost", "DecisionTree", "MLJDecisionTreeInterface", "SoleData"] +test = ["Test", "DataFrames", "Random", "MLJ", "MultiData", "Markdown", "InteractiveUtils", "BenchmarkTools", "MLJBase", "XGBoost", "DecisionTree", "MLJDecisionTreeInterface", "SoleData"] diff --git a/test/XGBoostExt/xgboost_classifier.jl b/test/XGBoostExt/xgboost_classifier.jl index f02de30..2c1e76e 100644 --- a/test/XGBoostExt/xgboost_classifier.jl +++ b/test/XGBoostExt/xgboost_classifier.jl @@ -4,7 +4,6 @@ using MLJ using MLJBase using DataFrames -# using MLJXGBoostInterface using SoleModels import MLJModelInterface as MMI diff --git a/test/XGBoostExt/xgboost_predict_issue.jl b/test/XGBoostExt/xgboost_predict_issue.jl index 0a9af41..198093f 100644 --- a/test/XGBoostExt/xgboost_predict_issue.jl +++ b/test/XGBoostExt/xgboost_predict_issue.jl @@ -1,6 +1,6 @@ using MLJ using DataFrames -# using MLJXGBoostInterface + import MLJModelInterface as MMI using SoleModels import XGBoost as XGB From a9acf2ec447430748d827475eaffa454f8006d99 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Tue, 15 Apr 2025 01:33:52 +0200 Subject: [PATCH 20/44] update ci.yml breaking! --- .github/workflows/ci.backup | 21 +++++++++++++++++++++ .github/workflows/ci.yml | 27 +++++++++++++++++++++------ 2 files changed, 42 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/ci.backup diff --git a/.github/workflows/ci.backup b/.github/workflows/ci.backup new file mode 100644 index 0000000..cdf957d --- /dev/null +++ b/.github/workflows/ci.backup @@ -0,0 +1,21 @@ +name: Upload coverage reports to Codecov +on: [push, pull_request] +jobs: + run: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Set up Julia 1.9.0 + uses: julia-actions/setup-julia@v1 + with: + version: "1.9.0" + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v5 + with: + token: ${{ secrets.CODECOV_TOKEN }} + slug: aclai-lab/SoleModels.jl + + diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ba475a2..45b1c86 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,15 +6,30 @@ jobs: steps: - name: Checkout uses: actions/checkout@v2 + - name: Set up Julia 1.9.0 uses: julia-actions/setup-julia@v1 with: version: "1.9.0" - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 - - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v5 + + - name: Build package + uses: julia-actions/julia-buildpkg@v1 + + - name: Build test dependencies + run: | + using Pkg + Pkg.develop(PackageSpec(path=pwd())) + Pkg.instantiate() + shell: julia --project=test {0} + + - name: Run tests + uses: julia-actions/julia-runtest@v1 + + - name: Process coverage + uses: julia-actions/julia-processcoverage@v1 + + - name: Upload coverage + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - slug: 
aclai-lab/SoleModels.jl - + slug: aclai-lab/SoleModels.jl \ No newline at end of file From 828c4468ae48593ea22419163e94cbcc4780bf0b Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Tue, 15 Apr 2025 01:48:30 +0200 Subject: [PATCH 21/44] again --- .github/workflows/ci.yml | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 45b1c86..f836f3d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,30 +6,16 @@ jobs: steps: - name: Checkout uses: actions/checkout@v2 - - name: Set up Julia 1.9.0 uses: julia-actions/setup-julia@v1 with: version: "1.9.0" - - - name: Build package - uses: julia-actions/julia-buildpkg@v1 - - - name: Build test dependencies - run: | - using Pkg - Pkg.develop(PackageSpec(path=pwd())) - Pkg.instantiate() - shell: julia --project=test {0} - - - name: Run tests - uses: julia-actions/julia-runtest@v1 - - - name: Process coverage - uses: julia-actions/julia-processcoverage@v1 - - - name: Upload coverage - uses: codecov/codecov-action@v5 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + with: + project: test + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} slug: aclai-lab/SoleModels.jl \ No newline at end of file From a7ced5b4ca44cb96f753b37bcfc200c2e645cbcd Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Tue, 15 Apr 2025 01:53:25 +0200 Subject: [PATCH 22/44] last try --- .github/workflows/ci.yml | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f836f3d..a05082c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,16 +6,33 @@ jobs: steps: - name: Checkout uses: actions/checkout@v2 + - name: Set up Julia 1.9.0 uses: julia-actions/setup-julia@v1 with: version: "1.9.0" - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 - with: - project: test - - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v5 + + - name: Build package + uses: julia-actions/julia-buildpkg@v1 + + - name: Setup test environment + run: | + using Pkg + Pkg.develop(PackageSpec(path=pwd())) + Pkg.instantiate() + shell: julia --project=test {0} + + - name: Run tests + run: | + using Pkg + Pkg.test("SoleModels", coverage=true) + shell: julia --project=test {0} + + - name: Process coverage + uses: julia-actions/julia-processcoverage@v1 + + - name: Upload coverage + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} slug: aclai-lab/SoleModels.jl \ No newline at end of file From fba387a4d7e7db0ddd33d8df582a8b49ed9cd797 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Tue, 15 Apr 2025 12:28:30 +0200 Subject: [PATCH 23/44] reverted ci.yml --- .github/workflows/ci.backup | 21 --------------------- .github/workflows/ci.yml | 27 ++++----------------------- 2 files changed, 4 insertions(+), 44 deletions(-) delete mode 100644 .github/workflows/ci.backup diff --git a/.github/workflows/ci.backup b/.github/workflows/ci.backup deleted file mode 100644 index cdf957d..0000000 --- a/.github/workflows/ci.backup +++ /dev/null @@ -1,21 +0,0 @@ -name: Upload coverage reports to Codecov -on: [push, pull_request] -jobs: - run: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v2 - - name: Set up Julia 1.9.0 - uses: julia-actions/setup-julia@v1 - with: - version: "1.9.0" - 
- uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 - - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v5 - with: - token: ${{ secrets.CODECOV_TOKEN }} - slug: aclai-lab/SoleModels.jl - - diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a05082c..9cdf0cf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,33 +6,14 @@ jobs: steps: - name: Checkout uses: actions/checkout@v2 - - name: Set up Julia 1.9.0 uses: julia-actions/setup-julia@v1 with: version: "1.9.0" - - - name: Build package - uses: julia-actions/julia-buildpkg@v1 - - - name: Setup test environment - run: | - using Pkg - Pkg.develop(PackageSpec(path=pwd())) - Pkg.instantiate() - shell: julia --project=test {0} - - - name: Run tests - run: | - using Pkg - Pkg.test("SoleModels", coverage=true) - shell: julia --project=test {0} - - - name: Process coverage - uses: julia-actions/julia-processcoverage@v1 - - - name: Upload coverage - uses: codecov/codecov-action@v5 + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + - uses: julia-actions/julia-processcoverage@v1 + - uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} slug: aclai-lab/SoleModels.jl \ No newline at end of file From 1e0971f640f533e76b3c7b6a7eb516b7ec36f726 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Tue, 15 Apr 2025 13:28:47 +0200 Subject: [PATCH 24/44] atom getter --- ext/XGBoostExt.jl | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ext/XGBoostExt.jl b/ext/XGBoostExt.jl index 18ea25b..e25661e 100644 --- a/ext/XGBoostExt.jl +++ b/ext/XGBoostExt.jl @@ -102,10 +102,14 @@ function get_condition(class_idx, featurenames; test_operator, featval) return ScalarCondition(feature, test_operator, featval) end +get_operator(atom::Atom{<:ScalarCondition}) = atom.value.metacond.test_operator +get_i_variable(atom::Atom{<:ScalarCondition}) = atom.value.metacond.feature.i_variable +get_threshold(atom::Atom{<:ScalarCondition}) = atom.value.threshold + function satisfies_conditions(row, formula) - all(atom -> atom.value.metacond.test_operator( - row[atom.value.metacond.feature.i_variable], - atom.value.threshold), formula + all(atom -> get_operator(atom)( + row[get_i_variable(atom)], + get_threshold(atom)), formula ) end From 53161b1624796bdc649ce748276db08227350f74 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Tue, 15 Apr 2025 16:43:49 +0200 Subject: [PATCH 25/44] adaboost test --- ext/XGBoostExt.jl | 3 + test/DecisionTreeExt/adaboost.jl | 144 ++++++++++++++++++++++++++ test/XGBoostExt/xgboost_classifier.jl | 46 ++++---- test/runtests.jl | 2 +- 4 files changed, 173 insertions(+), 22 deletions(-) create mode 100644 test/DecisionTreeExt/adaboost.jl diff --git a/ext/XGBoostExt.jl b/ext/XGBoostExt.jl index e25661e..ff19594 100644 --- a/ext/XGBoostExt.jl +++ b/ext/XGBoostExt.jl @@ -7,6 +7,9 @@ using CategoricalArrays import SoleModels: alphabet, solemodel +# ---------------------------------------------------------------------------- # +# DecisionXGBoost alphabet # +# ---------------------------------------------------------------------------- # function alphabet(model::XGBoost.Booster; kwargs...) # error("TODO fix and test.") function _alphabet!(a::Vector, model::XGBoost.Booster; kwargs...) 
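
For context, the getters introduced in PATCH 24 let satisfies_conditions check a row against a
conjunction of scalar atoms without reaching into nested fields. A minimal, hypothetical sketch of
that access pattern (the `atoms` and `row` values are invented for illustration; `Atom`,
`ScalarCondition`, and `VariableValue` are the same constructors used elsewhere in this series):

    # Does one data row satisfy a conjunction of scalar atoms?
    atoms = [Atom(ScalarCondition(VariableValue(3), <, 2.45f0)),  # hypothetical split
             Atom(ScalarCondition(VariableValue(4), <, 1.75f0))]  # hypothetical split
    row = (5.1f0, 3.5f0, 1.4f0, 0.2f0)  # one instance, indexed by variable
    holds = all(atoms) do atom
        get_operator(atom)(row[get_i_variable(atom)], get_threshold(atom))
    end  # true here: 1.4 < 2.45 and 0.2 < 1.75
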
diff --git a/test/DecisionTreeExt/adaboost.jl b/test/DecisionTreeExt/adaboost.jl new file mode 100644 index 0000000..da3fff7 --- /dev/null +++ b/test/DecisionTreeExt/adaboost.jl @@ -0,0 +1,144 @@ +using Test + +using MLJ +using MLJBase +using DataFrames + +using MLJDecisionTreeInterface +using SoleModels +using Random + +import DecisionTree as DT + +X, y = @load_iris +X = DataFrame(X) + +train_ratio = 0.7 +rng = Xoshiro(11) + +train, test = partition(eachindex(y), train_ratio; shuffle=true, rng) +X_train, y_train = X[train, :], y[train] +X_test, y_test = X[test, :], y[test] + +println("Training set size: ", size(X_train), " - ", size(y_train)) +println("Test set size: ", size(X_test), " - ", size(y_test)) +println("Training set type: ", typeof(X_train), " - ", typeof(y_train)) +println("Test set type: ", typeof(X_test), " - ", typeof(y_test)) + +# ---------------------------------------------------------------------------- # +# AdaBoost solemodel # +# ---------------------------------------------------------------------------- # +Stump = MLJ.@load AdaBoostStumpClassifier pkg=DecisionTree + +model = Stump(; + n_iter=10, + feature_importance=:impurity, + rng +) + +# Bind the model and data into a machine +mach = machine(model, X_train, y_train) +# Fit the model +fit!(mach) + +weights = mach.fitresult[2] +classlabels = sort(mach.fitresult[3]) +featurenames = MLJ.report(mach).features + +solem = solemodel(MLJ.fitted_params(mach).stumps; weights, classlabels, featurenames) +solem = solemodel(MLJ.fitted_params(mach).stumps; weights, classlabels, featurenames, keep_condensed = false) + +@test SoleData.scalarlogiset(X_test; allow_propositional = true) isa PropositionalLogiset + +# Make test instances flow into the model +preds = apply(solem, X_test) +preds2 = apply!(solem, X_test, y_test) + +@test preds == preds2 + +# apply!(solem, X_test, y_test, mode = :append) + +printmodel(solem; max_depth = 7, show_intermediate_finals = true, show_metrics = true) + +# @test_broken printmodel.(listrules(solem, min_lift = 1.0, min_ninstances = 0); show_metrics = true); + +# ---------------------------------------------------------------------------- # +# AdaBoost decisiontree # +# ---------------------------------------------------------------------------- # +# train adaptive-boosted stumps, using 10 iterations +dt_model, dt_coeffs = DT.build_adaboost_stumps(y_train, Matrix(X_train), 10) +# apply learned model +dt_preds = apply_adaboost_stumps(dt_model, dt_coeffs, Matrix(X_test)) +# get the probability of each label +dt_proba = apply_adaboost_stumps_proba(dt_model, dt_coeffs, Matrix(X_test), classlabels) + +@test preds == dt_preds + +# ---------------------------------------------------------------------------- # +# Accuracy # +# ---------------------------------------------------------------------------- # +ada_accuracy = sum(preds .== y_test)/length(y_test) +# @test accuracy >= 0.8 + +# decision tree +Tree = MLJ.@load DecisionTreeClassifier pkg=DecisionTree +dt_model = Tree(max_depth=-1, min_samples_leaf=1, min_samples_split=2) +dt_mach = machine(dt_model, X_train, y_train) +fit!(dt_mach) +dt_solem = solemodel(fitted_params(dt_mach).tree) +dt_preds = apply(dt_solem, X_test) +dt_accuracy = sum(dt_preds .== y_test)/length(y_test) + +# random forest +Forest = MLJ.@load RandomForestClassifier pkg=DecisionTree +rm_model = Forest(max_depth=3, min_samples_leaf=1, min_samples_split=2, n_trees=10) +rm_mach = machine(rm_model, X_train, y_train) +fit!(rm_mach) +classlabels = (rm_mach).fitresult[2] +classlabels = 
classlabels[sortperm((rm_mach).fitresult[3])] +featurenames = report(rm_mach).features +rm_solem = solemodel(fitted_params(rm_mach).forest; classlabels, featurenames) +rm_preds = apply(rm_solem, X_test) +rm_accuracy = sum(rm_preds .== y_test)/length(y_test) + +println("AdaBoost accuracy: ", ada_accuracy) +println("DecisionTree accuracy: ", dt_accuracy) +println("RandomForest accuracy: ", rm_accuracy) + +@test ada_accuracy ≥ rm_accuracy ≥ dt_accuracy + +# ---------------------------------------------------------------------------- # +# Data Validation # +# ---------------------------------------------------------------------------- # +@testset "data validation" begin + Stump = MLJ.@load AdaBoostStumpClassifier pkg=DecisionTree + + for train_ratio in 0.5:0.1:0.9 + for seed in 1:40 + train, test = partition(eachindex(y), train_ratio; shuffle=true, rng=Xoshiro(seed)) + X_train, y_train = X[train, :], y[train] + X_test, y_test = X[test, :], y[test] + + for n_iter in 10:10:100 + # solemodel + model = Stump(; n_iter, rng=Xoshiro(seed)) + mach = machine(model, X_train, y_train) + fit!(mach) + weights = mach.fitresult[2] + classlabels = sort(mach.fitresult[3]) + featurenames = MLJ.report(mach).features + solem = solemodel(MLJ.fitted_params(mach).stumps; weights, classlabels, featurenames) + preds = apply(solem, X_test) + + # decisiontree + yl_train = CategoricalArrays.levelcode.(y_train) + dt_model, dt_coeffs = DT.build_adaboost_stumps(yl_train, Matrix(X_train), n_iter; rng=Xoshiro(seed)) + dt_preds = apply_adaboost_stumps(dt_model, dt_coeffs, Matrix(X_test)) + + code_preds = CategoricalArrays.levelcode.(preds) + @test code_preds == dt_preds + end + end + end +end + diff --git a/test/XGBoostExt/xgboost_classifier.jl b/test/XGBoostExt/xgboost_classifier.jl index 2c1e76e..3c9e7e1 100644 --- a/test/XGBoostExt/xgboost_classifier.jl +++ b/test/XGBoostExt/xgboost_classifier.jl @@ -88,27 +88,31 @@ predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1 @test_nowarn alphabet(fitted_params(mach).fitresult[1]) -for seed in 1:40 - rng = Xoshiro(seed) - train, test = partition(eachindex(y), train_ratio; shuffle=true, rng) - X_train, y_train = X[train, :], y[train] - X_test, y_test = X[test, :], y[test] - for num_round in 10:10:100 - for eta in 0.1:0.1:0.9 - model = XGTrees(; num_round, eta, objective="multi:softmax") - mach = machine(model, X_train, y_train) - fit!(mach) - trees = XGB.trees(mach.fitresult[1]) - solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, use_float32=true) - X_test_f32 = mapcols(col -> Float32.(col), X_test) - preds = apply(solem, X_test_f32) - predsl = CategoricalArrays.levelcode.(categorical(preds)) - - yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1 - bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softmax") - ŷ = XGB.predict(bst, X_test) - - @test (predsl .-1) == ŷ +# ---------------------------------------------------------------------------- # +# Data Validation # +# ---------------------------------------------------------------------------- # +@testset "data validation" begin + for seed in 1:40 + train, test = partition(eachindex(y), train_ratio; shuffle=true, rng=Xoshiro(seed)) + X_train, y_train = X[train, :], y[train] + X_test, y_test = X[test, :], y[test] + for num_round in 10:10:100 + for eta in 0.1:0.1:0.9 + model = XGTrees(; num_round, eta, objective="multi:softmax") + mach = machine(model, X_train, y_train) + fit!(mach) + trees = XGB.trees(mach.fitresult[1]) + solem = 
solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, use_float32=true) + X_test_f32 = mapcols(col -> Float32.(col), X_test) + preds = apply(solem, X_test_f32) + predsl = CategoricalArrays.levelcode.(categorical(preds)) + + yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1 + bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softmax") + ŷ = XGB.predict(bst, X_test) + + @test (predsl .-1) == ŷ + end end end end diff --git a/test/runtests.jl b/test/runtests.jl index b6d170a..4e779db 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -21,7 +21,7 @@ test_suites = [ ("Rules", ["juliacon2024.jl", ]), ("Linear forms", ["linear-form-utilities.jl", ]), ("Pluto Demo", ["$(dirname(dirname(pathof(SoleModels))))/pluto-demo.jl", ]), - ("DecisionTreeExt", ["DecisionTreeExt/tree.jl", "DecisionTreeExt/forest.jl"]), + ("DecisionTreeExt", ["DecisionTreeExt/tree.jl", "DecisionTreeExt/forest.jl", "DecisionTreeExt/adaboost.jl"]), ("XGBoostExt", ["XGBoostExt/xgboost_classifier.jl"]), ] From d7a5d15d5e7d743ad604b5b2b360d4577c1b445e Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Wed, 16 Apr 2025 16:50:51 +0200 Subject: [PATCH 26/44] xgboost classifier ready, tested and working --- ext/XGBoostExt.jl | 18 ++-- test/DecisionTreeExt/adaboost.jl | 19 ++-- test/XGBoostExt/xgboost_classifier.jl | 134 +++++++++++++++++--------- 3 files changed, 107 insertions(+), 64 deletions(-) diff --git a/ext/XGBoostExt.jl b/ext/XGBoostExt.jl index ff19594..fdf9639 100644 --- a/ext/XGBoostExt.jl +++ b/ext/XGBoostExt.jl @@ -122,9 +122,9 @@ end function early_return(leaf, antecedent, clabel, classl) info =(; - leaf_values = leaf, - supporting_predictions = clabel, - supporting_labels = [classl], + leaf_values=leaf, + supporting_predictions=clabel, + supporting_labels=[classl], ) return Branch( @@ -212,10 +212,6 @@ split_condition = use_float32 ? Float32(tree.split_condition) : tree.split_condi else SoleModels.solemodel(tree.children[1], X, y; path_conditions=left_path, classlabels, class_idx, clabels, featurenames, use_float32) end - isnothing(lefttree) && - begin - return early_return(tree.children[1].leaf, antecedent, clabels, classlabels[class_idx]) - end righttree = if isnothing(tree.children[2].split) # @show SoleModels.join_antecedents(right_path) @@ -223,10 +219,6 @@ split_condition = use_float32 ? 
Float32(tree.split_condition) : tree.split_condi
         else
             SoleModels.solemodel(tree.children[2], X, y; path_conditions=right_path, classlabels, class_idx, clabels, featurenames, use_float32)
         end
-    isnothing(righttree) &&
-        begin
-            return early_return(tree.children[2].leaf, antecedent, clabels, classlabels[class_idx])
-        end
 
     info = (;
         leaf_values = [lefttree.info[:leaf_values]..., righttree.info[:leaf_values]...],
@@ -244,7 +236,11 @@ function xgbleaf(
     use_float32::Bool,
 )
     bitX = bitmap_check_conditions(X, formula)
+
+    # this can happen when the split condition doesn't match any class
+    !any(bitX) && (bitX = trues(length(y)))
     prediction = SoleModels.bestguess(y[bitX]; suppress_parity_warning=true)
+    labels = unique(y)
 
     isnothing(prediction) && return nothing
 
diff --git a/test/DecisionTreeExt/adaboost.jl b/test/DecisionTreeExt/adaboost.jl
index da3fff7..1c2b921 100644
--- a/test/DecisionTreeExt/adaboost.jl
+++ b/test/DecisionTreeExt/adaboost.jl
@@ -3,6 +3,7 @@ using Test
 using MLJ
 using MLJBase
 using DataFrames
+using CategoricalArrays
 
 using MLJDecisionTreeInterface
 using SoleModels
@@ -14,7 +15,7 @@ X, y = @load_iris
 X = DataFrame(X)
 
 train_ratio = 0.7
-rng = Xoshiro(11)
+rng = Xoshiro(1)
 
 train, test = partition(eachindex(y), train_ratio; shuffle=true, rng)
 X_train, y_train = X[train, :], y[train]
@@ -39,7 +40,7 @@ model = Stump(;
 # Bind the model and data into a machine
 mach = machine(model, X_train, y_train)
 # Fit the model
-fit!(mach)
+fit!(mach, verbosity=0)
 
 weights = mach.fitresult[2]
 classlabels = sort(mach.fitresult[3])
@@ -68,9 +69,9 @@ printmodel(solem; max_depth = 7, show_intermediate_finals = true, show_metrics =
 # train adaptive-boosted stumps, using 10 iterations
 dt_model, dt_coeffs = DT.build_adaboost_stumps(y_train, Matrix(X_train), 10)
 # apply learned model
-dt_preds = apply_adaboost_stumps(dt_model, dt_coeffs, Matrix(X_test))
+dt_preds = DT.apply_adaboost_stumps(dt_model, dt_coeffs, Matrix(X_test))
 # get the probability of each label
-dt_proba = apply_adaboost_stumps_proba(dt_model, dt_coeffs, Matrix(X_test), classlabels)
+dt_proba = DT.apply_adaboost_stumps_proba(dt_model, dt_coeffs, Matrix(X_test), classlabels)
 
 @test preds == dt_preds
 
@@ -84,16 +85,16 @@ ada_accuracy = sum(preds .== y_test)/length(y_test)
 Tree = MLJ.@load DecisionTreeClassifier pkg=DecisionTree
 dt_model = Tree(max_depth=-1, min_samples_leaf=1, min_samples_split=2)
 dt_mach = machine(dt_model, X_train, y_train)
-fit!(dt_mach)
+fit!(dt_mach, verbosity=0)
 dt_solem = solemodel(fitted_params(dt_mach).tree)
 dt_preds = apply(dt_solem, X_test)
 dt_accuracy = sum(dt_preds .== y_test)/length(y_test)
 
 # random forest
 Forest = MLJ.@load RandomForestClassifier pkg=DecisionTree
-rm_model = Forest(max_depth=3, min_samples_leaf=1, min_samples_split=2, n_trees=10)
+rm_model = Forest(; max_depth=3, min_samples_leaf=1, min_samples_split=2, n_trees=10, rng)
 rm_mach = machine(rm_model, X_train, y_train)
-fit!(rm_mach)
+fit!(rm_mach, verbosity=0)
 classlabels = (rm_mach).fitresult[2]
 classlabels = classlabels[sortperm((rm_mach).fitresult[3])]
 featurenames = report(rm_mach).features
@@ -123,7 +124,7 @@ println("RandomForest accuracy: ", rm_accuracy)
             # solemodel
             model = Stump(; n_iter, rng=Xoshiro(seed))
             mach = machine(model, X_train, y_train)
-            fit!(mach)
+            fit!(mach, verbosity=0)
             weights = mach.fitresult[2]
             classlabels = sort(mach.fitresult[3])
             featurenames = MLJ.report(mach).features
@@ -133,7 +134,7 @@ println("RandomForest accuracy: ", rm_accuracy)
             # decisiontree
             yl_train = CategoricalArrays.levelcode.(y_train)
             dt_model, dt_coeffs 
= DT.build_adaboost_stumps(yl_train, Matrix(X_train), n_iter; rng=Xoshiro(seed)) - dt_preds = apply_adaboost_stumps(dt_model, dt_coeffs, Matrix(X_test)) + dt_preds = DT.apply_adaboost_stumps(dt_model, dt_coeffs, Matrix(X_test)) code_preds = CategoricalArrays.levelcode.(preds) @test code_preds == dt_preds diff --git a/test/XGBoostExt/xgboost_classifier.jl b/test/XGBoostExt/xgboost_classifier.jl index 3c9e7e1..d25b661 100644 --- a/test/XGBoostExt/xgboost_classifier.jl +++ b/test/XGBoostExt/xgboost_classifier.jl @@ -14,7 +14,7 @@ using Random, CategoricalArrays X, y = @load_iris X = DataFrame(X) -train_ratio = 0.8 +train_ratio = 0.7 rng = Xoshiro(11) train, test = partition(eachindex(y), train_ratio; shuffle=true, rng) @@ -26,92 +26,138 @@ println("Test set size: ", size(X_test), " - ", size(y_test)) println("Training set type: ", typeof(X_train), " - ", typeof(y_train)) println("Test set type: ", typeof(X_test), " - ", typeof(y_test)) +# ---------------------------------------------------------------------------- # +# XGBoost solemodel # +# ---------------------------------------------------------------------------- # XGTrees = MLJ.@load XGBoostClassifier pkg=XGBoost model = XGTrees(; - num_round=1, - max_depth=6, + num_round=10, + tree_method="exact", objective="multi:softmax" ) # Bind the model and data into a machine mach = machine(model, X_train, y_train) # Fit the model -fit!(mach) - -trees = XGB.trees(mach.fitresult[1]) +fit!(mach; verbosity=0) get_encoding(classes_seen) = Dict(MMI.int(c) => c for c in MMI.classes(classes_seen)) get_classlabels(encoding) = [string(encoding[i]) for i in sort(keys(encoding) |> collect)] +trees = XGB.trees(mach.fitresult[1]) encoding = get_encoding(mach.fitresult[2]) classlabels = get_classlabels(encoding) featurenames = mach.report.vals[1].features -# ds_safetest = vcat(y_train, "nothing") -# solem = solemodel(trees, Matrix(X_train), y_train) solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames) solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, keep_condensed = false) @test SoleData.scalarlogiset(X_test; allow_propositional = true) isa PropositionalLogiset # Make test instances flow into the model -preds = apply(solem, X_test) +X_test_f32 = mapcols(col -> Float32.(col), X_test) +preds = apply(solem, X_test_f32) +predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1 + +# TODO fix in rule-and-branch.jl # preds2 = apply!(solem, X_test, y_test) # @test preds == preds2 -accuracy = sum(preds .== y_test)/length(y_test) -@test accuracy > 0.9 - -# apply!(solem, X_test, y_test, mode = :append) - -solem = @test_throws ErrorException solemodel(trees, Matrix(X_train), y_train; classlabels, keep_condensed = true) -solem = @test_nowarn solemodel(trees, Matrix(X_train), y_train; classlabels, keep_condensed = false) - -printmodel(solem; max_depth = 7, show_intermediate_finals = true, show_metrics = true) - -# comparision with XGBoost.jl +# ---------------------------------------------------------------------------- # +# julia XGBoost # +# ---------------------------------------------------------------------------- # yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1 # create and train a gradient boosted tree model of 5 trees bst = XGB.xgboost( (X_train, yl_train), num_round=10, num_class=3, - max_depth=6, + tree_method="exact", objective="multi:softmax" ) # obtain model predictions -ŷ = XGB.predict(bst, X_test) +xg_preds = XGB.predict(bst, X_test) -predsl = 
CategoricalArrays.levelcode.(categorical(preds)) .- 1 -@test predsl == ŷ +@test predsl == xg_preds +# ---------------------------------------------------------------------------- # +# Accuracy # +# ---------------------------------------------------------------------------- # +xg_accuracy = sum(preds .== y_test)/length(y_test) +# @test accuracy >= 0.8 + +# decision tree +Tree = MLJ.@load DecisionTreeClassifier pkg=DecisionTree +dt_model = Tree(max_depth=-1, min_samples_leaf=1, min_samples_split=2) +dt_mach = machine(dt_model, X_train, y_train) +fit!(dt_mach, verbosity=0) +dt_solem = solemodel(fitted_params(dt_mach).tree) +dt_preds = apply(dt_solem, X_test) +dt_accuracy = sum(dt_preds .== y_test)/length(y_test) + +# random forest +Forest = MLJ.@load RandomForestClassifier pkg=DecisionTree +rm_model = Forest(;max_depth=3, min_samples_leaf=1, min_samples_split=2, n_trees=10, rng) +rm_mach = machine(rm_model, X_train, y_train) +fit!(rm_mach, verbosity=0) +classlabels = (rm_mach).fitresult[2] +classlabels = classlabels[sortperm((rm_mach).fitresult[3])] +featurenames = report(rm_mach).features +rm_solem = solemodel(fitted_params(rm_mach).forest; classlabels, featurenames) +rm_preds = apply(rm_solem, X_test) +rm_accuracy = sum(rm_preds .== y_test)/length(y_test) + +println("XGBoost accuracy: ", xg_accuracy) +println("DecisionTree accuracy: ", dt_accuracy) +println("RandomForest accuracy: ", rm_accuracy) + +@test xg_accuracy ≥ rm_accuracy ≥ dt_accuracy + +# ---------------------------------------------------------------------------- # +# XGBoost Alphabet # +# ---------------------------------------------------------------------------- # @test_nowarn alphabet(fitted_params(mach).fitresult[1]) # ---------------------------------------------------------------------------- # # Data Validation # # ---------------------------------------------------------------------------- # @testset "data validation" begin - for seed in 1:40 - train, test = partition(eachindex(y), train_ratio; shuffle=true, rng=Xoshiro(seed)) - X_train, y_train = X[train, :], y[train] - X_test, y_test = X[test, :], y[test] - for num_round in 10:10:100 - for eta in 0.1:0.1:0.9 - model = XGTrees(; num_round, eta, objective="multi:softmax") - mach = machine(model, X_train, y_train) - fit!(mach) - trees = XGB.trees(mach.fitresult[1]) - solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, use_float32=true) - X_test_f32 = mapcols(col -> Float32.(col), X_test) - preds = apply(solem, X_test_f32) - predsl = CategoricalArrays.levelcode.(categorical(preds)) - - yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1 - bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softmax") - ŷ = XGB.predict(bst, X_test) - - @test (predsl .-1) == ŷ + XGTrees = MLJ.@load XGBoostClassifier pkg=XGBoost + + for train_ratio in 0.5:0.1:0.9 + for seed in 1:40 + train, test = partition(eachindex(y), train_ratio; shuffle=true, rng=Xoshiro(seed)) + X_train, y_train = X[train, :], y[train] + X_test, y_test = X[test, :], y[test] + + for num_round in 10:10:50 + for eta in 0.1:0.1:0.6 + model = XGTrees(; num_round, eta, objective="multi:softmax") + mach = machine(model, X_train, y_train) + fit!(mach, verbosity=0) + trees = XGB.trees(mach.fitresult[1]) + encoding = get_encoding(mach.fitresult[2]) + classlabels = get_classlabels(encoding) + featurenames = mach.report.vals[1].features + solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames) + X_test_f32 = mapcols(col -> 
Float32.(col), X_test) + preds = apply(solem, X_test_f32) + predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1 + + yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1 + bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softmax") + xg_preds = XGB.predict(bst, X_test) + + if predsl != xg_preds + println("train_ratio: ", train_ratio) + println("seed: ", seed) + println("num_round: ", num_round) + println("eta: ", eta) + gino + end + @test predsl == xg_preds + end end end end From c948f27aa57c7e6f954b33427189a6924417d735 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Wed, 16 Apr 2025 23:09:22 +0200 Subject: [PATCH 27/44] xgboost apply! --- .github/workflows/ci.yml | 3 +- Project.toml | 2 +- src/SoleModels.jl | 1 - src/utils/models/ensembles.jl | 44 ++++++++++++--------------- test/XGBoostExt/xgboost_classifier.jl | 7 ++--- 5 files changed, 25 insertions(+), 32 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9cdf0cf..ba475a2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,4 +16,5 @@ jobs: - uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - slug: aclai-lab/SoleModels.jl \ No newline at end of file + slug: aclai-lab/SoleModels.jl + diff --git a/Project.toml b/Project.toml index fa6d55d..d5c217b 100644 --- a/Project.toml +++ b/Project.toml @@ -84,4 +84,4 @@ SoleData = "123f1ae1-6307-4526-ab5b-aab3a92a2b8c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "DataFrames", "Random", "MLJ", "MultiData", "Markdown", "InteractiveUtils", "BenchmarkTools", "MLJBase", "XGBoost", "DecisionTree", "MLJDecisionTreeInterface", "SoleData"] +test = ["Test", "DataFrames", "Random", "MLJ", "MultiData", "Markdown", "InteractiveUtils", "BenchmarkTools", "MLJBase", "XGBoost", "DecisionTree", "MLJDecisionTreeInterface", "SoleData", "CategoricalArrays"] diff --git a/src/SoleModels.jl b/src/SoleModels.jl index a3ae567..74a9782 100644 --- a/src/SoleModels.jl +++ b/src/SoleModels.jl @@ -61,7 +61,6 @@ export height export DecisionEnsemble, models export DecisionForest, trees export DecisionSet, rules, nrules - export DecisionXGBoost export MixedModel diff --git a/src/utils/models/ensembles.jl b/src/utils/models/ensembles.jl index 6738264..261823c 100644 --- a/src/utils/models/ensembles.jl +++ b/src/utils/models/ensembles.jl @@ -507,30 +507,24 @@ function apply( end # TODO parallelize -# function apply!( -# m::DecisionXGBoost, -# d::AbstractInterpretationSet, -# y::AbstractVector; -# mode = :replace, -# leavesonly = false, -# # show_progress = false, # length(ntrees(m)) > 15, -# suppress_parity_warning = false, -# kwargs... -# ) -# # @show y -# y = __apply_pre(m, d, y) -# # _d = SupportedLogiset(d) TODO? -# # @show y -# preds = hcat([apply!(subm, d, y; mode, leavesonly, kwargs...) for subm in models(m)]...) - -# preds = __apply_post(m, preds) - -# preds = [ -# weighted_aggregation(m)(preds[i,:]; suppress_parity_warning, kwargs...) -# for i in 1:size(preds,1) -# ] +function apply!( + m::DecisionXGBoost, + d::AbstractInterpretationSet, + y::AbstractVector; + mode::Symbol=:replace, + leavesonly::Bool=false, + suppress_parity_warning::Bool=true, + kwargs... +) + y = __apply_pre(m, d, y) -# preds = __apply_pre(m, d, preds) -# return __apply!(m, mode, preds, y, leavesonly) -# end + preds = hcat([apply_leaf_scores(subm, d; suppress_parity_warning, kwargs...) for subm in models(m)]...) 
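+    # One column of leaf scores per sub-model. NOTE: `apply_leaf_scores` and
+    # `scored_aggregation` (used below) are assumed to be defined alongside
+    # this ensemble code; they are not shown in this patch.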
+ preds = __apply_post(m, preds) + preds = [ + scored_aggregation(m)(pred, sort(unique(m.info.supporting_labels))) + for pred in eachrow(preds) + ] + preds = __apply_pre(m, d, preds) + return __apply!(m, mode, preds, y, leavesonly) +end diff --git a/test/XGBoostExt/xgboost_classifier.jl b/test/XGBoostExt/xgboost_classifier.jl index d25b661..7df4871 100644 --- a/test/XGBoostExt/xgboost_classifier.jl +++ b/test/XGBoostExt/xgboost_classifier.jl @@ -59,10 +59,9 @@ X_test_f32 = mapcols(col -> Float32.(col), X_test) preds = apply(solem, X_test_f32) predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1 -# TODO fix in rule-and-branch.jl -# preds2 = apply!(solem, X_test, y_test) - -# @test preds == preds2 +apply!(solem, X_test, y_test) +@test solem.info.supporting_predictions == preds +@test solem.info.supporting_labels == y_test # ---------------------------------------------------------------------------- # # julia XGBoost # From ef113800b632c06315c24e5624f2efb640bf77e0 Mon Sep 17 00:00:00 2001 From: Perro2110 Date: Fri, 18 Apr 2025 13:21:33 +0200 Subject: [PATCH 28/44] extractrules --> modalextractrules --- src/SoleModels.jl | 5 ++++- src/deprecate.jl | 2 +- src/rule-extraction.jl | 16 ++++++++-------- src/utils/models/other.jl | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/SoleModels.jl b/src/SoleModels.jl index 0fbd307..4b0164e 100644 --- a/src/SoleModels.jl +++ b/src/SoleModels.jl @@ -93,7 +93,10 @@ export subtreeheight include("symbolic-utils.jl") export PlainRuleExtractor -export modalextractrules, listrules, joinrules + + + +export extractrules, listrules, joinrules include("rule-extraction.jl") diff --git a/src/deprecate.jl b/src/deprecate.jl index a1f16d8..3f944f1 100644 --- a/src/deprecate.jl +++ b/src/deprecate.jl @@ -3,7 +3,7 @@ const MixedSymbolicModel = MixedModel const List = DecisionList const Tree = DecisionTree const Forest = DecisionForest - +const modalextractrules = extractrules; export modalextractrules @inline function apply( diff --git a/src/rule-extraction.jl b/src/rule-extraction.jl index d94fb05..50a86c0 100644 --- a/src/rule-extraction.jl +++ b/src/rule-extraction.jl @@ -6,7 +6,7 @@ An exact or heuristic logical method for extracting logical rule from symbolic m Refer to [SolePostHoc](https://github.com/aclai-lab/SolePostHoc.jl) for rule extraction methods. -See also [`modalextractrules`](@ref), [`Rule`](@ref)], [`issymbolicmodel`](@ref). +See also [`extractrules`](@ref), [`Rule`](@ref)], [`issymbolicmodel`](@ref). """ abstract type RuleExtractor end @@ -16,33 +16,33 @@ Return whether a rule extraction method is known to be exact (as opposed to heur isexact(::RuleExtractor) = false """ - modalextractrules(re::RuleExtractor, m, args...; kwargs...) + extractrules(re::RuleExtractor, m, args...; kwargs...) Extract rules from symbolic model `m`, using a rule extraction method `re`. """ -function modalextractrules(re::RuleExtractor, m, args...; kwargs...) - return error("Please, provide method modalextractrules(::$(typeof(m)), args...; kwargs...).") +function extractrules(re::RuleExtractor, m, args...; kwargs...) + return error("Please, provide method extractrules(::$(typeof(m)), args...; kwargs...).") end # Helpers function (RE::Type{<:RuleExtractor})(args...; kwargs...) - return modalextractrules(RE(), args...; kwargs...) + return extractrules(RE(), args...; kwargs...) end # Helpers function (re::RuleExtractor)(args...; kwargs...) - return modalextractrules(re, args...; kwargs...) + return extractrules(re, args...; kwargs...) 
end """ Plain extraction method involves listing one rule per each possible symbolic path within the model. -With this method, [`modalextractrules`](@ref) redirects to [`listrules`](@ref). +With this method, [`extractrules`](@ref) redirects to [`listrules`](@ref). See also [`listrules`](@ref), [`Rule`](@ref)], [`issymbolicmodel`](@ref). """ struct PlainRuleExtractor <: RuleExtractor end isexact(::PlainRuleExtractor) = true -function modalextractrules(::PlainRuleExtractor, m, args...; kwargs...) +function extractrules(::PlainRuleExtractor, m, args...; kwargs...) if haslistrules(m) listrules(m, args...; kwargs...) else diff --git a/src/utils/models/other.jl b/src/utils/models/other.jl index bb9280b..60f79f6 100644 --- a/src/utils/models/other.jl +++ b/src/utils/models/other.jl @@ -361,7 +361,7 @@ iscomplete(m::DecisionSet) = m.iscomplete isnonoverlapping(m::DecisionSet) = m.isnonoverlapping function listrules(m::DecisionSet) - isnonoverlapping || error("Cannot listrules from non-overlapping decision set. Try `modalextractrules` with heuristics, instead.") + isnonoverlapping || error("Cannot listrules from non-overlapping decision set. Try `extractrules` with heuristics, instead.") rules(m) end From d26e1980d97b8e6c13cd11e3e7574f3bc4bad6d4 Mon Sep 17 00:00:00 2001 From: Perro2110 Date: Fri, 18 Apr 2025 18:26:40 +0200 Subject: [PATCH 29/44] minor add in TODO --- TODO.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TODO.md b/TODO.md index e6e2383..04cfba6 100644 --- a/TODO.md +++ b/TODO.md @@ -51,6 +51,6 @@ Test: # Distribution of covered examples for consequent # Distribution of examples on which the rule was built ✔ Testing parser error @done(24-05-31 11:12) - + ☐ Add test for rule-extraction.jl Questions: - ✔ Readmetrics for CN2 statistics @done(24-05-31 11:12) \ No newline at end of file + ✔ Readmetrics for CN2 statistics @done(24-05-31 11:12) From 9638489e267cec7df5513ff07dcb40196fe55a24 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Sat, 19 Apr 2025 00:28:33 +0200 Subject: [PATCH 30/44] fix tests & deps --- Project.toml | 2 +- test/DecisionTreeExt/forest.jl | 32 +++++++++++++++++ test/DecisionTreeExt/tree.jl | 49 +++++++++++++++++++++++++++ test/XGBoostExt/xgboost_classifier.jl | 10 ++---- 4 files changed, 84 insertions(+), 9 deletions(-) diff --git a/Project.toml b/Project.toml index d5c217b..a9779fa 100644 --- a/Project.toml +++ b/Project.toml @@ -84,4 +84,4 @@ SoleData = "123f1ae1-6307-4526-ab5b-aab3a92a2b8c" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "DataFrames", "Random", "MLJ", "MultiData", "Markdown", "InteractiveUtils", "BenchmarkTools", "MLJBase", "XGBoost", "DecisionTree", "MLJDecisionTreeInterface", "SoleData", "CategoricalArrays"] +test = ["Test", "DataFrames", "Random", "MLJ", "MultiData", "Markdown", "InteractiveUtils", "BenchmarkTools", "MLJBase", "XGBoost", "DecisionTree", "MLJModelInterface", "MLJDecisionTreeInterface", "SoleData"] diff --git a/test/DecisionTreeExt/forest.jl b/test/DecisionTreeExt/forest.jl index f82afd2..fa7dccc 100644 --- a/test/DecisionTreeExt/forest.jl +++ b/test/DecisionTreeExt/forest.jl @@ -62,3 +62,35 @@ accuracy = sum(preds .== y_test)/length(y_test) printmodel(solem; max_depth = 7, show_intermediate_finals = true, show_metrics = true) # @test_broken printmodel.(listrules(solem, min_lift = 1.0, min_ninstances = 0); show_metrics = true); + +# ---------------------------------------------------------------------------- # +# Data Validation # +# 
---------------------------------------------------------------------------- # +@testset "data validation" begin + Forest = MLJ.@load RandomForestClassifier pkg=DecisionTree + + for train_ratio in 0.5:0.1:0.9 + for seed in 1:40 + train, test = partition(eachindex(y), train_ratio; shuffle=true, rng=Xoshiro(seed)) + X_train, y_train = X[train, :], y[train] + X_test, y_test = X[test, :], y[test] + + for n_trees in 10:10:60 + # solemodel + model = Forest(; n_trees, rng=Xoshiro(seed)) + mach = machine(model, X_train, y_train) + fit!(mach, verbosity=0) + classlabels = (mach).fitresult[2][sortperm((mach).fitresult[3])] + featurenames = MLJ.report(mach).features + solem = solemodel(MLJ.fitted_params(mach).forest; classlabels, featurenames) + preds = apply!(solem, X_test, y_test) + + # decisiontree + rf_model = DT.build_forest(y_train, Matrix(X_train), -1, n_trees; rng=Xoshiro(seed)) + rf_preds = DT.apply_forest(rf_model, Matrix(X_test)) + + @test preds == rf_preds + end + end + end +end diff --git a/test/DecisionTreeExt/tree.jl b/test/DecisionTreeExt/tree.jl index a0c349a..c7c1311 100644 --- a/test/DecisionTreeExt/tree.jl +++ b/test/DecisionTreeExt/tree.jl @@ -7,6 +7,7 @@ using DataFrames using MLJDecisionTreeInterface using SoleModels using Random +using CategoricalArrays import DecisionTree as DT @@ -75,3 +76,51 @@ printmodel.(sort(interesting_rules, by = readmetrics); show_metrics = (; round_d @test length(joinrules(interesting_rules)) == 3 @test (natoms.((interesting_rules)) |> sum) == (natoms.(joinrules(interesting_rules)) |> sum) + +# ---------------------------------------------------------------------------- # +# Data Validation # +# ---------------------------------------------------------------------------- # +@testset "data validation" begin + Tree = MLJ.@load DecisionTreeClassifier pkg=DecisionTree + + for train_ratio in 0.5:0.1:0.9 + for seed in 1:40 + train, test = partition(eachindex(y), train_ratio; shuffle=true, rng=Xoshiro(seed)) + X_train, y_train = X[train, :], y[train] + X_test, y_test = X[test, :], y[test] + + for max_depth in 2:1:6 + # solemodel + model = Tree(; max_depth, rng=Xoshiro(seed)) + mach = machine(model, X_train, y_train) + fit!(mach, verbosity=0) + solem = solemodel(MLJ.fitted_params(mach).tree) + preds = apply!(solem, X_test, y_test) + + # decisiontree + dt_model = DT.build_tree(y_train, Matrix(X_train), 0, max_depth; rng=Xoshiro(seed)) + dt_preds = DT.apply_tree(dt_model, Matrix(X_test)) + + @test preds == dt_preds + end + end + end +end + +### the problem rises in fit! 
in MLJDecisionTreeInterface
+Tree = MLJ.@load DecisionTreeClassifier pkg=DecisionTree
+seed = 1
+max_depth = 3
+train_ratio = 0.5
+train, test = partition(eachindex(y), train_ratio; shuffle=true, rng=Xoshiro(seed))
+X_train, y_train = X[train, :], y[train]
+X_test, y_test = X[test, :], y[test]
+
+model = Tree(; max_depth, rng=Xoshiro(seed))
+mach = machine(model, X_train, y_train)
+fit!(mach, verbosity=0)
+solem = solemodel(MLJ.fitted_params(mach).tree)
+preds = apply!(solem, X_test, y_test)
+
+dt_model = DT.build_tree(y_train, Matrix(X_train), 0, max_depth; rng=Xoshiro(seed))
+dt_preds = DT.apply_tree(dt_model, Matrix(X_test))
\ No newline at end of file
diff --git a/test/XGBoostExt/xgboost_classifier.jl b/test/XGBoostExt/xgboost_classifier.jl
index 7df4871..a885861 100644
--- a/test/XGBoostExt/xgboost_classifier.jl
+++ b/test/XGBoostExt/xgboost_classifier.jl
@@ -1,6 +1,7 @@
 using Test
 
 using MLJ
+using MLJ.CategoricalArrays: levelcode, categorical
 using MLJBase
 using DataFrames
 
@@ -141,20 +142,13 @@ println("RandomForest accuracy: ", rm_accuracy)
             featurenames = mach.report.vals[1].features
             solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames)
             X_test_f32 = mapcols(col -> Float32.(col), X_test)
-            preds = apply(solem, X_test_f32)
+            preds = apply!(solem, X_test_f32, y_test)
             predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1
 
             yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1
             bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softmax")
             xg_preds = XGB.predict(bst, X_test)
 
-            if predsl != xg_preds
-                println("train_ratio: ", train_ratio)
-                println("seed: ", seed)
-                println("num_round: ", num_round)
-                println("eta: ", eta)
-                gino
-            end
             @test predsl == xg_preds
         end
     end

From b2a48698b59819e8492fdb569dad9e2aa1ebcc07 Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Wed, 23 Apr 2025 17:28:00 +0200
Subject: [PATCH 31/44] posthoc

---
 src/evaluate.jl | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/src/evaluate.jl b/src/evaluate.jl
index a460498..cc459ef 100644
--- a/src/evaluate.jl
+++ b/src/evaluate.jl
@@ -322,8 +322,20 @@ function evaluaterule(
     classmask = (Y .== outcome(consequent(rule)))
     checkmask, explanations = begin
         if compute_explanations
+
+            ### from Perry's SoleModels fix for SolePostHoc
             # Note: This is kind of quick and dirty.
-            disjs = SoleLogics.disjuncts(SoleLogics.LeftmostDisjunctiveForm(antecedent(rule)))
+            # disjs = SoleLogics.disjuncts(SoleLogics.LeftmostDisjunctiveForm(antecedent(rule)))
+            ante = antecedent(rule)
+            if (ante isa SyntaxBranch)
+                # Disjunctive root: convert to disjunctive form, then extract the disjuncts
+                dnf = SoleLogics.LeftmostDisjunctiveForm(ante)
+                disjs = SoleLogics.disjuncts(dnf)
+            else
+                # No disjunction at the root → a single disjunct
+                disjs = [ante]
+            end
+
             checkmatrix = hcat([check(disj, X; kwargs...) for disj in disjs]...)
             # @show checkmatrix
             checkmask = map(any, eachrow(checkmatrix))
@@ -337,11 +349,22 @@
     end
     pos_checkmask = checkmask[classmask]
     neg_checkmask = checkmask[(!).(classmask)]
+
+    ### from Perry's SoleModels fix for SolePostHoc
+    # Guard against empty masks (avoids division by zero)
+    sensitivity = length(pos_checkmask) > 0 ? sum(pos_checkmask)/length(pos_checkmask) : 0.0
+    specificity = length(neg_checkmask) > 0 ? 
1-(sum(neg_checkmask)/length(neg_checkmask)) : 1.0 + out = (; classmask = classmask, checkmask = checkmask, - sensitivity = sum(pos_checkmask)/length(pos_checkmask), - specificity = 1-(sum(neg_checkmask)/length(neg_checkmask)), + + ### from Perry's SoleModels fix for SolePostHoc + # sensitivity = sum(pos_checkmask)/length(pos_checkmask), + # specificity = 1-(sum(neg_checkmask)/length(neg_checkmask)), + sensitivity = sensitivity, + specificity = specificity, + explanations = explanations, ) return out From 1b5a6c633e93156d018dcd30bdc2c2c7ee4775c2 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Sun, 27 Apr 2025 13:07:48 +0200 Subject: [PATCH 32/44] test fixed --- test/DecisionTreeExt/tree.jl | 68 ++++++++++++++---------------------- 1 file changed, 26 insertions(+), 42 deletions(-) diff --git a/test/DecisionTreeExt/tree.jl b/test/DecisionTreeExt/tree.jl index c7c1311..909eb31 100644 --- a/test/DecisionTreeExt/tree.jl +++ b/test/DecisionTreeExt/tree.jl @@ -81,46 +81,30 @@ printmodel.(sort(interesting_rules, by = readmetrics); show_metrics = (; round_d # Data Validation # # ---------------------------------------------------------------------------- # @testset "data validation" begin - Tree = MLJ.@load DecisionTreeClassifier pkg=DecisionTree - - for train_ratio in 0.5:0.1:0.9 - for seed in 1:40 - train, test = partition(eachindex(y), train_ratio; shuffle=true, rng=Xoshiro(seed)) - X_train, y_train = X[train, :], y[train] - X_test, y_test = X[test, :], y[test] - - for max_depth in 2:1:6 - # solemodel - model = Tree(; max_depth, rng=Xoshiro(seed)) - mach = machine(model, X_train, y_train) - fit!(mach, verbosity=0) - solem = solemodel(MLJ.fitted_params(mach).tree) - preds = apply!(solem, X_test, y_test) - - # decisiontree - dt_model = DT.build_tree(y_train, Matrix(X_train), 0, max_depth; rng=Xoshiro(seed)) - dt_preds = DT.apply_tree(dt_model, Matrix(X_test)) - - @test preds == dt_preds - end - end - end + Tree = MLJ.@load DecisionTreeClassifier pkg=DecisionTree + + for train_ratio in 0.5:0.1:0.9 + for seed in 1:40 + train, test = partition(eachindex(y), train_ratio; shuffle=true, rng=Xoshiro(seed)) + X_train, y_train = X[train, :], y[train] + X_test, y_test = X[test, :], y[test] + + for max_depth in 2:1:6 + # solemodel + model = Tree(; max_depth, rng=Xoshiro(seed)) + mach = machine(model, X_train, y_train) + fit!(mach, verbosity=0) + solem = solemodel(MLJ.fitted_params(mach).tree) + preds = apply!(solem, X_test, y_test) + + # decisiontree + y_coded_train = @. CategoricalArrays.levelcode(y_train) + dt_model = DT.build_tree(y_coded_train, Matrix(X_train), 0, max_depth; rng=Xoshiro(seed)) + dt_preds = DT.apply_tree(dt_model, Matrix(X_test)) + + preds_coded = CategoricalArrays.levelcode.(CategoricalArray(preds)) + @test preds_coded == dt_preds + end + end + end end - -### the problem rises in fit! 
in MLJDecisionTreeInterface -Tree = MLJ.@load DecisionTreeClassifier pkg=DecisionTree -seed = 1 -max_depth = 3 -train_ratio = 0.5 -train, test = partition(eachindex(y), train_ratio; shuffle=true, rng=Xoshiro(seed)) -X_train, y_train = X[train, :], y[train] -X_test, y_test = X[test, :], y[test] - -model = Tree(; max_depth, rng=Xoshiro(seed)) -mach = machine(model, X_train, y_train) -fit!(mach, verbosity=0) -solem = solemodel(MLJ.fitted_params(mach).tree) -preds = apply!(solem, X_test, y_test) - -dt_model = DT.build_tree(y_train, Matrix(X_train), 0, max_depth; rng=Xoshiro(seed)) -dt_preds = DT.apply_tree(dt_model, Matrix(X_test)) \ No newline at end of file From dbe8e83501c744233316957e44e67a844fdeba1a Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Sun, 27 Apr 2025 17:59:19 +0200 Subject: [PATCH 33/44] fix extras deps --- Project.toml | 18 +- ext/MLJXGBoostInterfaceExt.jl | 308 ----------------------- test/XGBoostExt/xgboost_classifier.jl | 9 +- test/XGBoostExt/xgboost_predict_issue.jl | 156 ------------ 4 files changed, 12 insertions(+), 479 deletions(-) delete mode 100644 ext/MLJXGBoostInterfaceExt.jl delete mode 100644 test/XGBoostExt/xgboost_predict_issue.jl diff --git a/Project.toml b/Project.toml index 98fba71..999ae09 100644 --- a/Project.toml +++ b/Project.toml @@ -1,12 +1,7 @@ name = "SoleModels" uuid = "4249d9c7-3290-4ddd-961c-e1d3ec2467f8" license = "MIT" -authors = [ - "Michele GHIOTTI", - "Giovanni PAGLIARINI", - "Edoardo PONSANESI", - "Eduard I. STAN", -] +authors = ["Michele GHIOTTI", "Giovanni PAGLIARINI", "Edoardo PONSANESI", "Eduard I. STAN"] version = "0.10.0" [deps] @@ -45,7 +40,6 @@ XGBoostExt = "XGBoost" [compat] AbstractTrees = "0.4" -BenchmarkTools = "1" CSV = "0.10" CategoricalArrays = "0.10" DataFrames = "1" @@ -74,8 +68,10 @@ ZipFile = "0.10" julia = "1" [extras] -BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb" +FunctionWrappers = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7" MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d" @@ -83,10 +79,12 @@ MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661" MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" MultiData = "8cc5100c-b3d1-4f82-90cb-0ea93d317aba" -PlutoUI = "7f904dfe-b85e-4ff6-b463-dae2292396a8" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" SoleData = "123f1ae1-6307-4526-ab5b-aab3a92a2b8c" +SoleLogics = "b002da8f-3cb3-4d91-bbe3-2953433912b5" +SoleModels = "4249d9c7-3290-4ddd-961c-e1d3ec2467f8" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +XGBoost = "009559a3-9522-5dbb-924b-0b6ed2b22bb9" [targets] -test = ["Test", "DataFrames", "Random", "MLJ", "MultiData", "Markdown", "InteractiveUtils", "BenchmarkTools", "MLJBase", "XGBoost", "DecisionTree", "MLJModelInterface", "MLJDecisionTreeInterface", "SoleData"] +test = ["Test", "CategoricalArrays", "DataFrames", "Random", "MLJ", "MultiData", "Markdown", "InteractiveUtils", "MLJBase", "XGBoost", "DecisionTree", "MLJModelInterface", "MLJDecisionTreeInterface", "SoleData", "SoleModels", "SoleLogics", "FunctionWrappers"] diff --git a/ext/MLJXGBoostInterfaceExt.jl b/ext/MLJXGBoostInterfaceExt.jl deleted file mode 100644 index caf64fe..0000000 --- a/ext/MLJXGBoostInterfaceExt.jl +++ /dev/null @@ -1,308 +0,0 @@ -module 
MLJXGBoostInterfaceExt - -import MLJModelInterface as MMI -import XGBoost as XGB -import Tables -using CategoricalArrays -using AbstractTrees - -import Sole: AbstractModel -import Sole: VariableValue, ScalarCondition, Atom, ConstantModel, Branch, DecisionTree - -const PKG = "MLJXGBoostInterface" - -abstract type XGBoostAbstractRegressor <: MMI.Deterministic end -abstract type XGBoostAbstractClassifier <: MMI.Probabilistic end - -const XGTypes = Union{XGBoostAbstractRegressor,XGBoostAbstractClassifier} - -struct TreePrinter{T} - tree::T - features::Vector{Symbol} -end -(c::TreePrinter)(depth) = AbstractTrees.print_tree(c.tree, depth, feature_names = c.features) -(c::TreePrinter)() = AbstractTrees.print_tree(c.tree, 5, feature_names = c.features) - -Base.show(stream::IO, c::TreePrinter) = - print(stream, "TreePrinter object (call with display depth)") - -function classes(y) - p = CategoricalArrays.pool(y) - [p[i] for i in 1:length(p)] -end - -# function modelexpr(name::Symbol, absname::Symbol, obj::AbstractString, objvalidate::Symbol) -function modelexpr(name::Symbol, absname::Symbol) - metric = absname == :XGBoostAbstractClassifier ? "mlogloss" : "rmse" - quote - MMI.@mlj_model mutable struct $name <: $absname - # MMI.@mlj_model mutable struct $name - # ref: https://xgboost.readthedocs.io/en/stable/parameter.html - # general parameters - booster::String = "gbtree" - # device::String = "cpu" - eval_metric::String = $metric - objective::Union{String, Nothing} = nothing - num_round::Int = 100::(_ ≥ 0) - early_stopping_rounds::Int = 0::(_ ≥ 0) - - - # parameters for tree booster - eta::Float64 = 0.3::(0.0 ≤ _ ≤ 1.0) - alpha::Float64 = 0::(_ ≥ 0) - gamma::Float64 = 0::(_ ≥ 0) - lambda::Float64 = 1::(_ ≥ 0) - - max_depth::Int = 6::(_ ≥ 0) - min_child_weight::Float64 = 1::(_ ≥ 0) - max_delta_step::Float64 = 0::(_ ≥ 0) - subsample::Float64 = 1::(0 < _ ≤ 1) - sampling_method::String = "uniform" - - colsample_bynode::Float64 = 1::(0 < _ ≤ 1) - colsample_bylevel::Float64 = 1::(0 < _ ≤ 1) - colsample_bytree::Float64 = 1::(0 < _ ≤ 1) - - tree_method::String = "auto" - - # scale_pos_weight::Float64 = 1.0 - end - - - # # additional parameters for dart booster - # one_drop::Union{Int,Bool} = 0::(0 ≤ _ ≤ 1) - # normalize_type::String = "tree" - # rate_drop::Float64 = 0::(0 ≤ _ ≤ 1) - # sample_type::String = "uniform" - # skip_drop::Float64 = 0::(0 ≤ _ ≤ 1) - - # # additional parameters for linear booster - # feature_selector::String = "cyclic" - # top_k::Int = 0::(_ ≥ 0) - - # # additional parameters for tweedie regression - # tweedie_variance_power::Float64 = 1.5::(1 < _ < 2) - - # # additional parameters for pseudo-huber - # # quantile_alpha TODO - - # # additional parameters for quantile loss - # # quantile_alpha TODO - - # # learning task parameters - # base_score::Float64 = 0.5 - - - # # test::Int = 1::(_ ≥ 0) - # # sketch_eps::Float64 = 0.03::(0 < _ < 1) - # # predictor::String = "cpu_predictor" - # # watchlist = nothing # if this is nothing we will not pass it so as to use default - # # importance_type::String = "gain" - # end - end -end - -# eval(modelexpr(:XGBoostClassifier, :XGBoostAbstractClassifier, "automatic", :validate_class_objective)) -# eval(modelexpr(:XGBoostCount, :XGBoostAbstractRegressor, "count:poisson", :validate_count_objective)) -# eval(modelexpr(:XGBoostRegressor, :XGBoostAbstractRegressor, "reg:squarederror", :validate_reg_objective)) - -eval(modelexpr(:XGBoostClassifier, :XGBoostAbstractClassifier)) -eval(modelexpr(:XGBoostCount, :XGBoostAbstractRegressor)) 
-eval(modelexpr(:XGBoostRegressor, :XGBoostAbstractRegressor))
-
-MMI.reports_feature_importances(::Type{<:XGBoostAbstractRegressor}) = true
-MMI.reports_feature_importances(::Type{<:XGBoostAbstractClassifier}) = true
-
-export XGBoostClassifier, XGBoostCount, XGBoostRegressor
-
-function MMI.fit(
-    m::XGBoostClassifier,
-    verbosity::Int,
-    X,
-    y,
-    features,
-    classes,
-)
-
-    integers_seen = unique(y)
-    classes_seen = MMI.decoder(classes)(integers_seen)
-
-    # dX = if isnothing(weight)
-    #     XGB.DMatrix(X, y_code; feature_names=names(X))
-    #     # XGB.DMatrix(MMI.matrix(X), y_code)
-    # else
-    #     XGB.DMatrix(X, y_code; feature_names=names(X), weight = weight)
-    #     # XGB.DMatrix(MMI.matrix(X), y_code; feature_names=names(X), weight = weight)
-    # end
-
-    # bst = xgboost(dm; kwargs(model, verbosity, objective)..., num_class...)
-    nclass = length(classes_seen)
-    if isnothing(m.objective)
-        m.objective = nclass == 2 ? "binary:logistic" : "multi:softprob"
-    end
-
-    params = Dict((field, getfield(m, field)) for field in fieldnames(typeof(m)))
-    bst = XGB.xgboost((X, y.-1); verbosity=verbosity, params..., num_class=nclass)
-
-    # imp = XGB.importancetable(bst)
-    ts = XGB.trees(bst)
-
-    verbosity < 2 || AbstractTrees.print_tree(ts, m.max_depth)
-
-    fitresult = (bst, classes_seen, integers_seen, features)
-
-    cache = nothing
-    report = (
-        classes_seen=nclass,
-        print_tree=TreePrinter(ts, features),
-        features=features,
-    )
-    return fitresult, cache, report
-end
-
-get_encoding(classes_seen) = Dict(MMI.int(c) => c for c in classes(classes_seen))
-classlabels(encoding) = [string(encoding[i]) for i in sort(keys(encoding) |> collect)]
-
-struct InfoXGBNode
-    node::XGB.Node
-    info::NamedTuple
-end
-AbstractTrees.nodevalue(n::InfoXGBNode) = n.node
-
-struct InfoXGBLeaf
-    node::XGB.Node
-    info::NamedTuple
-end
-AbstractTrees.nodevalue(l::InfoXGBLeaf) = l.node
-
-# struct InfoNode{S,T} <: AbstractTrees.AbstractNode{DecisionTree.Node{S,T}}
-#     node::DecisionTree.Node{S,T}
-#     info::NamedTuple
-# end
-# AbstractTrees.nodevalue(n::InfoNode) = n.node
-
-# struct InfoLeaf{T} <: AbstractTrees.AbstractNode{DecisionTree.Leaf{T}}
-#     leaf::DecisionTree.Leaf{T}
-#     info::NamedTuple
-# end
-# AbstractTrees.nodevalue(l::InfoLeaf) = l.leaf
-
-isleaf(node::XGB.Node) = isempty(node.children) ? true : false
-
-wrap(vecnode::Vector{<:XGB.Node}, info::NamedTuple=NamedTuple()) = MLJXGBoostInterface.wrap.(vecnode, Ref(info))
-# wrap(tree::DecisionTree.Root, info::NamedTuple=NamedTuple()) = wrap(tree.node, info)
-wrap(node::XGB.Node, info::NamedTuple=NamedTuple()) = isleaf(node) ? InfoXGBLeaf(node, info) : InfoXGBNode(node, info)
-# wrap(leaf::DecisionTree.Leaf, info::NamedTuple=NamedTuple()) = InfoLeaf(leaf, info)
-
-function MMI.fitted_params(::XGBoostAbstractClassifier, fitresult)
-    raw_tree = XGB.trees(fitresult[1])
-    encoding = get_encoding(fitresult[2])
-    features = fitresult[4]
-    classlabels = MLJXGBoostInterface.classlabels(encoding)
-    info = (featurenames=features, classlabels)
-    tree = MLJXGBoostInterface.wrap(raw_tree, info,)
-    (; tree, raw_tree, encoding, features)
-end
-
-function AbstractTrees.children(node::InfoXGBNode)
-    (wrap(node.children[1], node.info), wrap(node.children[2], node.info))
-end
-AbstractTrees.children(node::InfoXGBLeaf) = ()
-
-# to get column names based on table access type:
-_columnnames(X) = _columnnames(X, Val(Tables.columnaccess(X))) |> collect
-_columnnames(X, ::Val{true}) = Tables.columnnames(Tables.columns(X))
-_columnnames(X, ::Val{false}) = Tables.columnnames(first(Tables.rows(X)))
-
-MMI.reformat(::XGBoostAbstractClassifier, X, y) =
-    (XGB.DMatrix(X), MMI.int(y), _columnnames(X), classes(y))
-# MMI.reformat(::Regressor, X, y) =
-#     (Tables.matrix(X), float(y), _columnnames(X))
-# MMI.selectrows(::TreeModel, I, Xmatrix, y, meta...) =
-#     (view(Xmatrix, I, :), view(y, I), meta...)
-
-split2id(str::String) = parse(Int, filter(isdigit, str)) + 1
-
-function solemodel(
-    tree::Vector{<:InfoXGBNode},
-    raw_tree::Vector{<:XGB.Node},
-    encoding::Dict,
-    features::Vector{Symbol};
-    kwargs...
-)
-    dt = DecisionTree[]
-    @show encoding
-    for (i, t) in enumerate(tree)
-        idx = (i - 1) % length(encoding) + 1
-        push!(dt, MLJXGBoostInterface.solemodel(t; majority=encoding[idx], kwargs...))
-    end
-
-    return dt
-end
-function solemodel(tree::InfoXGBNode, keep_condensed = false; majority, use_featurenames = true, kwargs...)
-    # @show fieldnames(typeof(tree))
-    use_featurenames = use_featurenames ? tree.info.featurenames : false
-    root, info = begin
-        if keep_condensed
-            root = MLJXGBoostInterface.solemodel(tree.node; majority=majority, use_featurenames = use_featurenames, kwargs...)
-            info = (;
-                apply_preprocess=(y -> UInt32(findfirst(x -> x == y, tree.info.classlabels))),
-                apply_postprocess=(y -> tree.info.classlabels[y]),
-            )
-            root, info
-        else
-            root = MLJXGBoostInterface.solemodel(tree.node; majority=majority, replace_classlabels = tree.info.classlabels, use_featurenames = use_featurenames, kwargs...)
-            info = (;)
-            root, info
-        end
-    end
-
-    info = merge(info, (;
-        featurenames=tree.info.featurenames,
-        #
-        supporting_predictions=root.info[:supporting_predictions],
-        supporting_labels=root.info[:supporting_labels],
-        )
-    )
-
-    return DecisionTree(root, info)
-end
-
-function solemodel(tree::XGB.Node; majority, replace_classlabels = nothing, use_featurenames = false)
-    if isempty(tree.children)
-        # leaf
-        prediction = majority.ref
-        # labels = tree.leaf
-        # if !isnothing(replace_classlabels)
-        #     prediction = replace_classlabels[prediction]
-        #     labels = replace_classlabels[labels]
-        # end
-        # info = (;
-        #     supporting_predictions = fill(prediction, length(labels)),
-        #     supporting_labels = labels,
-        # )
-        ### TODO
-        labels = [1,1,1,1]
-        info = (;
-            supporting_predictions = fill(prediction, length(labels)),
-            supporting_labels = labels,
-        )
-        return ConstantModel(prediction, info)
-    else
-        # node
-        test_operator = (<)
-        # @show fieldnames(typeof(tree))
-        feature = (use_featurenames != false) ? VariableValue(use_featurenames[split2id(tree.split)]) : VariableValue(split2id(tree.split))
-        cond = ScalarCondition(feature, test_operator, tree.split_condition)
-        antecedent = Atom(cond)
-        lefttree = MLJXGBoostInterface.solemodel(tree.children[1]; majority=majority, replace_classlabels=replace_classlabels, use_featurenames=use_featurenames)
-        righttree = MLJXGBoostInterface.solemodel(tree.children[2]; majority=majority, replace_classlabels=replace_classlabels, use_featurenames=use_featurenames)
-        info = (;
-            supporting_predictions = [lefttree.info[:supporting_predictions]..., righttree.info[:supporting_predictions]...],
-            supporting_labels = [lefttree.info[:supporting_labels]..., righttree.info[:supporting_labels]...],
-        )
-        return Branch(antecedent, lefttree, righttree, info)
-    end
-end
-
-end
\ No newline at end of file
diff --git a/test/XGBoostExt/xgboost_classifier.jl b/test/XGBoostExt/xgboost_classifier.jl
index a885861..2685325 100644
--- a/test/XGBoostExt/xgboost_classifier.jl
+++ b/test/XGBoostExt/xgboost_classifier.jl
@@ -1,7 +1,6 @@
 using Test
 
 using MLJ
-using MLJ.CategoricalArrays: levelcode, categorical
 using MLJBase
 using DataFrames
 
@@ -58,7 +57,7 @@ solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, ke
 # Make test instances flow into the model
 X_test_f32 = mapcols(col -> Float32.(col), X_test)
 preds = apply(solem, X_test_f32)
-predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1
+predsl = CategoricalArrays.levelcode.(CategoricalArrays.categorical(preds)) .- 1
 
 apply!(solem, X_test, y_test)
 @test solem.info.supporting_predictions == preds
@@ -67,7 +66,7 @@ apply!(solem, X_test, y_test)
 # ---------------------------------------------------------------------------- #
 #                                julia XGBoost                                  #
 # ---------------------------------------------------------------------------- #
-yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1
+yl_train = CategoricalArrays.levelcode.(CategoricalArrays.categorical(y_train)) .- 1
 # create and train a gradient boosted tree model of 5 trees
 bst = XGB.xgboost(
     (X_train, yl_train),
@@ -143,9 +142,9 @@ println("RandomForest accuracy: ", rm_accuracy)
     solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames)
     X_test_f32 = mapcols(col -> Float32.(col), X_test)
     preds = apply!(solem, X_test_f32, y_test)
-    predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1
+    predsl = CategoricalArrays.levelcode.(CategoricalArrays.categorical(preds)) .- 1
 
-    yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1
+    yl_train = CategoricalArrays.levelcode.(CategoricalArrays.categorical(y_train)) .- 1
     bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softmax")
 
     xg_preds = XGB.predict(bst, X_test)
diff --git a/test/XGBoostExt/xgboost_predict_issue.jl b/test/XGBoostExt/xgboost_predict_issue.jl
deleted file mode 100644
index 198093f..0000000
--- a/test/XGBoostExt/xgboost_predict_issue.jl
+++ /dev/null
@@ -1,156 +0,0 @@
-using MLJ
-using DataFrames
-
-import MLJModelInterface as MMI
-using SoleModels
-import XGBoost as XGB
-using CategoricalArrays
-using Random
-
-# References:
-# https://github.com/chengjunhou/xgb2sql/issues/1
-# https://xgboost.readthedocs.io/en/latest/R-package/xgboostfromJSON.html
-
-# for my own reference:
-# https://xgboost.readthedocs.io/en/latest/build.html
-
-function predict_xgboost_bag(trees, X; n_classes=0, objective="binary:logistic")
-    n_samples = size(X, 1)
-    ntree_limit = length(trees)
-    n_classes == 0 && throw(ArgumentError("n_classes must be specified for multi-class predictions"))
-
-    # Initialize predictions
-    if startswith(objective, "multi:softprob") || startswith(objective, "multi:softmax")
-        # For multi-class probabilities, we need a matrix
-        raw_preds = zeros(Float32, n_samples, n_classes)
-    else
-        # For binary and regression, a vector is sufficient
-        raw_preds = zeros(Float32, n_samples)
-    end
-
-    # Iterate through trees and accumulate predictions
-    for i in 1:ntree_limit
-        tree = trees[i]
-        tree_preds = predict_tree(tree, X)
-
-        if startswith(objective, "multi:softprob") || startswith(objective, "multi:softmax")
-            # For multi-class softprob, each tree outputs predictions for a specific class
-            class_idx = (i - 1) % n_classes + 1
-            raw_preds[:, class_idx] .+= tree_preds
-        else
-            # For binary or regression, simply add the predictions
-            raw_preds .+= tree_preds
-        end
-    end
-    # Apply appropriate transformation based on objective
-    if objective == "binary:logistic"
-        # Apply sigmoid transformation
-        return 1.0 ./ (1.0 .+ exp.(-raw_preds))
-    elseif objective == "multi:softprob"
-        # Apply softmax transformation
-        exp_preds = exp.(raw_preds)
-        row_sums = sum(exp_preds, dims=2)
-        return exp_preds ./ row_sums
-    elseif objective == "multi:softmax"
-        # Return class with highest score
-        if n_classes > 1
-            _, indices = findmax(raw_preds, dims=2)
-            return [idx[2] for idx in indices]
-        else
-            return raw_preds .> 0
-        end
-    elseif objective == "count:poisson"
-        # Apply exponential transformation for Poisson
-        return exp.(raw_preds)
-    else
-        # For regression or other objectives, return raw predictions
-        return raw_preds
-    end
-end
-
-function predict_tree(tree, X)
-    n_samples = size(X, 1)
-    predictions = zeros(Float32, n_samples)
-
-    for i in 1:n_samples
-        predictions[i] = traverse_tree(tree, X[i, :])
-    end
-    return predictions
-end
-
-function traverse_tree(tree, x)
-    # Start at root node
-    node = tree # Adjust based on your tree structure
-
-    # Traverse until reaching a leaf
-    while !isempty(node.children)
-        # Get the split feature and value
-        feature_idx = node.split
-        split_value = Float32(node.split_condition)
-
-        # Decide which child to go to
-        if x[feature_idx] < split_value
-            node = node.children[1]
-        else
-            node = node.children[2]
-        end
-    end
-    # Return the leaf value
-    return Float32(node.leaf)
-end
-
-X, y = @load_iris
-X = DataFrame(X)
-train_ratio = 0.8
-seed, num_round, eta = 3, 1, 0.1
-rng = Xoshiro(seed)
-train, test = partition(eachindex(y), train_ratio; shuffle=true, rng)
-X_train, y_train = X[train, :], y[train]
-X_test, y_test = X[test, :], y[test]
-
-XGTrees = MLJ.@load XGBoostClassifier pkg=XGBoost
-model = XGTrees(; num_round, eta, objective="multi:softprob")
-mach = machine(model, X_train, y_train)
-fit!(mach)
-# mlj_predict = predict(mach, DataFrame(X_test[27,:]))
-mlj_predict = predict(mach, DataFrame(X_test[28,:]))
-
-trees = XGB.trees(mach.fitresult[1])
-get_encoding(classes_seen) = Dict(MMI.int(c) => c for c in MMI.classes(classes_seen))
-get_classlabels(encoding) = [string(encoding[i]) for i in sort(keys(encoding) |> collect)]
-encoding = get_encoding(mach.fitresult[2])
-classlabels = get_classlabels(encoding)
-featurenames = mach.report.vals[1].features
-
-solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, use_float32=false)
-preds = apply(solem, DataFrame(reshape(Float32.(Vector(X_test[28,:])), 1, :), :auto)) # NOT WORKING
-@test preds[1] == "versicolor"
-
-solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames, use_float32=true)
-preds = apply(solem, DataFrame(reshape(Float32.(Vector(X_test[28,:])), 1, :), :auto)) # WORKING
-@test preds[1] == "virginica"
-
-solem = solemodel(trees, Matrix(X_train), y_train; classlabels, featurenames)
-preds = apply(solem, DataFrame(reshape(Float32.(Vector(X_test[28,:])), 1, :), :auto)) # WORKING
-@test preds[1] == "virginica"
-
-predsl = CategoricalArrays.levelcode.(categorical(preds)) .- 1
-
-yl_train = CategoricalArrays.levelcode.(categorical(y_train)) .- 1
-bst = XGB.xgboost((X_train, yl_train); num_round, eta, num_class=3, objective="multi:softprob")
-xtrs = XGB.trees(bst)
-# yyy = XGB.predict(bst, DataFrame(X_test[27,:])) # WORKING
-yyy = XGB.predict(bst, DataFrame(X_test[28,:])) # NOT WORKING
-
-
-# # For multi-class classification
-rename!(X_test, [:f0, :f1, :f2, :f3])
-# class_probs = predict_xgboost_bag(trees, DataFrame(X_test[27,:]); n_classes=3, objective="multi:softprob") # WORKING
-class_probs = predict_xgboost_bag(trees, DataFrame(X_test[28,:]); n_classes=3, objective="multi:softprob") # NOT WORKING
-class_preds = [argmax(probs) for probs in eachrow(class_probs)] .-1
-
-X_train32 = DataFrame(Float32.(Matrix(X_train)), [:f0, :f1, :f2, :f3])
-bst32 = XGB.xgboost((X_train32, yl_train); num_round, eta, num_class=3, objective="multi:softprob")
-xtrs32 = XGB.trees(bst32)
-X_test32 = DataFrame(reshape(Float32.(Vector(X_test[28,:])), 1, :), [:f0, :f1, :f2, :f3])
-class_probs32 = predict_xgboost_bag(xtrs32, X_test32; n_classes=3, objective="multi:softprob") # NOT WORKING

From 6c34e4df37c948d162fd0dcffae657219bd39c9e Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Sun, 27 Apr 2025 18:24:19 +0200
Subject: [PATCH 34/44] cleaned unused packages, tests still working

---
 Project.toml | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

diff --git a/Project.toml b/Project.toml
index 999ae09..131ef09 100644
--- a/Project.toml
+++ b/Project.toml
@@ -6,29 +6,18 @@ version = "0.10.0"
 
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
-CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
-DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
 FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
 FunctionWrappers = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e"
-Graphs = "86223c79-3864-5bf0-83f7-82e725a168b6"
-HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
 IterTools = "c8e1da08-722c-5040-9ed9-7db0dc04731e"
 Lazy = "50d2b5c4-7a5e-59d5-8109-a42b560f39c0"
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
-ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
-Revise = "295af30f-e4ad-537b-8983-00126c2a3abe"
 SoleBase = "4475fa32-7023-44a0-aa70-4813b230e492"
 SoleData = "123f1ae1-6307-4526-ab5b-aab3a92a2b8c"
 SoleLogics = "b002da8f-3cb3-4d91-bbe3-2953433912b5"
 StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
-Suppressor = "fd094767-a336-5f1f-9728-57cf17d0bbfb"
-Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 ThreadSafeDicts = "4239201d-c60e-5e0a-9702-85d713665ba7"
-ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
 
 [weakdeps]
 DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
@@ -40,31 +29,22 @@ XGBoostExt = "XGBoost"
 
 [compat]
 AbstractTrees = "0.4"
-CSV = "0.10"
 CategoricalArrays = "0.10"
 DataFrames = "1"
-DataStructures = "0.18"
 DecisionTree = "0.12"
 FillArrays = "1"
 FunctionWrappers = "1"
-Graphs = "1.8"
-HTTP = "1.9"
 IterTools = "1"
 Lazy = "0.15.1"
 PrettyTables = "2.2"
-ProgressMeter = "1"
 Random = "1"
 Reexport = "1"
-Revise = "3"
-SoleBase = "0.11 - 0.13"
+SoleBase = "0.13.0"
 SoleData = "0.15, 0.16"
 SoleLogics = "0.11 - 0.13"
 StatsBase = "0.30 - 0.34"
-Suppressor = "0.2"
-Tables = "1"
 ThreadSafeDicts = "0.1"
 XGBoost = "2"
-ZipFile = "0.10"
 julia = "1"
 
 [extras]

From bcefa20fed26f9127a13c1c41e4915d282f65361 Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Sun, 27 Apr 2025 18:37:32 +0200
Subject: [PATCH 35/44] updated SoleData dep

---
 Project.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index 131ef09..d75341c 100644
--- a/Project.toml
+++ b/Project.toml
@@ -40,8 +40,8 @@ PrettyTables = "2.2"
 Random = "1"
 Reexport = "1"
 SoleBase = "0.13.0"
-SoleData = "0.15, 0.16"
-SoleLogics = "0.11 - 0.13"
+SoleData = "0.16.1"
+SoleLogics = "0.13"
 StatsBase = "0.30 - 0.34"
 ThreadSafeDicts = "0.1"
 XGBoost = "2"

From ba4d143f6e561e7284ca6572e87ca5287cb58b2b Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Sun, 27 Apr 2025 21:51:51 +0200
Subject: [PATCH 36/44] updated solelogics dep

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index d75341c..ddfa3e3 100644
--- a/Project.toml
+++ b/Project.toml
@@ -41,7 +41,7 @@ Random = "1"
 Reexport = "1"
 SoleBase = "0.13.0"
 SoleData = "0.16.1"
-SoleLogics = "0.13"
+SoleLogics = "0.13.1"
 StatsBase = "0.30 - 0.34"
 ThreadSafeDicts = "0.1"
 XGBoost = "2"

From 72efb5499df12474d2cae0098d0e9ef8b52eb736 Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Fri, 2 May 2025 11:49:20 +0200
Subject: [PATCH 37/44] added a @show for checking

---
 ext/DecisionTreeExt.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ext/DecisionTreeExt.jl b/ext/DecisionTreeExt.jl
index 1755fb5..041d887 100644
--- a/ext/DecisionTreeExt.jl
+++ b/ext/DecisionTreeExt.jl
@@ -57,6 +57,7 @@ function SoleModels.solemodel(
     keep_condensed = false,
     kwargs...
 ) where {T,orig_O}
+@show "PASO"
     # TODO rewrite error according to orig_O
     # if isnothing(classlabels)
     #     error("Please, provide classlabels argument, as in solemodel(forest; classlabels = classlabels, kwargs...). If your forest was trained via MLJ, use `classlabels = (mach).fitresult[2][sortperm((mach).fitresult[3])]`. Also consider providing `featurenames = report(mach).features`.")

From bd889ac829212aae1c5d383edc4eafb5c735b28c Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Fri, 2 May 2025 12:28:42 +0200
Subject: [PATCH 38/44] removed guard

---
 ext/DecisionTreeExt.jl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ext/DecisionTreeExt.jl b/ext/DecisionTreeExt.jl
index 041d887..1755fb5 100644
--- a/ext/DecisionTreeExt.jl
+++ b/ext/DecisionTreeExt.jl
@@ -57,7 +57,6 @@ function SoleModels.solemodel(
     keep_condensed = false,
     kwargs...
 ) where {T,orig_O}
-@show "PASO"
     # TODO rewrite error according to orig_O
     # if isnothing(classlabels)
     #     error("Please, provide classlabels argument, as in solemodel(forest; classlabels = classlabels, kwargs...). If your forest was trained via MLJ, use `classlabels = (mach).fitresult[2][sortperm((mach).fitresult[3])]`. Also consider providing `featurenames = report(mach).features`.")

From 83ffb33fa2fb42c61541c640fe8432fcfd166e26 Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Sat, 7 Jun 2025 15:36:47 +0200
Subject: [PATCH 39/44] test working

---
 Project.toml                          | 34 ++++++++++++++------------
 pluto-demo.jl                         | 10 ++++----
 test/DecisionTreeExt/adaboost.jl      | 13 ----------
 test/DecisionTreeExt/forest.jl        | 12 ----------
 test/DecisionTreeExt/tree.jl          | 13 ----------
 test/XGBoostExt/xgboost_classifier.jl |  2 +-
 test/base.jl                          |  7 ------
 test/juliacon2024.jl                  | 12 +++++-----
 test/linear-form-utilities.jl         |  5 ----
 test/misc.jl                          | 11 ---------
 test/parse.jl                         |  7 ------
 test/runtests.jl                      | 30 +++++++++++++++++----
 test/test_tree.jl                     |  3 ---
 13 files changed, 55 insertions(+), 104 deletions(-)

diff --git a/Project.toml b/Project.toml
index ddfa3e3..86ccd06 100644
--- a/Project.toml
+++ b/Project.toml
@@ -2,7 +2,7 @@ name = "SoleModels"
 uuid = "4249d9c7-3290-4ddd-961c-e1d3ec2467f8"
 license = "MIT"
 authors = ["Michele GHIOTTI", "Giovanni PAGLIARINI", "Edoardo PONSANESI", "Eduard I. STAN"]
-version = "0.10.0"
+version = "0.10.1"
 
 [deps]
 AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
@@ -30,41 +30,43 @@ XGBoostExt = "XGBoost"
 
 [compat]
 AbstractTrees = "0.4"
 CategoricalArrays = "0.10"
-DataFrames = "1"
-DecisionTree = "0.12"
 FillArrays = "1"
 FunctionWrappers = "1"
 IterTools = "1"
-Lazy = "0.15.1"
+Lazy = "0.15"
 PrettyTables = "2.2"
-Random = "1"
 Reexport = "1"
-SoleBase = "0.13.0"
-SoleData = "0.16.1"
-SoleLogics = "0.13.1"
+SoleBase = "0.13"
+SoleData = "0.16"
+SoleLogics = "0.13"
 StatsBase = "0.30 - 0.34"
 ThreadSafeDicts = "0.1"
-XGBoost = "2"
 julia = "1"
 
 [extras]
-CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
-FunctionWrappers = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
-MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
 MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661"
 MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
 MultiData = "8cc5100c-b3d1-4f82-90cb-0ea93d317aba"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-SoleData = "123f1ae1-6307-4526-ab5b-aab3a92a2b8c"
-SoleLogics = "b002da8f-3cb3-4d91-bbe3-2953433912b5"
-SoleModels = "4249d9c7-3290-4ddd-961c-e1d3ec2467f8"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 XGBoost = "009559a3-9522-5dbb-924b-0b6ed2b22bb9"
 
 [targets]
-test = ["Test", "CategoricalArrays", "DataFrames", "Random", "MLJ", "MultiData", "Markdown", "InteractiveUtils", "MLJBase", "XGBoost", "DecisionTree", "MLJModelInterface", "MLJDecisionTreeInterface", "SoleData", "SoleModels", "SoleLogics", "FunctionWrappers"]
+test = [
+    "DataFrames",
+    "DecisionTree",
+    "InteractiveUtils",
+    "MLJ",
+    "MLJDecisionTreeInterface",
+    "MLJModelInterface",
+    "Markdown",
+    "MultiData",
+    "Random",
+    "Test",
+    "XGBoost"
+]
diff --git a/pluto-demo.jl b/pluto-demo.jl
index ce978a6..095231b 100644
--- a/pluto-demo.jl
+++ b/pluto-demo.jl
@@ -1,13 +1,13 @@
 ### A Pluto.jl notebook ###
 # v0.19.38
 
-using Markdown
-using InteractiveUtils
+# using Markdown
+# using InteractiveUtils
 
 # ╔═╡ 7685d19e-cc98-4031-a6f9-29ecccc9f417
 begin
-    using SoleModels
-    using DataFrames
+    # using SoleModels
+    # using DataFrames
 
     # Load an example time-series classification dataset as a tuple (DataFrame, Vector{String})
     X_df, y = SoleModels.load_arff_dataset("NATOPS")
@@ -32,7 +32,7 @@ end
 
 # ╔═╡ 1ccda54b-1b70-4353-ace6-fe277e5bf67f
 begin
-    using MultiData
+    # using MultiData
 
     # Construct a logiset from a DataFrame
     logiset = scalarlogiset(X_df, features)
diff --git a/test/DecisionTreeExt/adaboost.jl b/test/DecisionTreeExt/adaboost.jl
index 1c2b921..445b8a7 100644
--- a/test/DecisionTreeExt/adaboost.jl
+++ b/test/DecisionTreeExt/adaboost.jl
@@ -1,16 +1,3 @@
-using Test
-
-using MLJ
-using MLJBase
-using DataFrames
-using CategoricalArrays
-
-using MLJDecisionTreeInterface
-using SoleModels
-using Random
-
-import DecisionTree as DT
-
 X, y = @load_iris
 X = DataFrame(X)
 
diff --git a/test/DecisionTreeExt/forest.jl b/test/DecisionTreeExt/forest.jl
index fa7dccc..d4577aa 100644
--- a/test/DecisionTreeExt/forest.jl
+++ b/test/DecisionTreeExt/forest.jl
@@ -1,15 +1,3 @@
-using Test
-
-using MLJ
-using MLJBase
-using DataFrames
-
-using MLJDecisionTreeInterface
-using SoleModels
-using Random
-
-import DecisionTree as DT
-
 X, y = @load_iris
 X = DataFrame(X)
 
diff --git a/test/DecisionTreeExt/tree.jl b/test/DecisionTreeExt/tree.jl
index 909eb31..2d864a0 100644
--- a/test/DecisionTreeExt/tree.jl
+++ b/test/DecisionTreeExt/tree.jl
@@ -1,16 +1,3 @@
-using Test
-
-using MLJ
-using MLJBase
-using DataFrames
-
-using MLJDecisionTreeInterface
-using SoleModels
-using Random
-using CategoricalArrays
-
-import DecisionTree as DT
-
 X, y = @load_iris
 X = DataFrame(X)
 
diff --git a/test/XGBoostExt/xgboost_classifier.jl b/test/XGBoostExt/xgboost_classifier.jl
index 2685325..0d2c1b3 100644
--- a/test/XGBoostExt/xgboost_classifier.jl
+++ b/test/XGBoostExt/xgboost_classifier.jl
@@ -1,7 +1,7 @@
 using Test
 
 using MLJ
-using MLJBase
+# using MLJBase
 using DataFrames
 
 using SoleModels
diff --git a/test/base.jl b/test/base.jl
index 5dd8767..f9c98dc 100644
--- a/test/base.jl
+++ b/test/base.jl
@@ -1,10 +1,3 @@
-using SoleModels
-using SoleLogics
-using FunctionWrappers: FunctionWrapper
-using SoleModels: AbstractModel
-using SoleModels: ConstantModel, LeafModel
-using Test
-
 # base.jl
 
 io = IOBuffer()
diff --git a/test/juliacon2024.jl b/test/juliacon2024.jl
index 93f135c..ab50e19 100644
--- a/test/juliacon2024.jl
+++ b/test/juliacon2024.jl
@@ -1,12 +1,12 @@
 # JuliaCon2024 demo
 
 # Load packages
-begin
-    using MLJ
-    using MLJDecisionTreeInterface
-    using DataFrames
-    using Random
-end
+# begin
+#     using MLJ
+#     using MLJDecisionTreeInterface
+#     using DataFrames
+#     using Random
+# end
 
 # Load dataset
 X, y = begin
diff --git a/test/linear-form-utilities.jl b/test/linear-form-utilities.jl
index d55fcb2..09fbb9b 100644
--- a/test/linear-form-utilities.jl
+++ b/test/linear-form-utilities.jl
@@ -1,8 +1,3 @@
-using Test
-using SoleLogics
-using SoleModels
-
-
 b = Branch(LeftmostConjunctiveForm((@atoms p q r s)), "YES", "NO")
 
 @test_nowarn b[1:3]
diff --git a/test/misc.jl b/test/misc.jl
index b048a49..a4594a4 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -1,14 +1,3 @@
-# using Revise
-
-# using Reexport
-using FunctionWrappers: FunctionWrapper
-using Test
-using SoleLogics
-using SoleModels
-using SoleModels: AbstractModel
-using SoleModels: ConstantModel, LeafModel
-using SoleModels: listrules, displaymodel, submodels
-
 io = IOBuffer()
 
 parse_other_kind_of_formula = SoleLogics.parseformula
diff --git a/test/parse.jl b/test/parse.jl
index a4e905e..5d1a019 100644
--- a/test/parse.jl
+++ b/test/parse.jl
@@ -1,10 +1,3 @@
-using Test
-using SoleModels
-using SoleData
-using SoleData: AbstractUnivariateFeature, Feature
-using SoleData: ScalarCondition
-using SoleData: feature
-
 ############################################################################################
 ############################ Orange parser #################################################
 ############################################################################################
diff --git a/test/runtests.jl b/test/runtests.jl
index 986db9e..f78c9ae 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,8 +1,28 @@
-# using Revise
-using SoleModels
-using SoleLogics
-using Test
-using Random
+using Distributed
+addprocs(2)
+
+@everywhere begin
+    using SoleModels
+    using SoleModels: AbstractModel
+    using SoleModels: ConstantModel, LeafModel
+    using SoleModels: listrules, displaymodel, submodels
+    using SoleData
+    using SoleData: AbstractUnivariateFeature, Feature
+    using SoleData: ScalarCondition
+    using SoleData: feature
+    using SoleLogics
+    using CategoricalArrays
+    using Markdown
+    using MultiData
+    using InteractiveUtils
+    using MLJ
+    using MLJDecisionTreeInterface
+    import DecisionTree as DT
+    using DataFrames
+    using Test
+    using Random
+    using FunctionWrappers: FunctionWrapper
+end
 
 function run_tests(list)
     println("\n" * ("#"^50))
diff --git a/test/test_tree.jl b/test/test_tree.jl
index af4cb1b..38c8450 100644
--- a/test/test_tree.jl
+++ b/test/test_tree.jl
@@ -8,9 +8,6 @@
 #           "yes"   "no"  "yes"   "no"
 ##################################################
 
-using SoleLogics
-using SoleModels
-
 formula_p = SoleLogics.parseformula("p")
 formula_q = SoleLogics.parseformula("q")
 formula_r = SoleLogics.parseformula("r")

From 54ae95f557e1835c1039f7b21f5e18f36ed859b3 Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Sat, 7 Jun 2025 15:46:05 +0200
Subject: [PATCH 40/44] updated github ci

---
 .cirrus.yml                  | 20 ---------------
 .github/dependabot.yml       |  7 ++++++
 .github/workflows/TagBot.yml | 16 ++++++++++++
 .github/workflows/ci.yml     | 49 ++++++++++++++++++++++++++++--------
 4 files changed, 61 insertions(+), 31 deletions(-)
 delete mode 100644 .cirrus.yml
 create mode 100644 .github/dependabot.yml

diff --git a/.cirrus.yml b/.cirrus.yml
deleted file mode 100644
index 013f726..0000000
--- a/.cirrus.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-freebsd_instance:
-  image_family: freebsd-14-2
-task:
-  name: FreeBSD
-  artifacts_cache:
-    folder: ~/.julia/artifacts
-  env:
-    matrix:
-      - JULIA_VERSION: 1.9
-      - JULIA_VERSION: 1
-      - JULIA_VERSION: nightly
-    allow_failures: $JULIA_VERSION == 'nightly'
-  install_script:
-    - sh -c "$(fetch https://raw.githubusercontent.com/ararslan/CirrusCI.jl/master/bin/install.sh -o -)"
-  build_script:
-    - cirrusjl build
-  test_script:
-    - cirrusjl test
-  coverage_script:
-    - cirrusjl coverage codecov
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..700707c
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,7 @@
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/" # Location of package manifests
+    schedule:
+      interval: "weekly"
diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml
index f49313b..0cd3114 100644
--- a/.github/workflows/TagBot.yml
+++ b/.github/workflows/TagBot.yml
@@ -4,6 +4,22 @@ on:
     types:
       - created
   workflow_dispatch:
+    inputs:
+      lookback:
+        default: "3"
+permissions:
+  actions: read
+  checks: read
+  contents: write
+  deployments: read
+  issues: read
+  discussions: read
+  packages: read
+  pages: read
+  pull-requests: read
+  repository-projects: read
+  security-events: read
+  statuses: read
 jobs:
   TagBot:
     if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot'
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ba475a2..d12f804 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,20 +1,47 @@
-name: Upload coverage reports to Codecov
-on: [push, pull_request]
+name: CI
+on:
+  push:
+    branches:
+      - lumen-refactoring
+    tags: ['*']
+  pull_request:
+  workflow_dispatch:
+concurrency:
+  # Skip intermediate builds: always.
+  # Cancel intermediate builds: only if it is a pull request build.
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }}
 jobs:
-  run:
-    runs-on: ubuntu-latest
+  test:
+    name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 60
+    permissions: # needed to allow julia-actions/cache to proactively delete old caches that it has created
+      actions: write
+      contents: read
+    strategy:
+      fail-fast: false
+      matrix:
+        version:
+          - '1'
+          - 'lts'
+          - 'pre'
+        os:
+          - ubuntu-latest
+        arch:
+          - x64
     steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-      - name: Set up Julia 1.9.0
-        uses: julia-actions/setup-julia@v1
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
         with:
-          version: "1.9.0"
+          version: ${{ matrix.version }}
+          arch: ${{ matrix.arch }}
+      - uses: julia-actions/cache@v2
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
       - uses: julia-actions/julia-processcoverage@v1
       - uses: codecov/codecov-action@v5
         with:
+          files: lcov.info
           token: ${{ secrets.CODECOV_TOKEN }}
-          slug: aclai-lab/SoleModels.jl
-
+          fail_ci_if_error: false
\ No newline at end of file

From 4275b3d5af9a9c941d87d8863ef0361b286d86cd Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Sat, 7 Jun 2025 15:54:30 +0200
Subject: [PATCH 41/44] added distributed package

---
 Project.toml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Project.toml b/Project.toml
index 86ccd06..e70222d 100644
--- a/Project.toml
+++ b/Project.toml
@@ -46,6 +46,7 @@ julia = "1"
 [extras]
 DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
 DecisionTree = "7806a523-6efd-50cb-b5f6-3fa6f1930dbb"
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
 MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661"
 MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
@@ -60,6 +61,7 @@ test = [
     "DataFrames",
     "DecisionTree",
+    "Distributed",
     "InteractiveUtils",
     "MLJ",

From 42c052ebcee8d31b6ec2585c21521f479b387648 Mon Sep 17 00:00:00 2001
From: PasoStudio73
Date: Sat, 7 Jun 2025 16:05:36 +0200
Subject: [PATCH 42/44] added MLJXGBoostInterface package

---
 Project.toml                          |  2 ++
 test/XGBoostExt/xgboost_classifier.jl | 13 -------------
 2 files changed, 2 insertions(+), 13 deletions(-)

diff --git a/Project.toml b/Project.toml
index e70222d..c927db9 100644
--- a/Project.toml
+++ b/Project.toml
@@ -51,6 +51,7 @@ InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 MLJ = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
 MLJDecisionTreeInterface = "c6f25543-311c-4c74-83dc-3ea6d1015661"
 MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
+MLJXGBoostInterface = "54119dfa-1dab-4055-a167-80440f4f7a91"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
 MultiData = "8cc5100c-b3d1-4f82-90cb-0ea93d317aba"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -66,6 +67,7 @@ test = [
     "MLJ",
"MLJDecisionTreeInterface", "MLJModelInterface", + "MLJXGBoostInterface", "Markdown", "MultiData", "Random", diff --git a/test/XGBoostExt/xgboost_classifier.jl b/test/XGBoostExt/xgboost_classifier.jl index 0d2c1b3..ad04f0a 100644 --- a/test/XGBoostExt/xgboost_classifier.jl +++ b/test/XGBoostExt/xgboost_classifier.jl @@ -1,16 +1,3 @@ -using Test - -using MLJ -# using MLJBase -using DataFrames - -using SoleModels - -import MLJModelInterface as MMI -import XGBoost as XGB - -using Random, CategoricalArrays - X, y = @load_iris X = DataFrame(X) From 1218cb12ca668b9c2c76cd8cbc95dad6ca097622 Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Sat, 7 Jun 2025 16:16:39 +0200 Subject: [PATCH 43/44] fix XGB --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index f78c9ae..4897596 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,6 +18,7 @@ addprocs(2) using MLJ using MLJDecisionTreeInterface import DecisionTree as DT + import XGBoost as XGB using DataFrames using Test using Random From 37daad695e48c31254e96b101ecebe1c49caa63c Mon Sep 17 00:00:00 2001 From: PasoStudio73 Date: Sat, 7 Jun 2025 16:33:47 +0200 Subject: [PATCH 44/44] added MMI --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index 4897596..a9b6a96 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -18,6 +18,7 @@ addprocs(2) using MLJ using MLJDecisionTreeInterface import DecisionTree as DT + import MLJModelInterface as MMI import XGBoost as XGB using DataFrames using Test