@@ -498,6 +498,9 @@ def __init__(
         self.hn_beta_vec = np.ones(self.c_k) / 2
         self.hn_root = None
 
+        # p_params
+        self.p_theta_vec = np.ones(self.c_k) / self.c_k
+
         self.set_h0_params(
             h0_g,
             h0_beta_vec,
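The new attribute `p_theta_vec` stores the parameters of the predictive distribution over the `c_k` symbols and is initialized to the uniform distribution. A one-line numeric check (with a hypothetical `c_k = 4`, not taken from the diff):

```python
import numpy as np

c_k = 4                    # hypothetical alphabet size
print(np.ones(c_k) / c_k)  # [0.25 0.25 0.25 0.25]
```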
@@ -882,129 +885,145 @@ def visualize_posterior(self,filename=None,format=None):
     def get_p_params(self):
         """Get the parameters of the predictive distribution.
 
-        This model does not have a simple parametric expression of the predictive distribution.
-        Therefore, this function returns ``None``.
-
         Returns
         -------
-        ``None``
+        p_params : dict of {str: numpy.ndarray}
+            * ``"p_theta_vec"`` : the value of ``self.p_theta_vec``
         """
-        return None
+        return {"p_theta_vec": self.p_theta_vec}
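A sketch of the new behaviour, assuming the surrounding class can be instantiated as `LearnModel(c_k=2)` (the constructor name and signature are inferred from the attributes in this diff, not confirmed by it). Right after construction, `p_theta_vec` is the uniform vector set in the `__init__` hunk above:

```python
model = LearnModel(c_k=2)    # hypothetical constructor
print(model.get_p_params())  # {'p_theta_vec': array([0.5, 0.5])}
```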
-    def _calc_pred_dist_leaf(self,node,x):
-        try:
-            node.sub_model.calc_pred_dist(x)
-        except:
-            node.sub_model.calc_pred_dist()
-
-    def _calc_pred_dist_recursion(self,node,x):
-        if node.leaf == False:  # internal node
-            self._calc_pred_dist_recursion(node.children[x[node.k]],x)
+    def _calc_pred_dist_leaf(self,node:_LearnNode):
+        return node.hn_beta_vec / node.hn_beta_vec.sum()
+
+    def _calc_pred_dist_recursion(self,node:_LearnNode,x,i):
+        if node.depth < self.c_d_max and i - 1 - node.depth >= 0:  # internal node
+            if node.children[x[i - node.depth - 1]] is None:
+                node.children[x[i - node.depth - 1]] = _LearnNode(
+                    node.depth + 1,
+                    self.c_k,
+                    self.h0_g,
+                    self.hn_g,
+                    self.h0_beta_vec,
+                    self.hn_beta_vec,
+                )
+                if node.depth + 1 == self.c_d_max:
+                    node.children[x[i - node.depth - 1]].h0_g = 0.0
+                    node.children[x[i - node.depth - 1]].hn_g = 0.0
+                    node.children[x[i - node.depth - 1]].leaf = True
+            tmp1 = self._calc_pred_dist_recursion(node.children[x[i - node.depth - 1]],x,i)
+            tmp2 = (1 - node.hn_g) * self._calc_pred_dist_leaf(node) + node.hn_g * tmp1
+            return tmp2
         else:  # leaf node
-            return self._calc_pred_dist_leaf(node, x)
+            return self._calc_pred_dist_leaf(node)
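The recursion above mixes the current node's leaf prediction (its normalized `hn_beta_vec`) with the prediction of the child selected by the context symbol `x[i - node.depth - 1]`, weighted by the node's split probability `hn_g`. A minimal numeric sketch of that mixing step (illustrative values, not from the diff):

```python
import numpy as np

hn_g = 0.5                         # node.hn_g: probability the tree splits here
leaf_pred = np.array([0.5, 0.5])   # node.hn_beta_vec / node.hn_beta_vec.sum()
child_pred = np.array([0.8, 0.2])  # tmp1 from the recursive call
print((1 - hn_g) * leaf_pred + hn_g * child_pred)  # [0.65 0.35]
```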
     def calc_pred_dist(self,x):
         """Calculate the parameters of the predictive distribution.
 
         Parameters
         ----------
         x : numpy ndarray
-            values of explanatory variables whose dtype is int
+            1-dimensional int array
         """
-        return
         _check.nonneg_int_vec(x,'x',DataFormatError)
-        if x.shape[0] != self.c_k:
-            raise(DataFormatError(f"x.shape[0] must equal to c_k:{self.c_k}"))
         if x.max() >= self.c_k:
             raise(DataFormatError(f"x.max() must be smaller than c_k:{self.c_k}"))
-        self._tmp_x[:] = x
-        for root in self.hn_metatree_list:
-            self._calc_pred_dist_recursion(root,self._tmp_x)
-
-    def _make_prediction_recursion_squared(self,node):
-        if node.leaf == False:  # internal node
-            return ((1 - node.hn_g) * node.sub_model.make_prediction(loss='squared')
-                + node.hn_g * self._make_prediction_recursion_squared(node.children[self._tmp_x[node.k]]))
-        else:  # leaf node
-            return node.sub_model.make_prediction(loss='squared')
-
-    def _make_prediction_leaf_01(self,node):
-        mode = node.sub_model.make_prediction(loss='0-1')
-        pred_dist = node.sub_model.make_prediction(loss='KL')
-        if type(pred_dist) is np.ndarray:
-            mode_prob = pred_dist[mode]
-        elif hasattr(pred_dist,'pdf'):
-            mode_prob = pred_dist.pdf(mode)
-        elif hasattr(pred_dist,'pmf'):
-            mode_prob = pred_dist.pmf(mode)
-        else:
-            mode_prob = None
-        return mode, mode_prob
-
-    def _make_prediction_recursion_01(self,node):
-        if node.leaf == False:  # internal node
-            mode1,mode_prob1 = self._make_prediction_leaf_01(node)
-            mode2,mode_prob2 = self._make_prediction_recursion_01(node.children[self._tmp_x[node.k]])
-            if (1 - node.hn_g) * mode_prob1 > node.hn_g * mode_prob2:
-                return mode1,mode_prob1
-            else:
-                return mode2,mode_prob2
-        else:  # leaf node
-            return self._make_prediction_leaf_01(node)
+        i = x.shape[0] - 1
+
+        if self.hn_root is None:
+            self.hn_root = _LearnNode(
+                0,
+                self.c_k,
+                self.h0_g,
+                self.hn_g,
+                self.h0_beta_vec,
+                self.hn_beta_vec,
+            )
+
+        self.p_theta_vec[:] = self._calc_pred_dist_recursion(self.hn_root,x,i)
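Note the changed contract: `x` is no longer a `c_k`-dimensional vector of explanatory variables but the whole (sub)sequence, and the predictive distribution is computed for its last element given the up to `c_d_max` symbols preceding it (the value of `x[-1]` itself is not read here). A usage sketch with the same hypothetical `LearnModel` as above:

```python
import numpy as np

model = LearnModel(c_k=2, c_d_max=2)             # hypothetical constructor
model.calc_pred_dist(np.array([0, 1, 1, 0, 1]))  # context of x[4]: x[3]=0, x[2]=1
print(model.get_p_params()["p_theta_vec"])       # nonnegative, sums to 1
```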
-    def make_prediction(self,loss="0-1"):
+    def make_prediction(self,loss="KL"):
         """Predict a new data point under the given criterion.
 
         Parameters
         ----------
         loss : str, optional
-            Loss function underlying the Bayes risk function, by default \"0-1\".
-            This function supports \"squared\", \"0-1\".
+            Loss function underlying the Bayes risk function, by default \"KL\".
+            This function supports \"KL\" and \"0-1\".
 
         Returns
         -------
         predicted_value : {float, numpy.ndarray}
             The predicted value under the given loss function.
+            If the loss function is \"KL\", the predictive
+            distribution will be returned as a 1-dimensional
+            numpy.ndarray that consists of occurrence probabilities.
         """
-        if loss == "squared":
-            tmp_pred_vec = np.empty(len(self.hn_metatree_list))
-            for i,metatree in enumerate(self.hn_metatree_list):
-                tmp_pred_vec[i] = self._make_prediction_recursion_squared(metatree)
-            return self.hn_metatree_prob_vec @ tmp_pred_vec
+        if loss == "KL":
+            return self.p_theta_vec
         elif loss == "0-1":
-            tmp_mode = np.empty(len(self.hn_metatree_list))
-            tmp_mode_prob_vec = np.empty(len(self.hn_metatree_list))
-            for i,metatree in enumerate(self.hn_metatree_list):
-                tmp_mode[i],tmp_mode_prob_vec[i] = self._make_prediction_recursion_01(metatree)
-            return tmp_mode[np.argmax(self.hn_metatree_prob_vec * tmp_mode_prob_vec)]
+            return np.argmax(self.p_theta_vec)
         else:
             raise(CriteriaError("Unsupported loss function! "
-                +"This function supports \"squared\" and \"0-1\"."))
+                +"This function supports \"0-1\" and \"KL\"."))
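With `p_theta_vec` in place, both supported losses read directly off it. Continuing the sketch above:

```python
dist = model.make_prediction(loss="KL")   # the full predictive pmf (p_theta_vec)
mode = model.make_prediction(loss="0-1")  # its argmax, i.e. the most probable symbol
```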
-    def pred_and_update(self,x,y,loss="0-1"):
+    def _pred_and_update_leaf(self,node:_LearnNode,x,i):
+        tmp = node.hn_beta_vec / node.hn_beta_vec.sum()
+        node.hn_beta_vec[x[i]] += 1
+        return tmp
+
+    def _pred_and_update_recursion(self,node:_LearnNode,x,i):
+        if node.depth < self.c_d_max and i - 1 - node.depth >= 0:  # internal node
+            if node.children[x[i - node.depth - 1]] is None:
+                node.children[x[i - node.depth - 1]] = _LearnNode(
+                    node.depth + 1,
+                    self.c_k,
+                    self.h0_g,
+                    self.hn_g,
+                    self.h0_beta_vec,
+                    self.hn_beta_vec,
+                )
+                if node.depth + 1 == self.c_d_max:
+                    node.children[x[i - node.depth - 1]].h0_g = 0.0
+                    node.children[x[i - node.depth - 1]].hn_g = 0.0
+                    node.children[x[i - node.depth - 1]].leaf = True
+            tmp1 = self._pred_and_update_recursion(node.children[x[i - node.depth - 1]],x,i)
+            tmp2 = (1 - node.hn_g) * self._pred_and_update_leaf(node,x,i) + node.hn_g * tmp1
+            node.hn_g = node.hn_g * tmp1[x[i]] / tmp2[x[i]]
+            return tmp2
+        else:  # leaf node
+            return self._pred_and_update_leaf(node,x,i)
+
+    def pred_and_update(self,x,loss="KL"):
         """Predict a new data point and update the posterior sequentially.
 
         Parameters
         ----------
         x : numpy.ndarray
-            It must be a degree-dimensional vector
-        y : numpy ndarray
-            values of objective variable whose dtype may be int or float
+            1-dimensional int array
         loss : str, optional
-            Loss function underlying the Bayes risk function, by default \"0-1\".
-            This function supports \"squared\", \"0-1\", and \"KL\".
+            Loss function underlying the Bayes risk function, by default \"KL\".
+            This function supports \"KL\" and \"0-1\".
 
         Returns
         -------
         predicted_value : {float, numpy.ndarray}
             The predicted value under the given loss function.
         """
         _check.nonneg_int_vec(x,'x',DataFormatError)
-        if x.shape[-1] != self.c_k:
-            raise(DataFormatError(f"x.shape[-1] must equal to c_k:{self.c_k}"))
         if x.max() >= self.c_k:
             raise(DataFormatError(f"x.max() must be smaller than c_k:{self.c_k}"))
-        self.calc_pred_dist(x)
+        i = x.shape[0] - 1
+
+        if self.hn_root is None:
+            self.hn_root = _LearnNode(
+                0,
+                self.c_k,
+                self.h0_g,
+                self.hn_g,
+                self.h0_beta_vec,
+                self.hn_beta_vec,
+            )
+
+        self.p_theta_vec[:] = self._pred_and_update_recursion(self.hn_root,x,i)
         prediction = self.make_prediction(loss=loss)
-        self.update_posterior(x,y,alg_type='given_MT')
         return prediction
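A sketch of the resulting sequential workflow (same hypothetical `LearnModel` as above): each call predicts the latest symbol from its context, then folds it into the posterior via the `hn_beta_vec` and `hn_g` updates shown in the recursion.

```python
import numpy as np

model = LearnModel(c_k=2, c_d_max=2)  # hypothetical constructor
seq = np.array([0, 1, 1, 0, 1, 1, 0, 1])
for t in range(seq.shape[0]):
    dist = model.pred_and_update(seq[:t + 1], loss="KL")  # predicts seq[t], then updates
```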