We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 55bb759 · commit a763469 · Copy full SHA for a763469
pomdp_py/algorithms/value_iteration.pyx
@@ -45,7 +45,7 @@ cdef class _PolicyTreeNode:
 trans_prob = self._agent.transition_model.probability(sp, s, self.action)
 obsrv_prob = self._agent.observation_model.probability(o, sp, self.action)
 if len(self.children) > 0:
-    subtree_value = self.children[o].values[s] # corresponds to V_{oi(p)} in paper
+    subtree_value = self.children[o].values[sp] # corresponds to V_{oi(p)} in paper
 else:
     subtree_value = 0.0
 reward = self._agent.reward_model.sample(s, self.action, sp)
0 commit comments