 import xml.etree.ElementTree as ET


 def to_pomdp_file(agent, output_path=None,
-                  discount_factor=0.95):
+                  discount_factor=0.95, float_precision=9):
     """
     Pass in an Agent, and use its components to generate
     a .pomdp file to `output_path`.
@@ -30,6 +30,8 @@ def to_pomdp_file(agent, output_path=None,
         output_path (str): The path of the output file to write in. Optional.
             Default None.
         discount_factor (float): The discount factor
+        float_precision (int): Number of decimals for float to str conversion.
+            Default 9.

     Returns:
         (list, list, list): The list of states, actions, observations that
             are ordered in the same way as they are in the .pomdp file.
@@ -42,7 +44,7 @@ def to_pomdp_file(agent, output_path=None,
     except NotImplementedError:
         raise ValueError("S, A, O must be enumerable for a given agent to convert to .pomdp format")

-    content = "discount: %f\n" % discount_factor
+    content = f"discount: %.{float_precision}f\n" % discount_factor
     content += "values: reward\n"  # We only consider reward, not cost.

     list_of_states = " ".join(str(s) for s in all_states)
@@ -62,7 +64,7 @@ def to_pomdp_file(agent, output_path=None,

     # Starting belief state - they need to be normalized
     total_belief = sum(agent.belief[s] for s in all_states)
-    content += "start: %s\n" % (" ".join(["%f" % (agent.belief[s]/total_belief)
+    content += "start: %s\n" % (" ".join([f"%.{float_precision}f" % (agent.belief[s]/total_belief)
                                           for s in all_states]))

     # State transition probabilities - they need to be normalized
@@ -75,7 +77,7 @@ def to_pomdp_file(agent, output_path=None,
             total_prob = sum(probs)
             for i, s_next in enumerate(all_states):
                 prob_norm = probs[i] / total_prob
-                content += 'T : %s : %s : %s %f\n' % (a, s, s_next, prob_norm)
+                content += f'T : %s : %s : %s %.{float_precision}f\n' % (a, s, s_next, prob_norm)

     # Observation probabilities - they need to be normalized
     for s_next in all_states:
@@ -90,15 +92,15 @@ def to_pomdp_file(agent, output_path=None,
                 .format(s_next, a)
             for i, o in enumerate(all_observations):
                 prob_norm = probs[i] / total_prob
-                content += 'O : %s : %s : %s %f\n' % (a, s_next, o, prob_norm)
+                content += f'O : %s : %s : %s %.{float_precision}f\n' % (a, s_next, o, prob_norm)

     # Immediate rewards
     for s in all_states:
         for a in all_actions:
             for s_next in all_states:
                 # We will take the argmax reward, which works for deterministic rewards.
                 r = agent.reward_model.sample(s, a, s_next)
-                content += 'R : %s : %s : %s : * %f\n' % (a, s, s_next, r)
+                content += f'R : %s : %s : %s : * %.{float_precision}f\n' % (a, s, s_next, r)

     if output_path is not None:
         with open(output_path, "w") as f:
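
The snippet below is a standalone sketch of the formatting pattern this commit introduces: the f-string first expands {float_precision} into a %-format spec (e.g. "%.9f"), which the %-operator then applies to the value. The action and state names used here are illustrative placeholders, not values from the commit.

# Illustrative only: "move", "s0", "s1" are placeholder names, not from the commit.
float_precision = 9   # matches the new default in to_pomdp_file
prob_norm = 1.0 / 3.0

# The f-string expands to 'T : %s : %s : %s %.9f\n', which %-formatting then fills in.
line = f'T : %s : %s : %s %.{float_precision}f\n' % ("move", "s0", "s1", prob_norm)
print(line, end="")   # T : move : s0 : s1 0.333333333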